In [2]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.DataStructs.cDataStructs import TanimotoSimilarity

def smiles_to_fingerprint(smiles, radius=2, n_bits=2048):
    """Convert a SMILES string into a Morgan (ECFP-style) bit-vector fingerprint.

    Parameters
    ----------
    smiles : object
        Candidate SMILES value, typically one spreadsheet cell. Anything that
        is not a non-blank ``str`` (``None``, NaN, numbers, list-like values)
        is treated as missing.
    radius : int, optional
        Morgan radius handed to RDKit. Defaults to 2, the value that used to
        be hard-coded.
    n_bits : int, optional
        Length of the bit vector. Defaults to 2048, the value that used to be
        hard-coded.

    Returns
    -------
    The object returned by ``AllChem.GetMorganFingerprintAsBitVect``, or
    ``None`` when the input is missing, ``Chem.MolFromSmiles`` yields a falsy
    result, or RDKit raises while processing the value.
    """
    # The type check comes first: it already rejects None/NaN, and it avoids
    # calling pd.isna on list-like cells, whose array result cannot be used
    # as an `if` condition.
    if not isinstance(smiles, str) or not smiles.strip():
        return None
    try:
        # Convert SMILES to a molecule
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return None
        # Generate the fingerprint (Morgan fingerprint / ECFP)
        return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
    except Exception:
        # Best effort: one bad cell must not abort a whole column. Unlike a
        # bare `except:`, this still lets KeyboardInterrupt/SystemExit through.
        return None

def calculate_tanimoto_similarity(fp1, fp2):
    """Return the Tanimoto similarity of two fingerprints.

    Either argument may be ``None`` (for example when a SMILES could not be
    converted); in that case no comparison is attempted and ``None`` is
    returned. Otherwise the result of ``TanimotoSimilarity(fp1, fp2)`` is
    returned unchanged.
    """
    have_both = fp1 is not None and fp2 is not None
    return TanimotoSimilarity(fp1, fp2) if have_both else None

# Read the Excel file (change file path as needed).
# NOTE(review): this is an absolute, machine-specific path; it has to be
# edited before the cell can run on another machine.
file_path = "C:/Users/ganes/OneDrive/Desktop/AI/Template/Generated_Templates.xlsx"
df = pd.read_excel(file_path)

# Spreadsheet columns holding the generated SMILES ('Output') and the
# reference SMILES ('Input').
generated_smiles_col = "Output"
input_smiles_col = "Input"

# Fail early with a readable message if the sheet lacks an expected column.
missing_cols = [col for col in (generated_smiles_col, input_smiles_col) if col not in df.columns]
if missing_cols:
    raise KeyError(f"Column(s) {missing_cols} not found in {file_path}; available: {list(df.columns)}")

# Convert SMILES to fingerprints; invalid or missing SMILES become None.
# The fingerprints live in local Series, so no helper columns have to be
# added to `df` and dropped again before saving.
generated_fps = df[generated_smiles_col].apply(smiles_to_fingerprint)
input_fps = df[input_smiles_col].apply(smiles_to_fingerprint)

# Tanimoto similarity between the generated and the input fingerprint of each
# row. Pairing the two Series with zip (instead of DataFrame.apply(axis=1))
# avoids building a Series per row and also works for a sheet with no rows.
# Rows where either SMILES was unusable get NaN, written as a blank cell.
df['Tanimoto_Similarity'] = pd.Series(
    [calculate_tanimoto_similarity(gen_fp, in_fp) for gen_fp, in_fp in zip(generated_fps, input_fps)],
    index=df.index,
    dtype="float64",
)

# Save the result back to an Excel file
output_file_path = "tanimoto_similarity_output.xlsx"
df.to_excel(output_file_path, index=False)

print(f"Tanimoto similarity calculation completed. Results saved to {output_file_path}.")


Tanimoto similarity calculation completed. Results saved to tanimoto_similarity_output.xlsx.




In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.DataStructs.cDataStructs import TanimotoSimilarity

# Function to convert SMILES to Morgan fingerprint
def smiles_to_fingerprint(smiles, radius=2, n_bits=2048):
    """Build a Morgan bit-vector fingerprint from a SMILES string.

    Parameters
    ----------
    smiles : object
        Value to convert, usually one spreadsheet cell. Only a non-blank
        ``str`` is processed; ``None``, NaN, numbers and list-like values are
        treated as missing.
    radius : int, optional
        Morgan radius handed to RDKit (default 2, previously hard-coded).
    n_bits : int, optional
        Length of the bit vector (default 2048, previously hard-coded).

    Returns
    -------
    The object returned by ``AllChem.GetMorganFingerprintAsBitVect``, or
    ``None`` when the input is missing, ``Chem.MolFromSmiles`` yields a falsy
    result, or RDKit raises while processing the value.
    """
    # Checking the type first makes a separate NaN test unnecessary and keeps
    # list-like cells from reaching pd.isna, whose array result cannot be
    # used as an `if` condition.
    if not isinstance(smiles, str) or not smiles.strip():
        return None
    try:
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return None
        return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
    except Exception:
        # Best effort: a single bad cell must not abort the whole column, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

# Function to calculate Tanimoto similarity
def calculate_tanimoto_similarity(fp1, fp2):
    """Compare two fingerprints with ``TanimotoSimilarity``.

    Returns ``None`` without calling RDKit when either fingerprint is
    ``None``; otherwise returns whatever ``TanimotoSimilarity(fp1, fp2)``
    returns.
    """
    # Identity checks only: `==` is deliberately not used on fingerprints.
    if any(fingerprint is None for fingerprint in (fp1, fp2)):
        return None
    return TanimotoSimilarity(fp1, fp2)

# Read the Excel file (change file path as needed).
# NOTE(review): this is an absolute, machine-specific path; it has to be
# edited before the cell can run on another machine.
file_path = "C:/Users/ganes/OneDrive/Desktop/AI/Template/Generated_Templates.xlsx"
df = pd.read_excel(file_path)

# The SMILES are expected in two columns, 'Input' and 'Output'
input_smiles_col = "Input"
output_smiles_col = "Output"

# Convert Input and Output SMILES to fingerprints (None where unusable)
df['Input_Fingerprint'] = df[input_smiles_col].apply(smiles_to_fingerprint)
df['Output_Fingerprint'] = df[output_smiles_col].apply(smiles_to_fingerprint)

# Pull the fingerprints out once. Iterating plain lists avoids the per-row
# Series that a nested df.iterrows() rebuilds on every inner iteration.
input_fps = df['Input_Fingerprint'].tolist()
output_fps = df['Output_Fingerprint'].tolist()

# Pairwise comparison: entry (i, j) is the Tanimoto similarity between the
# Input SMILES of row i and the Output SMILES of row j.
similarity_rows = [
    [calculate_tanimoto_similarity(input_fp, output_fp) for output_fp in output_fps]
    for input_fp in input_fps
]

# Build the matrix in one step, positionally aligned with df's rows, instead
# of writing cell by cell with .at. Pairs involving an unusable SMILES become
# NaN, written as a blank cell.
similarity_matrix = pd.DataFrame(
    similarity_rows, index=df.index, columns=df.index
).astype("float64")

# Save the pairwise similarity matrix to an Excel file
output_file_path = "pairwise_tanimoto_similarity_output.xlsx"
similarity_matrix.to_excel(output_file_path)

print(f"Pairwise Tanimoto similarity calculation completed. Results saved to {output_file_path}.")




Pairwise Tanimoto similarity calculation completed. Results saved to pairwise_tanimoto_similarity_output.xlsx.
