<a href="https://colab.research.google.com/github/deepsharma26/SIRT1_Main/blob/Descriptor_genration/morgan_fp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
!pip install rdkit-pypi
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem

Collecting rdkit-pypi
  Downloading rdkit_pypi-2022.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Downloading rdkit_pypi-2022.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.4/29.4 MB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit-pypi
Successfully installed rdkit-pypi-2022.9.5


In [3]:
def calculate_morgan_fingerprint(smiles, radius=2, n_bits=2048):
    """
    Turn a SMILES string into a Morgan fingerprint bit list.

    Parameters:
    - smiles (str): SMILES representation of the molecule.
    - radius (int): Radius passed to the Morgan fingerprint generator.
    - n_bits (int): Length of the resulting bit vector.

    Returns:
    - list or None: The fingerprint bits as a Python list, or None when the
      SMILES cannot be parsed into a molecule or any exception is raised
      while processing it (the exception is printed, not re-raised).
    """
    try:
        molecule = Chem.MolFromSmiles(smiles)
        # MolFromSmiles signals an unparsable SMILES by returning None.
        if molecule is not None:
            bit_vector = AllChem.GetMorganFingerprintAsBitVect(molecule, radius, nBits=n_bits)
            return list(bit_vector)
    except Exception as e:
        print(f"Error processing SMILES {smiles}: {e}")
    # Reached for both unparsable SMILES and reported errors.
    return None

def process_csv(input_csv, output_csv, smiles_column='canonical_smiles', radius=2, n_bits=2048):
    """
    Process a CSV file to calculate Morgan fingerprints for each SMILES.

    Reads `input_csv`, computes one fingerprint per row with
    `calculate_morgan_fingerprint`, appends the bits as columns
    `FP_0` ... `FP_{n_bits - 1}` and writes the combined table to
    `output_csv` (without the index).

    Rows for which no fingerprint could be computed (the helper returned
    None, e.g. for an unparsable or missing SMILES) are kept in the output
    with empty fingerprint cells, and a warning with their count is printed.

    Parameters:
    - input_csv (str): Path to the input CSV file containing SMILES strings.
    - output_csv (str): Path to the output CSV file to save fingerprints.
    - smiles_column (str): Column name in the CSV that contains SMILES strings.
    - radius (int): Radius of the Morgan fingerprint.
    - n_bits (int): Number of bits for the fingerprint.

    Returns:
    - None. If `smiles_column` is not present in the input, a message is
      printed and no output file is written.
    """
    # Read the file the caller asked for rather than a fixed path.
    df = pd.read_csv(input_csv)

    if smiles_column not in df.columns:
        print(f"Column '{smiles_column}' not found in the input CSV.")
        return

    fingerprints = df[smiles_column].apply(lambda x: calculate_morgan_fingerprint(x, radius, n_bits))

    fp_columns = [f'FP_{i}' for i in range(n_bits)]

    # Build the bit table from successful rows only: a None entry mixed in
    # with the bit lists cannot be expanded into n_bits columns.
    valid = fingerprints.dropna()
    fingerprint_df = pd.DataFrame(valid.tolist(), index=valid.index, columns=fp_columns)

    n_failed = len(fingerprints) - len(valid)
    if n_failed:
        print(f"Warning: no fingerprint for {n_failed} of {len(fingerprints)} rows; "
              "their fingerprint columns are left empty.")
        # Nullable integers keep the bits as 0/1 while failed rows become
        # missing values after realigning to the original row index.
        fingerprint_df = fingerprint_df.astype("Int8").reindex(df.index)

    result_df = pd.concat([df, fingerprint_df], axis=1)

    result_df.to_csv(output_csv, index=False)
    print(f"Fingerprints saved to {output_csv}")


# Entry point: run the fingerprint pipeline with the paths configured below.
if __name__ == "__main__":
    # Path handed to process_csv as its input CSV argument.
    input_csv = "/content/SIRT1_05_bioactivity_data_2class_pIC50.csv"
    # Destination file for the table with the fingerprint columns appended.
    output_csv = "molecules_with_morganfingerprints.csv"
    # Name of the column that holds the SMILES strings.
    smiles_column = "canonical_smiles"
    # radius and n_bits are not passed, so process_csv uses its defaults.
    process_csv(input_csv, output_csv, smiles_column)

Fingerprints saved to molecules_with_morganfingerprints.csv
