In [19]:
import numpy
import csv
import glob,re
import rdkit
import pandas as pd
from rdkit import DataStructs
from rdkit.Chem import MolFromSmiles
from rdkit.Chem import AllChem
from rdkit.Chem import PandasTools
from rdkit.Chem.Pharm2D import Gobbi_Pharm2D,Generate
from rdkit.Chem import Descriptors


class Fingerprint:
    """Compute molecular fingerprints for a collection of SMILES strings.

    The SMILES are parsed once, at construction time, with RDKit's
    ``MolFromSmiles``. The parsed molecules are stored in ``Mols_converted``
    in the same order as the input ``Mols``.
    """

    def __init__(self, Mols):
        """Store the input SMILES and parse them into RDKit molecules.

        Parameters
        ----------
        Mols : sequence of str
            SMILES strings, one per molecule (for example a list or a
            pandas Series).
        """
        self.Mols = Mols
        self.Mols_converted = self.Molecule_Convert()

    def Molecule_Convert(self):
        """Parse every entry of ``self.Mols`` with ``MolFromSmiles``.

        Returns
        -------
        list
            One entry per input SMILES, in input order. ``MolFromSmiles``
            yields ``None`` for a SMILES it cannot parse; such entries are
            kept so positions still line up with ``self.Mols``.
        """
        return [MolFromSmiles(smiles) for smiles in self.Mols]

    def Morgan(self, radius=2, nBits=256):
        """Compute Morgan fingerprints as explicit bit vectors.

        Parameters
        ----------
        radius : int, optional
            Morgan radius passed to ``GetMorganFingerprintAsBitVect``.
            Defaults to 2.
        nBits : int, optional
            Length of the bit vector (e.g. 256, 512 or 1024).
            Defaults to 256.

        Returns
        -------
        pandas.DataFrame
            One row per molecule (default integer index, input order) and
            one column per bit, holding 0.0 / 1.0 values.

        Raises
        ------
        ValueError
            If any input SMILES could not be parsed into a molecule.
        """
        # Fail early with a readable message instead of letting RDKit reject
        # a None molecule with an opaque argument error.
        invalid = [
            (pos, smiles)
            for pos, (smiles, mol) in enumerate(zip(self.Mols, self.Mols_converted))
            if mol is None
        ]
        if invalid:
            preview = ", ".join(
                "{} ({!r})".format(pos, smiles) for pos, smiles in invalid[:5]
            )
            raise ValueError(
                "{} SMILES could not be parsed by RDKit; first offending "
                "position(s): {}".format(len(invalid), preview)
            )

        rows = []
        for mol in self.Mols_converted:
            bitvect = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
            # numpy.zeros defaults to a float array, so the resulting
            # DataFrame (and any CSV written from it) holds 0.0 / 1.0 values.
            arr = numpy.zeros((nBits,))
            DataStructs.ConvertToNumpyArray(bitvect, arr)
            rows.append(arr)
        return pd.DataFrame(rows)


In [20]:
# Read the input molecules, compute their Morgan fingerprints with the
# Fingerprint class, and write the result to a CSV file.

# Input: a semicolon-separated file with a header row, SMILES in the first
# column and compound names in the second column.
data = pd.read_csv('LINCS_PertID_SMILES.csv', sep=";")
Mols = data.iloc[:, 0]
Mols_names = data.iloc[:, 1]

# One fingerprint row per molecule; the compound names are attached as an
# extra column (pandas aligns them with the rows by index).
F = Fingerprint(Mols)
F_Morgan = F.Morgan()
F_Morgan['CompoundNames'] = Mols_names

# Write the fingerprint table as a comma-separated file without the index.
FileName_Morgan = "LINCS_MorganFP_256.csv"
F_Morgan.to_csv(FileName_Morgan, sep=",", index=False)
print("Fingerprint file generated")

Fingerprint file generated
