In [12]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors
from mordred import Calculator, descriptors
import os

In [28]:
# Load input data from CSV file
input_file = 'data/OROP_updated.csv'
input_data = pd.read_csv(input_file)
input_data


Unnamed: 0,system_number,dG_red,Solvent,Charge_gn,Charge_rd,UHF_gn,UHF_rd,SMILES_1,NumAtoms_1
49,1.41,acetonitrile,1,0,1,0,COc1ccccc1O,17,
181,-1.10,acetonitrile,0,-1,0,1,O=C(O)c1ccccc1-c1c2cc(Br)c(=O)c(Br)c-2oc2c(Br)...,37,
52,1.69,acetonitrile,1,0,1,0,Oc1ccc(Br)cc1,13,
27,1.73,acetonitrile,1,0,1,0,CC=Cc1ccc(Cl)cc1,19,
139,1.67,acetonitrile,1,0,1,0,C(=Nc1ccccc1)c1ccccc1,25,
...,...,...,...,...,...,...,...,...,...
18,1.60,acetonitrile,1,0,1,0,CC=Cc1cccc(OC)c1,23,
62,2.50,acetonitrile,1,0,1,0,C1COCCO1,14,
129,1.95,acetonitrile,1,0,1,0,O=Cc1ccc(O)cc1,15,
59,1.51,acetonitrile,1,0,1,0,C1=COCCC1,14,


In [14]:
# Function to generate 0D descriptors
def generate_0D_descriptors(mol):
    return {
        'MolWt': Descriptors.MolWt(mol),
        'NumAtoms': mol.GetNumAtoms(),
        'NumHeteroatoms': Descriptors.NumHeteroatoms(mol)
    }

# Function to generate 1D descriptors
def generate_1D_descriptors(mol):
    return {
        'NumRings': Descriptors.RingCount(mol),
        'NumRotatableBonds': Descriptors.NumRotatableBonds(mol)
    }

# Function to generate 2D descriptors
def generate_2D_descriptors(mol):
    descriptors = {
        'TPSA': Descriptors.TPSA(mol),
        'LogP': Descriptors.MolLogP(mol),
        'NumHBD': rdMolDescriptors.CalcNumHBD(mol),
        'NumHBA': rdMolDescriptors.CalcNumHBA(mol),
        'FractionCSP3': rdMolDescriptors.CalcFractionCSP3(mol)
    }
    return descriptors

In [25]:

# Function to add charge and unpaired electrons descriptors
def generate_charge_descriptors(row):
    return {
        'Charge_gn': row['Charge_gn'],
        'UHF_gn': row['UHF_gn'],
        'Charge_rd': row['Charge_rd'],
        'UHF_rd': row['UHF_rd']
    }

from mordred import Calculator, descriptors

# Initialize the Mordred calculator
calc = Calculator(descriptors, ignore_3D=True)

def generate_descriptors(smiles, row):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    # Calculate descriptors using Mordred
    mordred_result = calc(mol)
    mordred_dict = mordred_result.asdict()

    # Calculate 0D, 1D, and 2D descriptors using RDKit
    rdkit_descriptors = {
        'MolWt': Descriptors.MolWt(mol),
        'NumAtoms': mol.GetNumAtoms(),
        'NumHeteroatoms': Descriptors.NumHeteroatoms(mol),
        'NumRings': Descriptors.RingCount(mol),
        'NumRotatableBonds': Descriptors.NumRotatableBonds(mol),
        'TPSA': Descriptors.TPSA(mol),
        'LogP': Descriptors.MolLogP(mol),
        'NumHBD': rdMolDescriptors.CalcNumHBD(mol),
        'NumHBA': rdMolDescriptors.CalcNumHBA(mol),
        'FractionCSP3': rdMolDescriptors.CalcFractionCSP3(mol)
    }

    # Combine Mordred and RDKit descriptors
    descriptors = {**mordred_dict, **rdkit_descriptors}

    # Add charge and unpaired electrons descriptors
    descriptors.update(generate_charge_descriptors(row))

    return descriptors




By adding these checks, you can identify where the type mismatch is occurring and ensure that the correct types are being used throughout your code.

In [9]:
# Save to CSV
output_data.to_csv('chemical_descriptors.csv', index=False)
print("Descriptors generated and saved to 'chemical_descriptors.csv'.")

Descriptors generated and saved to 'chemical_descriptors.csv'.
