In [21]:
from rdkit import Chem
import networkx as nx

def molecule_to_graph(mol):
    """Convert a molecule into an undirected NetworkX graph.

    Each atom becomes a node keyed by its atom index, carrying an
    ``element`` attribute with the atom's symbol. Each bond becomes an edge
    between its begin and end atom indices, carrying an ``order`` attribute
    holding whatever ``bond.GetBondType()`` returns.

    Parameters
    ----------
    mol
        Molecule object exposing ``GetAtoms()`` and ``GetBonds()``
        (in this notebook, the result of ``Chem.MolFromSmiles``).

    Returns
    -------
    networkx.Graph
        Graph with one node per atom and one edge per bond.

    Raises
    ------
    ValueError
        If ``mol`` is ``None``. RDKit parsers return ``None`` instead of
        raising when the input cannot be parsed, so this is caught here with
        a clear message rather than failing later on ``None.GetAtoms()``.
    """
    if mol is None:
        raise ValueError(
            "mol is None; the input string probably failed to parse "
            "(e.g. Chem.MolFromSmiles returned None)"
        )

    G = nx.Graph()

    # One node per atom, keyed by the atom index.
    for atom in mol.GetAtoms():
        G.add_node(atom.GetIdx(), element=atom.GetSymbol())

    # One edge per bond, keyed by the indices of the two bonded atoms.
    for bond in mol.GetBonds():
        G.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), order=bond.GetBondType())

    return G

# Example: parse a SMILES string, turn it into a graph, and show the result.
# NOTE(review): the saved output under this cell lists 17 atoms, while this
# SMILES describes 24 heavy atoms, so that output appears to come from a
# different `mol`. Re-run on a fresh kernel to refresh it.
smiles = "C=C[C@@H]1[C@@H](CSCc2ccccc2)N(C(=O)OC)SN1C(=O)OC"
mol = Chem.MolFromSmiles(smiles)
graph = molecule_to_graph(mol)

# Nodes (with attributes) on the first line, edges (with attributes) on the second.
print(graph.nodes(data=True), graph.edges(data=True), sep="\n")


[(0, {'element': 'C'}), (1, {'element': 'N'}), (2, {'element': 'S'}), (3, {'element': 'N'}), (4, {'element': 'C'}), (5, {'element': 'C'}), (6, {'element': 'C'}), (7, {'element': 'C'}), (8, {'element': 'C'}), (9, {'element': 'C'}), (10, {'element': 'C'}), (11, {'element': 'O'}), (12, {'element': 'O'}), (13, {'element': 'C'}), (14, {'element': 'O'}), (15, {'element': 'O'}), (16, {'element': 'C'})]
[(0, 1, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (0, 14, {'order': rdkit.Chem.rdchem.BondType.DOUBLE}), (0, 15, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (1, 2, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (1, 5, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (2, 3, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (3, 4, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (3, 10, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (4, 5, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (4, 7, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (5, 6, {'order': rdkit.Chem.rdchem.BondType.SINGLE}), (7,

In [22]:
from rdkit.Chem import rdFingerprintGenerator

# Build an RDKit fingerprint generator; no arguments are passed, so it uses
# the library's default settings.
fingerprint_gen = rdFingerprintGenerator.GetRDKitFPGenerator()
# Fingerprint the molecule currently bound to `mol`.
# NOTE(review): `mol` is not assigned in this cell. It is whatever the most
# recently executed cell bound to that name, so the result depends on
# execution order.
fingerprint = fingerprint_gen.GetFingerprint(mol)

# Print the fingerprint as one long string of '0'/'1' characters.
print(fingerprint.ToBitString())

0101100000001100000011111101000001000010000010000010001000000111010000000010100000101000010000010101000011100100100000001010000001000000011101101010000000011010000000100001100000000110001010010010000000100001111000010100100000001001011000000001010000010000100000011000010010101000001010000101000000011011101110110001110110111100111001001000000000010010110000100100010001000100010000111011010001010000000100000000101000000101010000010011110010011100100000100001011000100100100011110001000010100000110111000110010000100101011000000001010110010000000000101101010101100000010100000000001000100000000010110101000100011000101100100000000100000111000100100010000100000000001000011001000001110000000001101010010100010100101010000000000100000000010100010100100101100000100001101101010110111110010000100101100110111000100101010101100010111000000100001000011101000010100000000010101010110001100101100000000001011000101011000000011001010001011011000000110001101100000000000000010001001100001100100100000001000101

In [23]:
from rdkit.Chem import Descriptors

# Descriptors for the molecule currently bound to `mol`.
# NOTE(review): `mol` comes from another cell. The saved output under this
# cell (molecular weight 260.3) does not match the C16H20N2O4S2 molecule
# defined elsewhere in this notebook, so it was presumably produced with a
# different `mol`. Re-run on a fresh kernel to confirm.
mol_weight = Descriptors.MolWt(mol)
logp = Descriptors.MolLogP(mol)
num_h_donors = Descriptors.NumHDonors(mol)
num_h_acceptors = Descriptors.NumHAcceptors(mol)
tpsa = Descriptors.TPSA(mol)

# One report line per descriptor, emitted by a single newline-separated print.
print(
    f"Molecular weight: {mol_weight}",
    f"logP: {logp}",
    f"Number of hydrogen bond donors: {num_h_donors}",
    f"Number of hydrogen bond acceptors: {num_h_acceptors}",
    f"Topological polar surface area: {tpsa}",
    sep="\n",
)


Molecular weight: 260.31499999999994
logP: 2.0332
Number of hydrogen bond donors: 0
Number of hydrogen bond acceptors: 5
Topological polar surface area: 59.08


In [7]:
# Substructure query: the SMARTS pattern "CC", a two-carbon fragment.
substructure = Chem.MolFromSmarts("CC")  # Ethyl group

# Bind the test molecule to its own name instead of `mol`. Other cells in
# this notebook read the shared `mol`, so rebinding it to ethanol here would
# silently change what they compute if they were re-run after this cell.
ethanol = Chem.MolFromSmiles("CCO")  # Ethanol
contains_substructure = ethanol.HasSubstructMatch(substructure)

print(f"Contains ethyl group: {contains_substructure}")


Contains ethyl group: True


In [35]:
import pandas as pd

# Load the results table. The encoding is given explicitly as Latin-1;
# presumably the export is not UTF-8 (the saved output shows a "±" in the
# "Quant Masses" column) — confirm against the instrument software.
# NOTE(review): this is an absolute, machine-specific path.
df = pd.read_csv(
    "/usr/scratch/NASA/results/230823_01_Atacama_Soil_300uLDCM_100oC24h_Result.csv",
    encoding="latin-1",
)

# Display the first row of the table as the cell's output.
df.iloc[0]

Name                      1,2,5-THIADIAZOLIDINE-2,5-DICARBOXYLIC ACID, 3...
R.T. (s)                                                        2200, 0.727
Type                                                                Unknown
Base Mass                                                            48.984
Concentration                                                           NaN
Sample Concentration                                                    NaN
Match                                                                   NaN
Quant Masses                                            BPI(48.984026±5ppm)
Quant S/N                                                               5.0
Area                                                                28258.0
Integration Modified                                                    NaN
Quantitation Analyte                                                    NaN
1st Dimension Time (s)                                               2200.0
2nd Dimensio

In [36]:
from rdkit import Chem

# InChI identifier to convert. The literal is split at its "/" layer
# separators purely for readability; the pieces concatenate back into the
# exact same single string.
inchi = (
    "InChI=1S/C16H20N2O4S2"
    "/c1-4-13-14(11-23-10-12-8-6-5-7-9-12)18(16(20)22-3)24-17(13)15(19)21-2"
    "/h4-9,13-14H,1,10-11H2,2-3H3"
    "/t13-,14-/m1/s1"
)

# InChI -> RDKit molecule -> SMILES string.
mol = Chem.MolFromInchi(inchi)
smiles = Chem.MolToSmiles(mol)

print(smiles)


C=C[C@@H]1[C@@H](CSCc2ccccc2)N(C(=O)OC)SN1C(=O)OC
