This notebook generates a list of charges for each microstate.

In [4]:
import os
# Point OpenEye at a license file, but only when OE_LICENSE is not already set,
# so a license configured outside the notebook is not overwritten.
# NOTE: the fallback path below is machine-specific; adjust it (or export
# OE_LICENSE before starting the kernel) when running elsewhere.
os.environ.setdefault("OE_LICENSE", "C:\\Users\\rustenba\\Downloads\\oe_license.txt")

from openeye import oechem
import openeye
import oenotebook as oenb
import csv

from pathlib import Path
import pandas as pd
import numpy as np

# Stop early with a clear message when OEChem reports that it is not licensed.
# (The builtin is spelled RuntimeError; "RunTimeError" would itself raise a
# NameError instead of the intended exception.)
if not openeye.OEChemIsLicensed():
    raise RuntimeError("Can't find a valid OpenEye license")

### Loading microstates from SAMPL6 files

SAMPL6 microstate definitions are loaded into a pandas DataFrame (one row per microstate) using `load_microstate_dictionary`.

By default it uses the files from www.github.com/MobleyLab/SAMPL6; a local copy is stored in the `microstates` folder.

The states are read in as SMILES strings. For a proper comparison of microstates, explicit hydrogens are added to the molecules using the `add_h` function.



In [28]:
def add_h(mol: oechem.OEMol):
    """Add explicit hydrogens to a molecule, in place.

    Parameters
    ----------
    mol - the OpenEye molecule to modify (this notebook passes an OEGraphMol)

    Returns
    -------
    None - the molecule is modified in place
    """
    # Use the whole-molecule form of OEAddExplicitHydrogens instead of calling
    # it once per atom inside a ``mol.GetAtoms()`` loop: the per-atom loop adds
    # hydrogen atoms to the molecule while that same molecule is being iterated.
    oechem.OEAddExplicitHydrogens(mol)

def total_charge(mol: oechem.OEMol):
    """Return the net formal charge of the molecule as an integer.

    The formal charge of every atom yielded by ``mol.GetAtoms()`` is added up
    (starting from 0.0) and the result is converted to ``int``.
    """
    formal_charges = [atom.GetFormalCharge() for atom in mol.GetAtoms()]
    return int(sum(formal_charges, 0.0))
        
def load_microstate_dictionary(sampl_id: str, microstate_folder: str= "./microstates/"):
    """Load the microstates defined by SAMPL for one molecule from its microstate file.

    Parameters
    ----------
    sampl_id - the identifier for the molecule in sampl, e.g. SM12
    microstate_folder - folder that contains the ``<sampl_id>_microstates.csv`` file

    Returns
    -------
    pandas.DataFrame - one row per microstate, with the columns
        "Microstate" (oe_mol with explicit hydrogens added),
        "Microstate ID" and "Charge" (total formal charge)

    Raises
    ------
    ValueError - if no microstate file exists for ``sampl_id``, or if a SMILES
        string in the file cannot be parsed.
    """
    columns = ["Microstate", "Microstate ID", "Charge"]

    # Locate the file containing state definitions
    filename = "{}_microstates.csv".format(sampl_id)
    full_path = os.path.join(microstate_folder, filename)
    if not Path(full_path).is_file():
        raise ValueError("No microstate definitions were found for molecule {}. Check for typos.".format(sampl_id))

    # SAMPL6 CSV files are not OpenEye compatible, so instead we read the csv line by line and translate the smiles.
    # Rows are collected in a list and turned into a DataFrame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
    records = []
    with open(full_path, 'r', newline='') as csvfile:
        csvreader = csv.DictReader(csvfile, delimiter=',', quotechar='"')

        for row in csvreader:
            key = row['microstate ID']
            smiles = row['canonical isomeric SMILES']

            val = oechem.OEGraphMol()
            # OESmilesToMol reports failure through its return value; without
            # this check a bad SMILES would be recorded as an empty molecule
            # with charge 0.
            if not oechem.OESmilesToMol(val, smiles):
                raise ValueError("Could not parse SMILES {!r} for microstate {} in {}.".format(smiles, key, full_path))
            add_h(val)

            records.append({
                "Microstate": val,
                "Microstate ID": key,
                "Charge": total_charge(val),
            })

    # Passing ``columns`` keeps the expected columns even when the file has no rows.
    return pd.DataFrame(records, columns=columns)

In [29]:
# Molecules are named SM01 ... SM24.
N_MOLECULES = 24

# Collect one frame per molecule and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
frames = []
for mol in range(N_MOLECULES):
    mol_name = "SM{:02d}".format(mol+1)
    new_df = load_microstate_dictionary(mol_name)
    new_df["Molecule"] = mol_name
    frames.append(new_df)

df = pd.concat(frames, ignore_index=True)

# Only the identifiers and the charge are written; the molecule objects in the
# "Microstate" column are left out of the CSV.
df[["Molecule", "Microstate ID", "Charge"]].to_csv("../charges_per_state.csv", index=False)
