In [None]:
import numpy as np
import pandas as pd
from equilibrator_api import ComponentContribution, Q_
# Single shared ComponentContribution instance; GenThermoData and ThermoGen2
# read its `.ccache` and `.predictor` attributes.
CC = ComponentContribution()
from equilibrator_assets.generate_compound import create_compound, get_or_create_compound
from ast import literal_eval

In [None]:
def GenThermoData(row):
    """Compute the energy change of a single reaction row.

    Parameters
    ----------
    row : sequence
        Indexed positionally. ``row[1]`` and ``row[2]`` are string literals of
        the reagent and product lists (e.g. ``"['OCC=O', 'O']"``); the entries
        are handed to equilibrator as SMILES. Other positions are not read.

    Returns
    -------
    The sum over products minus the sum over reagents of element 0 of each
    compound's ``get_compound_prediction`` result, tagged as kJ/mol, with the
    units stripped and the number rounded to 2 decimals.

    Notes
    -----
    Every listed compound gets a coefficient of -1 (reagent) or +1 (product);
    a compound that is listed twice is therefore counted twice.
    Element 1 of the prediction (called ``sigma_vec`` in earlier revisions) is
    not used by the returned value and is no longer fetched.
    """
    reagents = literal_eval(row[1])
    products = literal_eval(row[2])

    # Reagents first, then products, so coefficients line up by position.
    compounds = [*reagents, *products]
    coefficients = [-1.0] * len(reagents) + [1.0] * len(products)

    compound_list = get_or_create_compound(CC.ccache, compounds, mol_format="smiles")

    # One prediction call per compound; only element 0 feeds the result.
    mus = [CC.predictor.preprocess.get_compound_prediction(c)[0] for c in compound_list]
    mus = Q_(mus, "kJ/mol")

    S = np.array(coefficients)
    standard_dgs = S @ mus
    # `.magnitude` is the public accessor for the unit-less value.
    return standard_dgs.magnitude.round(2)

In [None]:
def UnitTest(test_row, expected_answer):
    """Run GenThermoData on ``test_row`` and print whether the result matches.

    The comparison is an exact ``==`` between the value returned by
    GenThermoData and ``expected_answer``. Nothing is returned; the outcome
    is only printed.
    """
    test_answer = GenThermoData(test_row)
    outcome = 'successful' if test_answer == expected_answer else 'unsuccessful'
    print(f'Test was {outcome}, Expected answer = {expected_answer} and Calculated answer = {test_answer}')
# Check GenThermoData on one hand-written row. Position 1 holds the reagent list
# and position 2 the product list (both as string literals); position 0 is not
# read by GenThermoData. -25.61 is the value the calculation is expected to give.
UnitTest(['4_0', "['OCC(=O)C(O)=O', 'O']", "['OCC=O', 'OC([O-])=O']"], -25.61)

In [None]:
def ApplyThermoGen(filepath, name):
    """Add a 'DeltaG' column to a tab-separated table and append it to disk.

    Reads ``filepath`` as a TSV, calls GenThermoData once per row (rows are
    passed as raw arrays, so GenThermoData sees them positionally), stores the
    results in a new 'DeltaG' column, and writes the table without header or
    index to ``{name}111RelsWithThermo.tsv``.

    The output file is opened in append mode: calling this again with the same
    ``name`` adds the rows a second time instead of replacing the file.

    Returns the DataFrame including the new column.
    """
    reactions = pd.read_csv(filepath, sep='\t')
    delta_g = reactions.apply(GenThermoData, axis=1, raw=True, result_type='expand')
    reactions['DeltaG'] = delta_g

    out_path = f'{name}111RelsWithThermo.tsv'
    reactions.to_csv(out_path, sep='\t', mode='a', header=None, index=None)
    return reactions

%%time
ApplyThermoGen('FormoseG3processedrels.tsv', 'Formose_3')

In [None]:
def ThermoGen2(filepath, name):
    """Compute the energy change of every reaction in a TSV in one batch.

    Unlike a row-by-row approach, the distinct compounds of the whole table are
    collected first, so ``get_or_create_compound`` is called once and each
    compound's prediction is fetched once.

    Parameters
    ----------
    filepath : str
        Tab-separated file with the columns 'Index', 'Rule', 'Reagents' and
        'Products'. 'Reagents' and 'Products' hold string literals of lists
        whose entries are handed to equilibrator as SMILES.
    name : str
        Prefix of the output file ``{name}222RelsWithThermo.tsv``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Index', 'Reagents', 'Products', 'Rule', 'Energy Change'.
        'Energy Change' is the sum over products minus the sum over reagents
        of element 0 of each compound's prediction (tagged kJ/mol), rounded to
        2 decimals. The same frame is appended, without header or index, to
        the output file.

    Notes
    -----
    The output file is opened in append mode, so repeated calls with the same
    ``name`` accumulate rows. Element 1 of the prediction (``sigma_vec``) does
    not affect the output and is not fetched.
    """
    df = pd.read_csv(filepath, sep='\t')

    indexes = df['Index'].tolist()
    rules = df['Rule'].tolist()
    # Parse each row once and reuse the parsed lists for both passes below.
    Reagents = [literal_eval(cell) for cell in df['Reagents']]
    Products = [literal_eval(cell) for cell in df['Products']]

    # Distinct compounds in first-seen order, plus a dict for O(1) position
    # lookup (replaces repeated list scans and list.index calls).
    compounds = []
    position_of = {}
    for reagents, products in zip(Reagents, Products):
        for smiles in (*reagents, *products):
            if smiles not in position_of:
                position_of[smiles] = len(compounds)
                compounds.append(smiles)

    print('Checkpoint 1')

    compound_list = get_or_create_compound(CC.ccache, compounds, mol_format="smiles")

    print('Checkpoint 2')

    # One prediction call per compound; mus[i] belongs to compounds[i].
    mus = [CC.predictor.preprocess.get_compound_prediction(c)[0] for c in compound_list]

    print('Checkpoint 3')

    EnergyChanges = []
    for reagents, products in zip(Reagents, Products):
        # Reagents first, then products, so coefficients line up by position.
        S = np.array([-1.0] * len(reagents) + [1.0] * len(products))
        reaction_mus = Q_(
            [mus[position_of[smiles]] for smiles in (*reagents, *products)],
            "kJ/mol",
        )
        standard_dgs = S @ reaction_mus
        # `.magnitude` is the public accessor for the unit-less value.
        EnergyChanges.append(standard_dgs.magnitude.round(2))

    outputdata = {'Index':indexes, 'Reagents':Reagents, 'Products':Products, 'Rule':rules, 'Energy Change':EnergyChanges}
    outputdf = pd.DataFrame(outputdata)
    outputdf.to_csv(f'{name}222RelsWithThermo.tsv', header=None, index=None, sep='\t', mode='a')
    return outputdf

In [None]:
%%time
# Run the batched pipeline on the same input file passed to ApplyThermoGen;
# results are appended to 'Formose_3222RelsWithThermo.tsv'.
ThermoGen2('FormoseG3processedrels.tsv', 'Formose_3')