# Sprint 4 Analysis

In [1]:
from arsenic import plotting, stats # useful FE analysis functions
import json
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import seaborn as sns
from openeye import oechem
import numpy as np
from fah_xchem.analysis.constants import KT_KCALMOL # to convert from kT to kcal per mol

In [2]:
# Read in the analysis.json for Sprint 4
# Each transform is the backwards transform (?)
with open('../../data/sprint-4/analysis.json', 'r') as file:
    data = json.load(file)

In [4]:
data['series'].keys()

dict_keys(['metadata', 'compounds', 'transformations'])

In [9]:
data['series']['transformations'][0]['transformation']

{'run_id': 4,
 'xchem_fragment_id': 'x3110',
 'initial_microstate': {'compound_id': 'EN300-26619500',
  'microstate_id': 'EN300-26619500_2'},
 'final_microstate': {'compound_id': '3v3m-2020-04-Jacobs',
  'microstate_id': '3v3m-2020-04-Jacobs'}}

In [176]:
data['series']['compounds'][0]

{'metadata': {'compound_id': '3v3m-2020-04-Jacobs',
  'smiles': 'CC(C)(C)c1ccc(cc1)N([C@H](c2cccnc2)C(=O)NC(C)(C)C)C(=O)c3ccco3',
  'experimental_data': {'pIC50': 5.50248779245608}},
 'microstates': [{'microstate': {'microstate_id': '3v3m-2020-04-Jacobs',
    'free_energy_penalty': {'point': 0.0, 'stderr': 0.0},
    'smiles': 'CC(C)(C)c1ccc(cc1)N([C@H](c2cccnc2)C(=O)NC(C)(C)C)C(=O)c3ccco3'},
   'free_energy': {'point': -11.86540296121326, 'stderr': 0.0816827737002081},
   'first_pass_free_energy': {'point': -4.222161558156282,
    'stderr': 0.006251239324560247}}],
 'free_energy': {'point': -11.86540296121326, 'stderr': 0.0816827737002081}}

In [178]:
microstate_dict = {}

for d in data['series']['compounds']:
        for m in d['microstates']:

            microstate_dict[m['microstate']['microstate_id']] = m['microstate']['smiles']
  
            # if m['microstate']['microstate_id'] == microstate:
            #     smiles = m['microstate']['smiles']
            # else:
            #     smiles = None

In [188]:
data['series']['transformations'][0]['transformation']

{'run_id': 4,
 'xchem_fragment_id': 'x3110',
 'initial_microstate': {'compound_id': 'EN300-26619500',
  'microstate_id': 'EN300-26619500_2'},
 'final_microstate': {'compound_id': '3v3m-2020-04-Jacobs',
  'microstate_id': '3v3m-2020-04-Jacobs'}}

In [189]:
s4_fe_store = {} 

for t in data['series']['transformations']:



    #print(v['microstates'])

    # if t['binding_free_energy']['point']< 0:

    # print(f"start compound: {t['transformation']['initial_microstate']['compound_id']}, start compound MS: {t['transformation']['initial_microstate']['microstate_id']}, final compound: {t['transformation']['final_microstate']['compound_id']}")
    # print(f"binding free energy: {t['binding_free_energy']['point']}")
    # print(' ')

    microstate = t['transformation']['initial_microstate']['microstate_id']
    start_compound = t['transformation']['initial_microstate']['compound_id']
    run = t['transformation']['run_id']

    # this doesn't work
    for d in microstate_dict:
        if d == microstate:
                smiles = microstate_dict[d]
        else:
            smiles = None

    end_compound = t['transformation']['final_microstate']['compound_id']
    DDG = t['binding_free_energy']['point']
    dDDG = t['binding_free_energy']['stderr']


    s4_fe_store[microstate] = {
        "RUN": run,
        "start_compound": start_compound,
        "SMILES": smiles,
        "end_compound": end_compound,
        "DDG": DDG,
        "dDDG": dDDG
    }

    # s4_fe_store["start_compound"].append(t['transformation']['initial_microstate']['compound_id'])
    # s4_fe_store["start_compound_ms"].append(t['transformation']['initial_microstate']['microstate_id'])
    # s4_fe_store["end_compound"].append(t['transformation']['final_microstate']['compound_id'])
    # s4_fe_store["DDG"].append(t['binding_free_energy']['point'])
    # s4_fe_store["dDDG"].append(t['binding_free_energy']['point'])

    # if not t['transformation']['final_microstate']['compound_id'] == "3v3m-2020-04-Jacobs":
    #     print("blah")
    

In [190]:
# each key is a microstate, this way each key is unique
s4_fe_store['EN300-26619500_2']

{'RUN': 4,
 'start_compound': 'EN300-26619500',
 'SMILES': None,
 'end_compound': '3v3m-2020-04-Jacobs',
 'DDG': 1.0548020968189082,
 'dDDG': 0.6574041713791445}

In [191]:
s4_fe_store_sorted = {k: v for k, v in sorted(s4_fe_store.items(), key=lambda x: x[1]['DDG'])}

In [192]:
import pandas as pd
df = pd.DataFrame.from_dict(s4_fe_store_sorted, 'index')

In [193]:
df.reset_index(inplace=True)


In [196]:
df.columns = "microstate", "RUN", "start_compound", "SMILES", "end_compound", "DDG", "dDDG"

In [197]:
df.to_csv('ugi_ddg_sorted.csv')

In [198]:
df

Unnamed: 0,microstate,RUN,start_compound,SMILES,end_compound,DDG,dDDG
0,EN300-62447_4,3295,EN300-62447,,3v3m-2020-04-Jacobs,-11.695215,0.449214
1,EN300-79027_2,1203,EN300-79027,,3v3m-2020-04-Jacobs,-7.995218,0.264374
2,EN300-622275_14,553,EN300-622275,,3v3m-2020-04-Jacobs,-6.298211,0.830469
3,EN300-67492_2,2179,EN300-67492,,3v3m-2020-04-Jacobs,-5.824463,0.476009
4,EN300-313986_2,2894,EN300-313986,,3v3m-2020-04-Jacobs,-5.506994,0.335048
...,...,...,...,...,...,...,...
4508,EN300-371855_1,1024,EN300-371855,,3v3m-2020-04-Jacobs,17.451148,0.286446
4509,EN300-344116_1,930,EN300-344116,,3v3m-2020-04-Jacobs,18.740612,0.309765
4510,EN300-113892_1,3903,EN300-113892,,3v3m-2020-04-Jacobs,23.385074,0.304551
4511,EN300-62447_8,3291,EN300-62447,,3v3m-2020-04-Jacobs,36.026740,0.364737
