# Notebook for converting HDX tables into Gibbs free energy (dG) tables

In [None]:
from pyhdx import read_dynamx, HDXMeasurement
from pyhdx.fitting import (
    fit_rates_half_time_interpolate,
    fit_rates_weighted_average,
    fit_gibbs_global,
)
from pyhdx.process import filter_peptides, apply_control, correct_d_uptake
from pathlib import Path
from gibbs_functions import compute_dG_NC, compute_dG_control
import os

In [None]:
# Directory handed to the compute_dG_* helpers as their output location.
outputdir = '/****/sim/gibbs_tables'

# Make the HDX input directory the working directory.
# NOTE(review): presumably so the bare file names listed in `files` resolve
# against it -- confirm against gibbs_functions.
os.chdir('/****/sim/HDX_tables')


In [None]:
# Each entry of `files` lists, in order: the HDX table file name, the base name
# for the directory (used below as the state name), the base name for the
# output file, and the protein sequence.
files=[
    ['barnase_HDX.csv','barnase','barnase_A','AQVINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR'],
 ['alpha_lactalbumin_HDX.csv','alpha_lactalbumin','alpha_lactalbumin_apo','AKQFTKCELSQLLKDIDGYGGIALPELICTMFHTSGYDTQAIVENNESTEYGLFQISNKLWCKSSQVPQSRNICDISCDKFLDDDITDDIMCAKKILDIKGIDYWLAHKALCTEKLEQWLCEKL'],
   ['enolase_HDX.csv','enolase','enolase_apo','AVSKVYARSVYDSRGNPTVEVELTTEKGVFRSIVPSGASTGVHEALEMRDGDKSKWMGKGVLHAVKNVNDVIAPAFVKANIDVKDQKAVDDFLISLDGTANKSKLGANAILGVSLAASRAAAAEKNVPLYKHLADLSKSKTSPYVLPVPFLNVLNGGSHAGGALALQEFMIAPTGAKTFAEALRIGSEVYHNLKSLTKKRYGASAGNVGDEGGVAPNIQTAEEALDLIVDAIKAAGHDGKIKIGLDCASSEFFKDGKYDLDFKNPNSDKSKWLTGPQLADLYHSLMKRYPIVSIEDPFAEDDWEAWSHFFKTAGIQIVADDLTVTNPKRIATAIEKKAADALLLKVNQIGTLSESIKAAQDSFAAGWGVMVSHRSGETEDTFIADLVVGLRTGQIKTGAPARSERLAKLNQLLRIEEELGDNAVFAGENFHHGDKL'],
['halm2_HDX.csv','halm2','halm2_AF_apo','MKTPLTSEHPSVPTTLPHTNDTDWLEQLHDILSIPVTEEIQKYFHAENDLFSFFYTPFLQFTYQSMSDYFMTFKTDMALIERQSLLQSTLTAVHHRLFHLTHRTLISEMHIDKLTVGLNGSTPHERYMDFNHKFNKTSKSKNLFNIYPILGKLVVNETLRTINFVKKIIQHYMKDYLLLSDFFKEKDLRLTNLQLGVGDTHVNGQCVTILTFASGQKVVYKPRSLSIDKQFGEFIEWVNSKGFQPSLRIPIAIDRQTYGWYEFIPHQEATSEDEIERYYSRIGGYLAIAYLFGATDLHLDNLIACGEHPMLIDLETLFTNDLDCYDSAFPFPALARELTQSVFGTLMLPITIASGKLLDIDLSAVGGGKGVQSEKIKTWVIVNQKTDEMKLVEQPYVTESSQNKPTVNGKEANIGNYIPHVTDGFRKMYRLFLNEIDELMDHNGPIFAFESCQIRHVFRATHVYAKFLEASTHPDYLQEPTRRNKLFESFWNITSLMAPFKKIVPHEIAELENHDIPYFVLTCGGTIVKDGYGRDIADLFQSSCIERVTHRLQQLGSEDEARQIRYIKSSLATLTNGDWTPSHEKTPMSPASADREDGYFLREAQAIGDDILAQLIWEDDRHAAYLIGVSVGMNEAVTVSPLTPGIYDGTLGIVLFFDQLAQQTGETHYRHAADALLEGMFKQLKPELMPSSAYFGLGSLFYGLMVLGLQRSDSHIIQKAYEYLKHLEECVQHEETPDFVSGLSGVLYMLTKIYQLTNEPRVFEVAKTTASRLSVLLDSKQPDTVLTGLSHGAAGFALALLTYGTAANDEQLLKQGHSYLVYERNRFNKQENNWVDLRKGNAYQTFWCHGAPGIGISRLLLAQFYDDELLHEELNAALNKTISDGFGHNHSLCHGDFGNLDLLLLYAQYTNNPEPKELARKLAISSIDQAHTYGWKLGLNHSDQLQGMMLGVTGIGYQLLRHINPTVPSILALELPSSTLTEKELRIHDR'] ,
    ['halm2_HDX.csv','halm2_ATP','halm2_AF_ATP','MKTPLTSEHPSVPTTLPHTNDTDWLEQLHDILSIPVTEEIQKYFHAENDLFSFFYTPFLQFTYQSMSDYFMTFKTDMALIERQSLLQSTLTAVHHRLFHLTHRTLISEMHIDKLTVGLNGSTPHERYMDFNHKFNKTSKSKNLFNIYPILGKLVVNETLRTINFVKKIIQHYMKDYLLLSDFFKEKDLRLTNLQLGVGDTHVNGQCVTILTFASGQKVVYKPRSLSIDKQFGEFIEWVNSKGFQPSLRIPIAIDRQTYGWYEFIPHQEATSEDEIERYYSRIGGYLAIAYLFGATDLHLDNLIACGEHPMLIDLETLFTNDLDCYDSAFPFPALARELTQSVFGTLMLPITIASGKLLDIDLSAVGGGKGVQSEKIKTWVIVNQKTDEMKLVEQPYVTESSQNKPTVNGKEANIGNYIPHVTDGFRKMYRLFLNEIDELMDHNGPIFAFESCQIRHVFRATHVYAKFLEASTHPDYLQEPTRRNKLFESFWNITSLMAPFKKIVPHEIAELENHDIPYFVLTCGGTIVKDGYGRDIADLFQSSCIERVTHRLQQLGSEDEARQIRYIKSSLATLTNGDWTPSHEKTPMSPASADREDGYFLREAQAIGDDILAQLIWEDDRHAAYLIGVSVGMNEAVTVSPLTPGIYDGTLGIVLFFDQLAQQTGETHYRHAADALLEGMFKQLKPELMPSSAYFGLGSLFYGLMVLGLQRSDSHIIQKAYEYLKHLEECVQHEETPDFVSGLSGVLYMLTKIYQLTNEPRVFEVAKTTASRLSVLLDSKQPDTVLTGLSHGAAGFALALLTYGTAANDEQLLKQGHSYLVYERNRFNKQENNWVDLRKGNAYQTFWCHGAPGIGISRLLLAQFYDDELLHEELNAALNKTISDGFGHNHSLCHGDFGNLDLLLLYAQYTNNPEPKELARKLAISSIDQAHTYGWKLGLNHSDQLQGMMLGVTGIGYQLLRHINPTVPSILALELPSSTLTEKELRIHDR'],
 ['VDR_D3_M9Q_HDX.csv','D3','VDR_AF_D3',"MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKGFFRRSMKRKALFTCPFNGDCRITKDNRRHCQACRLKRCVDIGMMKEFILTDEEVQRKREMILKRKEEEALKDSLRPKLSEEQQRIIAILLDAHHKTYDPTYSDFCQFRPPVRVNDGGGSHPSRPNSRHTPSFSGDSSSSCSDHCITSSDMMDSSSFSNLDLSEEDSDDPSVTLELSQLSMLPHLADLVSYSIQKVIGFAKMIPGFRDLTSEDQIVLLKSSAIEVIMLRSNESFTMDDMSWTCGNQDYKYRVSDVTKAGHSLELIEPLIKFQVGLKKLNLHEEEHVLLMAICIVSPDRPGVQDAALIEAIQDRLSNTLQTYIRCRHPPPGSHLLYAKMIQKLADLRSLNEEHSKQYRCLSFQPECSMKLTPLVLEVFGNEIS"],
 ['VDR_D3_M9Q_HDX.csv','M9Q','VDR_AF_M9Q',"MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKGFFRRSMKRKALFTCPFNGDCRITKDNRRHCQACRLKRCVDIGMMKEFILTDEEVQRKREMILKRKEEEALKDSLRPKLSEEQQRIIAILLDAHHKTYDPTYSDFCQFRPPVRVNDGGGSHPSRPNSRHTPSFSGDSSSSCSDHCITSSDMMDSSSFSNLDLSEEDSDDPSVTLELSQLSMLPHLADLVSYSIQKVIGFAKMIPGFRDLTSEDQIVLLKSSAIEVIMLRSNESFTMDDMSWTCGNQDYKYRVSDVTKAGHSLELIEPLIKFQVGLKKLNLHEEEHVLLMAICIVSPDRPGVQDAALIEAIQDRLSNTLQTYIRCRHPPPGSHLLYAKMIQKLADLRSLNEEHSKQYRCLSFQPECSMKLTPLVLEVFGNEIS"],
 ['VDR_D3_M9Q_HDX.csv','apo','VDR_AF_apo',"MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKGFFRRSMKRKALFTCPFNGDCRITKDNRRHCQACRLKRCVDIGMMKEFILTDEEVQRKREMILKRKEEEALKDSLRPKLSEEQQRIIAILLDAHHKTYDPTYSDFCQFRPPVRVNDGGGSHPSRPNSRHTPSFSGDSSSSCSDHCITSSDMMDSSSFSNLDLSEEDSDDPSVTLELSQLSMLPHLADLVSYSIQKVIGFAKMIPGFRDLTSEDQIVLLKSSAIEVIMLRSNESFTMDDMSWTCGNQDYKYRVSDVTKAGHSLELIEPLIKFQVGLKKLNLHEEEHVLLMAICIVSPDRPGVQDAALIEAIQDRLSNTLQTYIRCRHPPPGSHLLYAKMIQKLADLRSLNEEHSKQYRCLSFQPECSMKLTPLVLEVFGNEIS"],
['m_lipin_HDX.csv','m_lipin','m_lipin','MSLRDLPSIAISLCGGLSDHREITKDAFLEQAVSYQQFADNPAIIDDPNLVVKVGNKYYNWTTAAPLLLAMQAFQKPLPKATVESIMRDKMPKKGGRWWFSWRGRNATI'],
]


## FILES WITH NO CONTROL INCLUDED

In [None]:
# Run compute_dG_NC once for every record in `files` (the datasets that have
# no control included).
# Each record is unpacked as:
#   file_name, state_name, output_name, sequence_name
# Unpacking directly (instead of indexing by position) also makes a record
# with the wrong number of fields fail immediately with a clear ValueError.
outputdir = '/****/sim/gibbs_tables'
for file_name, state_name, output_name, sequence_name in files:
    # The working directory is reset on every pass, exactly as before.
    # NOTE(review): presumably compute_dG_NC may change the working
    # directory -- confirm before hoisting this call out of the loop.
    os.chdir('/****/sim/HDX_tables')
    # Progress trace: which table / state / sequence is being processed.
    print(file_name, state_name, sequence_name)
    compute_dG_NC(file_name, outputdir, sequence_name, state_name, output_name)

## CONTROL FILES

In [None]:
# Run compute_dG_control once for every record in `files` (the datasets
# listed under "CONTROL FILES"). This rebinds `files`, replacing the table
# used by the no-control cell.
# Each record is:
#   [HDX table file name, state name, output base name, protein sequence]
files = [
    [
        'secB_HDX.csv',
        'secB',
        # Bug fix: this record originally had only three fields, so the loop
        # took the sequence as the output name and then raised IndexError
        # when reading the fourth field.
        # NOTE(review): the output base name 'secB' is assumed to match the
        # state name -- confirm the intended output file name.
        'secB',
        'MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWQPEVKLDLDTASSQLADDVYEVVLRVTVTASLGEETAFLCEVQQGGIFSIAGIEGTQMAHCLGAYCPNILFPYARECITSMVSRGTFPQLNLAPVNFDALFMNYLQQQAGEGTEEHQDA',
    ],
]
outputdir = '/****/sim/gibbs_tables'
for file_name, state_name, output_name, sequence_name in files:
    # The working directory is reset on every pass, exactly as before.
    # NOTE(review): presumably compute_dG_control may change the working
    # directory -- confirm before hoisting this call out of the loop.
    os.chdir('/****/sim/HDX_tables')
    # Progress trace: which table / state / sequence is being processed.
    print(file_name, state_name, sequence_name)
    # value=0.0 is forwarded unchanged; its meaning is defined by
    # compute_dG_control in gibbs_functions (not visible here).
    compute_dG_control(
        file_name,
        outputdir,
        sequence_name,
        state_name,
        output_name,
        value=0.0,
    )