In [51]:
%matplotlib inline

import json
import os

import freesasa
import pandas as pd
import requests
import urllib3
from bs4 import BeautifulSoup


# Silence urllib3's warnings for the whole session.
# NOTE(review): presumably this is here to hide the InsecureRequestWarning
# caused by the PoolManager created with cert_reqs='CERT_NONE' — confirm.
urllib3.disable_warnings()

In [23]:
# Base address of the rosettamhc site; the landing page is scraped for the
# list of peptides.
url = 'https://rosettamhc.chemistry.ucsc.edu'

# Per-peptide resource templates: '%s' is filled in with the peptide sequence.
json_url = url + '/static/%s.json'
pdb_url = url + '/static/%s-ensemble.pdb'

In [24]:
# Shared connection pool used for every request in this notebook.
# SECURITY: cert_reqs='CERT_NONE' switches off TLS certificate verification,
# so responses are not protected against tampering.
# NOTE(review): presumably the server's certificate does not validate —
# confirm, and re-enable verification if it does.
http = urllib3.PoolManager(cert_reqs='CERT_NONE')

# Fetch the landing page and fail loudly on an HTTP error instead of
# silently parsing an error page into an empty/garbage peptide list.
resp = http.request("GET", url=url)
if resp.status != 200:
    raise RuntimeError('GET %s failed with HTTP status %d' % (url, resp.status))

soup = BeautifulSoup(resp.data, 'html.parser')

# One peptide per <option> element; the first <option> is skipped.
# NOTE(review): the first entry is presumably a placeholder label — confirm.
peptides = [o.text for o in soup.find_all('option')[1:]]

In [26]:
# Create the output directory tree data/rosetta_mhc/pdb in one call.
# exist_ok=True makes the cell safe to re-run (the shell `mkdir` errors when
# the directory already exists) and os.makedirs works on every platform.
os.makedirs('data/rosetta_mhc/pdb', exist_ok=True)

In [42]:
# Collect one (peptide, length, binding_affinity, energy, template) row per
# peptide and make sure its PDB ensemble is available on disk.
data = []

for peptide in peptides:
    print(peptide)

    # Download the PDB ensemble only when it is not cached yet: a later cell
    # reads these files, and re-running this cell should not re-download them.
    pdb_path = 'data/rosetta_mhc/pdb/%s-ensemble.pdb' % peptide
    if not os.path.exists(pdb_path):
        resp = http.request("GET", url=pdb_url % peptide)
        if resp.status != 200:
            raise RuntimeError('PDB download for %s failed with HTTP status %d'
                               % (peptide, resp.status))
        # Write the raw bytes so the file is stored exactly as served
        with open(pdb_path, 'wb') as w:
            w.write(resp.data)

    # Get JSON file
    resp = http.request("GET", url=json_url % peptide)
    if resp.status != 200:
        raise RuntimeError('JSON download for %s failed with HTTP status %d'
                           % (peptide, resp.status))
    json_data = json.loads(resp.data)

    # 'ba' and 'energy' are converted to float; 'template' is stored as-is
    data.append((peptide, len(peptide), float(json_data['ba']),
                 float(json_data['energy']), json_data['template']))

columns = ['peptide', 'length', 'binding_affinity', 'energy', 'template']
df = pd.DataFrame(data=data, columns=columns)

# Persist the table so it can be reused without hitting the server again
df.to_csv('data/rosetta_mhc/peptide_binding_affinity.csv', index=False)

AALLADKFPV
ADLDDFSKQL
AFLPFAMGI
AIASEFSSL
AIFYLITPV
AIMTRCLAV
AINRPQIGV
AIPTNFTISV
ALAPNMMV
ALAPNMMVT
ALCADSIII
ALDISASIV
ALFAYTKRNV
ALGGSVAIKI
ALGKLQDVV
ALGVLMSNL
ALITLATCEL
ALLADKFPV
ALLADKFPVL
ALLAGTITS
ALLAVFQSA
ALLEDEFTPF
ALLEDEFTP
ALLSDLQDL
ALLTLQQIEL
ALNKATNNA
ALNLGETFV
ALNNIINNA
ALNTLVKQL
ALPETTADI
ALPETTADIV
ALQDAYYRA
ALQIPFAMQM
ALRANSAVKL
ALSKGVHFV
ALVSDVGDSA
ALVYDNKLKA
ALVYFLQSI
ALWEIQQV
ALWEIQQVV
ALYYPSARI
ALYYPSARIV
AMGIIAMSA
AMQTMLFTM
AMRNAGIVGV
AMSAFAMMFV
AMYTPHTV
AMYTPHTVL
AQFAPSASA
AQVDVVNFNL
ASFDNFKFV
ASLPFGWLI
ATIPIQASL
ATVAYFNMV
AVANGDSEV
AVASKILGL
AVIKTLQPV
AVINGDRWFL
AVLDMCASL
DLFMRIFTI
DLNETLVTM
ELDERIDKV
ELFENKTTL
ELLHAPATV
ELTPVVQTI
ELYSPIFLI
ELYSPIFLIV
FAFACPDGV
FAQDGNAAI
FCLEASFNYL
FCNDPFLGV
FDMSKFPLKL
FELDERIDKV
FELLHAPATV
FFFYENAFL
FGADPIHSL
FGDDTVIEV
FGMSRIGMEV
FHLDGEVITF
FHLVDFQVTI
FIAGLIAIV
FIASFRLFA
FIEDLLFNKV
FIRQEEVQEL
FISDEVARDL
FITESKPSV
FKLNEEIAII
FKLSYGIATV
FKLVNKFLAL
FLAFVVFL
FLAFVVFLL
FLAHIQWMV
FLALCADSI
FLALITLA
FLALITLAT
FLARGIVFM
FLEGETLPT
FL

In [43]:
# Show all peptides ordered by binding_affinity, smallest value first
df.sort_values('binding_affinity', ascending=True)

Unnamed: 0,peptide,length,binding_affinity,energy,template
91,FLAHIQWMV,9,1.8,-71.124,3REW
237,ILFTRFFYV,9,2.2,-72.003,2GIT
114,FLLPSLATV,9,2.5,-77.116,3GSQ
717,YLYALVYFL,9,2.5,-73.002,1EEZ
374,LLYDANYFL,9,2.6,-75.012,1T1Z
...,...,...,...,...,...
541,STDTGVEHV,9,2931.6,-51.191,1TVB
203,GVGYQPYRV,9,2950.7,-57.220,5SWQ
64,ELTPVVQTI,9,3101.6,-63.763,6PTB
201,GVDIAANTV,9,3198.8,-56.292,5SWQ


In [50]:
# Same ordering, restricted to the peptides whose length is 9
(
    df
    .loc[df['length'] == 9]
    .sort_values(by='binding_affinity')
)

Unnamed: 0,peptide,length,binding_affinity,energy,template
91,FLAHIQWMV,9,1.8,-71.124,3REW
237,ILFTRFFYV,9,2.2,-72.003,2GIT
114,FLLPSLATV,9,2.5,-77.116,3GSQ
717,YLYALVYFL,9,2.5,-73.002,1EEZ
374,LLYDANYFL,9,2.6,-75.012,1T1Z
...,...,...,...,...,...
541,STDTGVEHV,9,2931.6,-51.191,1TVB
203,GVGYQPYRV,9,2950.7,-57.220,5SWQ
64,ELTPVVQTI,9,3101.6,-63.763,6PTB
201,GVDIAANTV,9,3198.8,-56.292,5SWQ


In [64]:
# Keep only the peptides of length 9
peptides_selection = df.loc[df['length'] == 9]

# NOTE(review): `data` is initialised but never filled in this cell.
data = []

for peptide in peptides_selection.itertuples():
    print(peptide.peptide)

    # Load the ensemble PDB saved for this peptide and run freesasa on it
    pdb_file = 'data/rosetta_mhc/pdb/%s-ensemble.pdb' % peptide.peptide
    structure = freesasa.Structure(pdb_file)
    result = freesasa.calc(structure)

    # One selection command per residue number 181..189, each named after its
    # residue number ("<name>, resi <number>").
    # NOTE(review): presumably 181-189 are the peptide's residues in the
    # complex numbering — confirm.
    selections = tuple('%d, resi %d' % (resi, resi) for resi in range(181, 190))
    result_selections = freesasa.selectArea(selections, structure, result)

    print(result_selections)
    # NOTE(review): the loop stops after the first peptide — this looks like a
    # debugging leftover; confirm before relying on it for the full set.
    break

AFLPFAMGI
('181, resi 181', '182, resi 182', '183, resi 183', '184, resi 184', '185, resi 185', '186, resi 186', '187, resi 187', '188, resi 188', '189, resi 189')
{'181': 11.37216387731515, '182': 4.836434567942511, '183': 1.066586477969954, '184': 44.8680291177604, '185': 71.28588461673995, '186': 32.71190694439774, '187': 39.538697153073024, '188': 17.510712798210093, '189': 33.991680638856536}


In [65]:
# Exploration: list the attributes and methods exposed by the freesasa
# result object left over from the previous cell.
dir(result)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_get_address',
 '_safe_div',
 'atomArea',
 'nAtoms',
 'residueAreas',
 'totalArea']

In [70]:
# Exploration: list the attributes of the per-residue record stored under
# chain 'A', residue '189'.
residue_areas = result.residueAreas()
dir(residue_areas['A']['189'])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'apolar',
 'hasRelativeAreas',
 'mainChain',
 'polar',
 'relativeApolar',
 'relativeMainChain',
 'relativePolar',
 'relativeSideChain',
 'relativeTotal',
 'residueNumber',
 'residueType',
 'sideChain',
 'total']

In [78]:
# Look up the relativeSideChain value of chain 'A', residue '183'
residue_183 = result.residueAreas()['A']['183']
residue_183.relativeSideChain

0.007428408337062107