In [206]:
import sbmlcore, pandas, numpy, pytest
import freesasa
import os.path
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [190]:
#Test on RNAP structure with offsets
file = sbmlcore.FreeSASA('tests/5uh6.pdb')#, offsets = {'A': 0, 'B': 0, 'C': -6}
b = {'segid': ['A', 'A', 'A', 'B', 'C', 'C'], 'mutation': ['I3D','S4K', 'Q5V', 'R6D', 'S450F', 'D435F']} #N.B. Mutation must include offset
df = pandas.DataFrame(b)
df

Unnamed: 0,segid,mutation
0,A,I3D
1,A,S4K
2,A,Q5V
3,B,R6D
4,C,S450F
5,C,D435F


In [191]:
structure = freesasa.Structure("tests/5uh6.pdb")
result = freesasa.calc(structure)
area_classes = freesasa.classifyResults(result, structure)

print("Total : %.2f A2" % result.totalArea())
for key in area_classes:
    print(key, ": %.2f A2" % area_classes[key])
    
print("Total number of atoms: ", result.nAtoms())
print("Atom area for atom 5 in A^2", result.atomArea(5))
#print("Residue area for resid 5", result.residueAreas()) #seems to return objects, not floats...

res_areas = result.residueAreas()
print(res_areas["A"]["5"])

#To get SASA of different residues, uses Pymol syntax
# ('selection-name, selector', 'selection-name, selector')
# Notes on syntax are here: https://freesasa.github.io/doxygen/Selection.html
selections = freesasa.selectArea(('alanine, resn ala', 'r1_10, resi 1-10'), 
                                 structure, result)
for key in selections:
    print(key, ": %.2f A2" % selections[key])

first_residue = freesasa.selectArea(('alanine, resn ala', 'proline, resn pro'), structure, result)
first_residue = freesasa.selectArea(['r1, resi 1', 'r1_10, resi 1-10'], 
                                 structure, result)
print(first_residue)

second_third_fourth_residue = freesasa.selectArea(('resid2, chain A and resi 2', 'r3, resi 3', 'r4, resi 4'), 
                                 structure, result)

print(second_third_fourth_residue)
print(type(second_third_fourth_residue))

Total : 140821.76 A2
Polar : 68137.62 A2
Apolar : 72684.14 A2
Total number of atoms:  26023
Atom area for atom 5 in A^2 2.687102462728613
<freesasa.ResidueArea object at 0x17183bf70>
alanine : 5875.45 A2
r1_10 : 4184.64 A2
{'r1': 298.1203949432643, 'r1_10': 4184.64148490887}
{'resid2': 0.0, 'r3': 394.4787945224113, 'r4': 300.3470365112952}
<class 'dict'>


In [192]:
#Check if FreeSASA automatically applies offsets to resids:
test_areas = freesasa.selectArea(('resid28, chain C and resn SER and resi 28', 'r1_10, resi 1-10'), structure, result) 
#N.B. if chain, resname and resid are not consistent, SASA = 0! SASA could also = 0 for legitimate reasons
#N.B.2 cannot use one letter amino acid names, must be 3 letter! 3 letter is case insensitive
print(test_areas)

{'resid28': 63.182973491449175, 'r1_10': 4184.64148490887}


In [193]:
test_areas = freesasa.selectArea(('resid27, chain C and resn SER and resi 27', 'r1_10, resi 1-10'), structure, result)
print(test_areas)
#SASA is 0 because SER 27 in chain C does not exist

{'resid27': 0.0, 'r1_10': 4184.64148490887}


In [194]:
def split_mutation(row):
    m=row.mutation
    return(int(m[1:-1]))

df['resid'] = df.apply(split_mutation, axis=1)
df['id'] = df['segid'] + df['resid'].astype(str)
df.set_index('id', inplace=True)
df

Unnamed: 0_level_0,segid,mutation,resid
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A3,A,I3D,3
A4,A,S4K,4
A5,A,Q5V,5
B6,B,R6D,6
C450,C,S450F,450
C435,C,D435F,435


In [195]:
def resname_1(row):
    m=row.mutation
    return(str(m[0:1]))

df['resname_1'] = df.apply(resname_1, axis=1)
df

Unnamed: 0_level_0,segid,mutation,resid,resname_1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A3,A,I3D,3,I
A4,A,S4K,4,S
A5,A,Q5V,5,Q
B6,B,R6D,6,R
C450,C,S450F,450,S
C435,C,D435F,435,D


Two ways of converting 1 letter to 3 letter resname using a dictionary:

In [196]:
amino_acid_onetothreeletter = {'C': 'CYS', 'D': 'ASP', 'S': 'SER', 'Q': 'GLN', 'K': 'LYS',
     'I': 'ILE', 'P': 'PRO', 'T': 'THR', 'F': 'PHE', 'N': 'ASN',
     'G': 'GLY', 'H': 'HIS', 'L': 'LEU', 'R': 'ARG', 'W': 'TRP',
     'A': 'ALA', 'V': 'VAL', 'E': 'GLU', 'Y': 'TYR', 'M': 'MET'}

In [197]:
df["resname_3"] = [amino_acid_onetothreeletter[resname] for resname in df.resname_1]
df

Unnamed: 0_level_0,segid,mutation,resid,resname_1,resname_3
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A3,A,I3D,3,I,ILE
A4,A,S4K,4,S,SER
A5,A,Q5V,5,Q,GLN
B6,B,R6D,6,R,ARG
C450,C,S450F,450,S,SER
C435,C,D435F,435,D,ASP


In [198]:
#Need to create offset to resid so that it aligns with the pdb file (i.e. OPPOSITE to in structural features)
test_areas = freesasa.selectArea(('resid456, chain C and resn SER and resi 456', 'resid441, chain C and resn ASP and resi 441'), structure, result) 
print(test_areas)
#corresponds to S450 and D435 in mutation dataframe

{'resid456': 8.757059761145552, 'resid441': 15.85735040189671}


In [199]:
#Adds column for offsets
offsets = {'A': 0, 'B': 0, 'C': -6}

df["chain_offsets"] = [offsets[chain] for chain in df.segid]
df


Unnamed: 0_level_0,segid,mutation,resid,resname_1,resname_3,chain_offsets
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A3,A,I3D,3,I,ILE,0
A4,A,S4K,4,S,SER,0
A5,A,Q5V,5,Q,GLN,0
B6,B,R6D,6,R,ARG,0
C450,C,S450F,450,S,SER,-6
C435,C,D435F,435,D,ASP,-6


In [200]:
#Adds column for pdb resids (i.e. the resid as given in the pdb which may not be the same as in the mutation df)
df["pdb_resid"] = df["resid"] - df["chain_offsets"]
df

Unnamed: 0_level_0,segid,mutation,resid,resname_1,resname_3,chain_offsets,pdb_resid
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A3,A,I3D,3,I,ILE,0,3
A4,A,S4K,4,S,SER,0,4
A5,A,Q5V,5,Q,GLN,0,5
B6,B,R6D,6,R,ARG,0,6
C450,C,S450F,450,S,SER,-6,456
C435,C,D435F,435,D,ASP,-6,441


In [201]:
sele_text = ["%s%i, resi %i and chain %s and resn %s" % (k,i,j,k,l) for i,j,k,l in zip(df.resid, df.pdb_resid, df.segid, df.resname_3)]
sele_text
# zip puts together the resid, segid and resname_3 for each index in df
# % in this case replaces the %s (string) and %i (int) with each variable in ()
# N.B. Offsets are only applied in the selection part

['A3, resi 3 and chain A and resn ILE',
 'A4, resi 4 and chain A and resn SER',
 'A5, resi 5 and chain A and resn GLN',
 'B6, resi 6 and chain B and resn ARG',
 'C450, resi 456 and chain C and resn SER',
 'C435, resi 441 and chain C and resn ASP']

In [202]:
#Calculate SASA for each resid in mutation
results = freesasa.selectArea(sele_text, structure, result)
print(results)
s = pandas.Series(results)
b = pandas.DataFrame(s, columns=['surface_area'])
b

{'A3': 50.86928083852934, 'A4': 61.11993677997087, 'A5': 123.63171533944528, 'B6': 112.7680523066127, 'C450': 8.757059761145552, 'C435': 15.85735040189671}


Unnamed: 0,surface_area
A3,50.869281
A4,61.119937
A5,123.631715
B6,112.768052
C450,8.75706
C435,15.85735


In [203]:
#Create multi index for FreeSASA df
#b.set_index(['segid', 'resid', 'amino_acid'], inplace=True)

In [205]:
#Add SASA column to existing dataframe 

new = df.join(b, how='left')
new

Unnamed: 0_level_0,segid,mutation,resid,resname_1,resname_3,chain_offsets,pdb_resid,surface_area
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A3,A,I3D,3,I,ILE,0,3,50.869281
A4,A,S4K,4,S,SER,0,4,61.119937
A5,A,Q5V,5,Q,GLN,0,5,123.631715
B6,B,R6D,6,R,ARG,0,6,112.768052
C450,C,S450F,450,S,SER,-6,456,8.75706
C435,C,D435F,435,D,ASP,-6,441,15.85735


In [None]:
#Create MultiIndex using segid, resid and amino_acid
other.set_index(['segid', 'resid', 'amino_acid'], inplace=True)
self.results.set_index(['segid', 'resid', 'amino_acid'], inplace=True)

other = other.join(self.results, how='left')

In [104]:
def one_to_three(row):
    return amino_acid_onetothreeletter[row.resname_1]

df["resname_3"] = df.apply(one_to_three, axis=1)
df
#help(df.apply)

Unnamed: 0_level_0,segid,mutation,resid,resname_1,resname_3
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A3,A,I3D,3,I,ILE
A4,A,S4K,4,S,SER
A5,A,Q5V,5,Q,GLN
B6,B,R6D,6,R,ARG
C450,C,S450F,450,S,SER
C435,C,D435F,435,D,ASP


In [78]:
#for a given key (i.e. one letter), return the value (i.e. three letter)
x = amino_acid_onetothreeletter['C']
print(x)
#does same as above
y = amino_acid_onetothreeletter.get('C')
print(y)

amino_acid_onetothreeletter.values()

for value in amino_acid_onetothreeletter:
    print(amino_acid_onetothreeletter[value])
    
#for each one letter in the df, return the three letter value and put in a new column of the df
three = amino_acid_onetothreeletter[df['resname_1']]

CYS
CYS
CYS
ASP
SER
GLN
LYS
ILE
PRO
THR
PHE
ASN
GLY
HIS
LEU
ARG
TRP
ALA
VAL
GLU
TYR
MET


TypeError: unhashable type: 'Series'

In [68]:
def three_amino_acid(row):
    return(amino_acid_onetothreeletter[row.resname_1])

one_letter = df.resname_1
one_letter

one_letter.apply(three_amino_acid)




AttributeError: 'str' object has no attribute 'resname_1'

In [9]:
sele_text = ["%s%i, resi %i and chain %s" % (j,i,i,j) for i,j in zip(df.resid, df.segid)]
sele_text

['A3, resi 3 and chain A',
 'A4, resi 4 and chain A',
 'A5, resi 5 and chain A',
 'B6, resi 6 and chain B',
 'C450, resi 450 and chain C',
 'C435, resi 435 and chain C']

In [10]:
results = freesasa.selectArea(sele_text, structure, result)
print(results)
s = pandas.Series(results)
b = pandas.DataFrame(s, columns=['surface_area'])
b

{'A3': 50.86928083852934, 'A4': 61.11993677997087, 'A5': 123.63171533944528, 'B6': 112.7680523066127, 'C450': 7.521629512442825, 'C435': 97.27040394434373}


Unnamed: 0,surface_area
A3,50.869281
A4,61.119937
A5,123.631715
B6,112.768052
C450,7.52163
C435,97.270404
