I restructure some of the code here, as well as some of the data. The researchers have not been consistent in how they enumerate the peptides or the domains, so it is somewhat difficult to bring everything together into one structure.

In [1]:
import os 
# Raw string so the backslashes in the Windows path are taken literally
# (the plain literal relied on "\E", "\Y" and "\P" not being escape sequences).
os.chdir(r'E:\Ecole\Year 3\Projet 3A')
import pandas as pd
import numpy as np 

class Domain:
    """A named PDZ domain and its model parameters.

    The parameters are not known at construction time; both start as None
    and are filled in by Data.create_domains.
    """

    def __init__(self, name):
        # Placeholders until the parameters are loaded.
        self.thetas = self.thresholds = None
        self.name = name

class Peptide:
    """A named peptide ligand.

    The sequence attributes start as None and are filled in by
    Data.create_peptides; the ground-state energy starts at 0.0.
    """

    def __init__(self, name):
        self.name = name
        # sequence is the full sequence; sequence_bis is meant to hold the
        # last five amino acids.
        self.sequence, self.sequence_bis = None, None
        # Reserved for the ground-state energy of the peptide, computed later.
        self.energy_ground = 0.0
        
class Data:
    """Container for the domain parameters and peptide sequences.

    Construction reads two files relative to the current working directory:
    the spreadsheet 'Data_PDZ/MDSM_01_stiffler_bis.xls' and the text file
    'Data_PDZ/peptides.free'. After construction, call create_domains() and
    create_peptides() to populate the Domain and Peptide objects.
    """

    def __init__(self):
        temp_df = pd.read_excel('Data_PDZ/MDSM_01_stiffler_bis.xls')
        # The first 20 column labels serve as the amino-acid alphabet.
        self.aminoacids = [acid.encode('utf-8') for acid in temp_df.columns[:20]]
        # Transposed so that every domain becomes one column of self.df.
        self.df = temp_df.T
        self.domains = [Domain(domain.encode('utf-8')) for domain in self.df.columns]
        self.domain_names = [domain.name for domain in self.domains]
        self.pep_seqs = []
        self.pep_names = []
        with open('Data_PDZ/peptides.free') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no peptide.
                    continue
                # First field is the peptide name, second its sequence.
                self.pep_names.append(fields[0])
                self.pep_seqs.append(fields[1])
        self.peptides = [Peptide(name) for name in self.pep_names]

    def create_domains(self):
        """Fill thetas and thresholds of every domain from its column of self.df.

        The first 100 values of the column become a 5x20 array (thetas);
        everything after them becomes the thresholds array.
        """
        for domain in self.domains:
            values = np.asarray(self.df[domain.name])
            domain.thetas = values[:100].reshape(5, 20)
            domain.thresholds = values[100:]

    def create_peptides(self):
        """Attach each loaded sequence to its Peptide object.

        sequence receives the full sequence; sequence_bis receives the last
        five amino acids as a list of single characters.
        """
        for peptide, seq in zip(self.peptides, self.pep_seqs):
            peptide.sequence = seq
            # Negative slice: the last five residues whatever the sequence
            # length ([5:] only gave that result for 10-residue sequences).
            peptide.sequence_bis = list(seq)[-5:]

In [2]:
# Build the data set; this reads the spreadsheet and the peptide file under Data_PDZ/.
PDZ_Data = Data()

In [3]:
# Populate the per-domain parameters (thetas, thresholds) and the per-peptide sequences.
PDZ_Data.create_domains()
PDZ_Data.create_peptides()

In [4]:
# Spot-check: display the truncated sequence stored for the peptide at index 10.
PDZ_Data.peptides[10].sequence_bis

['D', 'D', 'L', 'E', 'I']

Now we have created the preliminary data with the binding energy values and the peptide sequences. The last thing left to do is to get the data from the interaction matrix for each of the domains.

In [128]:
# Load the interaction matrix; it is later indexed by peptide name (column) and domain position (row).
fp_interaction_matrix = pd.read_excel('Data_PDZ/fp_interaction_matrix.xlsx')
# Recode every 0.0 entry as -1.0, column by column, so that these entries carry a -1 label.
for column in fp_interaction_matrix.columns:
    fp_interaction_matrix.loc[fp_interaction_matrix[column] == 0.0, column] = -1.0
# Strip spaces from the column labels.
# NOTE(review): presumably so they match the whitespace-free names read from peptides.free - confirm.
fp_interaction_matrix = fp_interaction_matrix.rename(columns=lambda x: str(x).replace(" ", ""))

In [129]:
def evaluate_score(domain, peptide):
    """Return the score of `peptide` against `domain`.

    For each of the five positions of peptide.sequence_bis, the residue is
    looked up in PDZ_Data.aminoacids and the matching entry of domain.thetas
    (row = position, column = amino-acid index) is added to the total. The
    first entry of domain.thresholds is then subtracted.
    """
    contributions = (
        domain.thetas[pos, PDZ_Data.aminoacids.index(peptide.sequence_bis[pos])]
        for pos in range(5)
    )
    # Start the sum from 0.0 so the accumulation matches a float running total.
    return sum(contributions, 0.0) - domain.thresholds[0]
    

In [130]:
# Spot-check: score of the peptide at index 9 against the domain at index 16.
evaluate_score(PDZ_Data.domains[16], PDZ_Data.peptides[9])

10.72625

In [131]:
def sigmoid(x, a=1):
    """Return the logistic function 1 / (1 + exp(-a * x)).

    `a` scales the argument and defaults to 1.
    """
    exponent = -1.0 * a * x
    denominator = 1 + np.exp(exponent)
    return 1.0 / denominator
def log_modified(x):
    """Return log(1 + exp(-x)) without overflow.

    np.logaddexp(0, -x) evaluates log(exp(0) + exp(-x)), which is the same
    quantity the two hand-written branches computed. It also accepts arrays
    (the `if x > 0` test did not) and keeps the tiny positive result for
    large x instead of rounding it to 0.
    """
    return np.logaddexp(0.0, -x)

Let us take one particular ligand and make mutations to this ligand. 

In [132]:
# Use the peptide at index 3 as the test ligand and show its name.
test_peptide = PDZ_Data.peptides[3]
print test_peptide.name

ASIC2


In [133]:
# Show the residues of the test peptide that evaluate_score reads.
print test_peptide.sequence_bis

['E', 'E', 'I', 'A', 'C']


In [134]:
def convert2seq(seq_int):
    """Translate amino-acid indices into their symbols from PDZ_Data.aminoacids."""
    symbols = []
    for index in seq_int:
        symbols.append(PDZ_Data.aminoacids[index])
    return symbols
def convert2int(seq_pep):
    """Translate amino-acid symbols into their positions in PDZ_Data.aminoacids.

    A symbol that is not in the alphabet raises ValueError (from list.index).
    """
    indices = []
    for residue in seq_pep:
        indices.append(PDZ_Data.aminoacids.index(residue))
    return indices

Let us calculate the **energy** associated with each peptide in our data set. We first calculate it for one peptide and then for all the peptides in the data set. These values are then treated as fixed when modeling the robustness of the specificity of the peptide-domain interaction.

In [135]:
# Running total of the energy of test_peptide over all domains.
score_natural = 0.0
print test_peptide.name
for i in range(len(PDZ_Data.domain_names)):
    # Score of the test peptide against the i-th domain.
    temp = evaluate_score(PDZ_Data.domains[i], test_peptide)
    # Interaction-matrix entry for this peptide (column) and the i-th domain (row).
    alpha = fp_interaction_matrix[test_peptide.name][i]
    ## As a sanity check we print the values of alpha as well.
    ## ASIC2 does not bind to any of the PDZ domains considered here, so every value should be -1.
    print alpha
    # Any positive entry is collapsed to +1; other entries are used unchanged.
    if alpha > 0:
        alpha = +1.0
    score = temp*alpha
    # Per-domain contribution: log(1 + exp(-score)).
    temp2 = log_modified(score)
    score_natural += temp2 
print score_natural

ASIC2
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0
2.58576358448


Now that we have calculated the energy for one peptide, let us calculate the ground state energies for all the peptides in the system. We shall write a simple function which does this for a given peptide.

In [136]:
def evaluate_energy(peptide):
    """Return the energy of `peptide` summed over all domains.

    For each domain position i, the score from evaluate_score is multiplied
    by the interaction-matrix entry for (peptide.name, i), and log_modified
    of that product is added to the total. Positive entries are replaced by
    +1.0; all other entries are used as they are.
    """
    total_energy = 0.0
    for idx in range(len(PDZ_Data.domain_names)):
        raw_score = evaluate_score(PDZ_Data.domains[idx], peptide)
        label = fp_interaction_matrix[peptide.name][idx]
        sign = +1.0 if label > 0 else label
        total_energy += log_modified(raw_score * sign)
    return total_energy

In [137]:
# Store each peptide's energy on the peptide object itself.
for pep in PDZ_Data.peptides:
    pep.energy_ground = evaluate_energy(pep)

In [138]:
# List every peptide name next to its stored ground-state energy.
for pep in PDZ_Data.peptides:
    print pep.name, pep.energy_ground

AN2 32.7714922253
APC 108.837975035
Aquaporin4 57.653418304
ASIC2 2.58576358448
Caspr2 10.2327578819
Cav2.2 7.8158107556
Cftr 10.1187576025
c-KIT 1.75989867909
Claudin1 24.338085902
Cnksr2 25.8454687084
Connexin43 5.88006436035
CRIPT 31.9035523407
CtBP1 25.6643013809
Dlgap123 7.07795685126
EphA71 2.55142344213
EphB2 11.4576849544
EphrinB12 1.6607904112
ErbB4 4.5055182776
Frizzled 28.4843578527
GluR1 7.08075331182
GluR2_1 8.96655307331
GluR5_1 31.1839205345
GlycphrinC 55.5078676101
GRK6 18.068508857
Htr2c 28.1979327446
JAM-1 6.36765080863
KIF17 4.01838283588
KIF1B 26.4271991025
Kir2.1 28.5088245181
Kv1.4 58.1963572783
Lgltminase 23.9056619767
Liprin2 7.84514190615
Megalin 12.7761343881
Mel1a/b 17.76549867
mGluR3 10.059792039
ctransprtr 3.70916853858
Nav1.4 30.2069337851
Nav1.5 33.5980566371
Neurxin1/2 37.7884348124
NMDAR2A 32.4503013503
NMDAR2B 25.8873063503
P2Y1 28.625230555
Parkin 2.06541536473
PDGFR 14.6647382588
PFK-M 15.2354145009
PIX 27.0731044414
PKC 28.2190847596
PMCA1 35.580510