# Vérification du fonctionnement de L

In [1]:
import pandas as pd
import numpy as np

class CarboneAlpha:
    """Classe pour représenter un carbone alpha d'une protéine."""
    
    def __init__(self, number, x, y, z):
        self.number = number
        self.x = x
        self.y = y
        self.z = z
    
    def compute_distance(self, other):
        dist = ((other.x - self.x) ** 2 + (other.y - self.y) ** 2 + (other.z - self.z) ** 2) ** 0.5
        return dist

class Template:
    """Classe pour représenter le template utilisé."""
    
    def __init__(self, file):
        self.structure = self.build_template_from_pdb(file)
        self.length = len(self.structure)

    def build_template_from_pdb(self, filename):
        list_calpha = []
        with open(filename, "r") as pdb :
            for ligne in pdb:
                if ligne.startswith("ATOM") and (ligne[12:16].strip() == "CA"):
                    number = ligne[6:11].strip()
                    x = float(ligne[30:38].strip())
                    y = float(ligne[38:46].strip())
                    z = float(ligne[46:54].strip())
                                       
                    list_calpha.append(CarboneAlpha(number, x, y, z))
        return list_calpha
        
    def build_dist_matrix(self):
        dist_list = []
        
        for i, atom in enumerate(self.structure):
            dist_ligne = []
            for other in (self.structure):
                dist_ligne.append(atom.compute_distance(other))
            dist_list.append(dist_ligne)
            
        dist_matrix = np.array(dist_list)
        return dist_matrix
    
    def __str__(self):
        string = ""
        for i, ca in enumerate(self.structure):
            string += f"position {i}-{ca.number}, coor( {ca.x}, {ca.y}, {ca.z})\n"
        return string

def clean_DOPE_data(filename):
    ca_matrix = []
    
    with open(filename, "r") as dope :
        for ligne in dope:
            if ligne[3:7].strip() == "CA" and ligne[11:14].strip() == "CA":
                ca_matrix.append(ligne.split())
    
    columns = ['res1', 'temp1', 'res2', 'temp2'] + list(np.arange(0.25, 15, 0.5))
    dope_score = pd.DataFrame(ca_matrix, columns = columns) 
    dope_score = dope_score.drop(['temp1', 'temp2'], axis=1)
    
    return dope_score

class DynamicMatrix:    
    def __init__(self, lines, columns, gap):
        self.matrix = np.zeros((lines, columns))
        self.lines = lines
        self.columns = columns
        self.gap = gap

    def initialize_matrix(self, first_val, start, end, get_score):
        if (start[0] < 0) or (start[1] < 0):
            raise ValueError("Start of initialization out of matrix.")
        if (end[0] >= self.lines) or (end[1] >= self.columns):
            raise ValueError("End of initialization out of matrix.")
        
        # Première case
        self.matrix[start[0], start[1]] = first_val
        
        # Remplissage de la première colonne jusqu'à la limite
        for i in range(start[0] + 1, end[0] + 1):
            self.matrix[i, start[1]] = self.matrix[i - 1, start[1]] + self.gap + get_score(i, start[1])

        # Remplissage de la première ligne jusqu'à la limite
        for j in range(start[1] + 1, end[1] + 1):
            self.matrix[start[0], j] = self.matrix[start[0], j - 1] + self.gap + get_score(start[0], j)

# Matrice de distance
PDB_FILE = "../data/5awl.pdb"
TEMPLATE = Template(PDB_FILE)
DIST_MATRIX = TEMPLATE.build_dist_matrix()
    
# Matrice DOPE
DOPE_FILE = "../data/dope.par"
DOPE_MATRIX = clean_DOPE_data(DOPE_FILE)

In [24]:
class LowLevelMatrix(DynamicMatrix):
    aa_codes = {
    'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'C': 'CYS',
    'Q': 'GLN', 'E': 'GLU', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE',
    'L': 'LEU', 'K': 'LYS', 'M': 'MET', 'F': 'PHE', 'P': 'PRO',
    'S': 'SER', 'T': 'THR', 'W': 'TRP', 'Y': 'TYR', 'V': 'VAL'
    }
    
    def __init__(self, gap, frozen, distance, dope, sequence):
        lines = len(sequence)
        columns = len(distance)
        
        DynamicMatrix.__init__(self, lines, columns, gap)

        # Vérification du blocage de la case
        if (frozen['seq_id'] >= lines) or (frozen['seq_id'] < 0):
            raise ValueError("Frozen line index out of matrix.")
        if (frozen['pos_id'] >= columns) or (frozen['pos_id'] < 0):
            raise ValueError("Frozen column index out of matrix")

        # Récupération du résidu fixé
        frozen['seq_res'] = sequence[frozen['seq_id']]
        
        self.frozen = frozen
        self.distance = distance
        self.dope = dope
        self.sequence = sequence

        print(distance)

    def round_distance(self, dist):
        # arrondi au quart le plus proche
        rounded_value = round(dist * 4) / 4
        
        # ne garde que 0.25 ou 0.75
        decimal = rounded_value % 1
        if decimal == 0.0:
            return rounded_value + 0.25
        elif decimal == 0.5:
            return rounded_value + 0.25
        else:
            return rounded_value
    
    def get_score(self, i, j):
        dist = self.distance[j, self.frozen["pos_id"]]
        closest_dist = self.round_distance(dist)

        score = self.dope.loc[(self.dope['res1'] == self.aa_codes[self.frozen['seq_res']]) & 
                              (self.dope['res2'] == self.aa_codes[self.sequence[i]]), 
                              closest_dist]
        print(i, j, dist, closest_dist, score.values[0])
        return float(score.values[0])
    
    def fill_matrix(self):
        # Partie supérieure gauche
        self.initialize_matrix(self.get_score(0, 0), [0, 0], 
                               [self.frozen['seq_id'] - 1, self.frozen['pos_id'] - 1],
                               self.get_score)

        for i in range(1, self.frozen['seq_id']):
            for j in range(1, self.frozen['pos_id']):
                score = self.get_score(i, j)
                self.matrix[i, j] = score + min(self.matrix[i - 1, j - 1],
                                                self.matrix[i - 1, j] + self.gap,
                                                self.matrix[i, j - 1] + self.gap
                                               )

        # Case fixée
        if (self.frozen['seq_id'] == 0):
            self.matrix[self.frozen['seq_id'], self.frozen['pos_id']] = self.matrix[self.frozen['seq_id'], self.frozen['pos_id'] - 1]
        elif (self.frozen['pos_id'] == 0):
            self.matrix[self.frozen['seq_id'], self.frozen['pos_id']] = self.matrix[self.frozen['seq_id'] - 1, self.frozen['pos_id']]
        else :
            self.matrix[self.frozen['seq_id'], self.frozen['pos_id']] = self.matrix[self.frozen['seq_id'] - 1, self.frozen['pos_id'] - 1]

        # Partie inférieure droite (si elle existe)
        if (self.frozen['seq_id'] != self.lines - 1) and (self.frozen['pos_id'] != self.columns - 1):
            self.initialize_matrix(self.matrix[self.frozen['seq_id'], self.frozen['pos_id']] +
                                   self.get_score(self.frozen['seq_id'] + 1, self.frozen['pos_id'] + 1),
                                   [self.frozen['seq_id'] + 1, self.frozen['pos_id'] + 1],
                                   [self.lines - 1, self.columns - 1], self.get_score
                                  )

            if (self.frozen['seq_id'] != self.lines - 2) and (self.frozen['pos_id'] != self.columns - 2):
                for i in range(self.frozen['seq_id'] + 2, self.lines):
                    for j in range(self.frozen['pos_id'] + 2, self.columns):
                        score = self.get_score(i, j)
                        self.matrix[i, j] = score + min(self.matrix[i - 1, j - 1],
                                                        self.matrix[i - 1, j] + self.gap,
                                                        self.matrix[i, j - 1] + self.gap
                                                       )
                        max_score = self.matrix[i, j]
            else :
                max_score = self.matrix[self.lines - 1, self.columns - 1]

        else :
            max_score = self.matrix[self.frozen['seq_id'], self.frozen['pos_id']]
        
        return max_score

In [25]:
FROZEN = {'seq_id': 9, 'pos_id': 1}
SEQUENCE = "YYDPETGTWY"
GAP = 0
    
LOW_TEST = LowLevelMatrix(GAP, FROZEN, DIST_MATRIX, DOPE_MATRIX, SEQUENCE)
MAX_SCORE = LOW_TEST.fill_matrix()
print(MAX_SCORE)

[[ 0.          3.81109512  6.46803656  9.24300692 11.98601844 11.3998171
   9.23535825  8.09624073  5.42059056  5.50859556]
 [ 3.81109512  0.          3.7937007   6.35816994  8.84260199  8.00722187
   5.47928225  5.2302759   4.41736324  6.2344752 ]
 [ 6.46803656  3.7937007   0.          3.81970967  5.58317732  5.52730486
   4.92545013  5.49911947  6.36471421  6.97604415]
 [ 9.24300692  6.35816994  3.81970967  0.          3.81323419  5.92562748
   5.68571447  8.18768007  9.79829256 10.7818849 ]
 [11.98601844  8.84260199  5.58317732  3.81323419  0.          3.78765495
   5.96358567  8.39334343 11.0356244  11.92004333]
 [11.3998171   8.00722187  5.52730486  5.92562748  3.78765495  0.
   3.83270662  5.44332031  8.79259461 10.24659724]
 [ 9.23535825  5.47928225  4.92545013  5.68571447  5.96358567  3.83270662
   0.          3.78029734  6.8109365   9.39523001]
 [ 8.09624073  5.2302759   5.49911947  8.18768007  8.39334343  5.44332031
   3.78029734  0.          3.80555594  6.50836838]
 [ 5.4205

In [9]:
DIST_MATRIX

array([[ 0.        ,  3.81109512,  6.46803656,  9.24300692, 11.98601844,
        11.3998171 ,  9.23535825,  8.09624073,  5.42059056,  5.50859556],
       [ 3.81109512,  0.        ,  3.7937007 ,  6.35816994,  8.84260199,
         8.00722187,  5.47928225,  5.2302759 ,  4.41736324,  6.2344752 ],
       [ 6.46803656,  3.7937007 ,  0.        ,  3.81970967,  5.58317732,
         5.52730486,  4.92545013,  5.49911947,  6.36471421,  6.97604415],
       [ 9.24300692,  6.35816994,  3.81970967,  0.        ,  3.81323419,
         5.92562748,  5.68571447,  8.18768007,  9.79829256, 10.7818849 ],
       [11.98601844,  8.84260199,  5.58317732,  3.81323419,  0.        ,
         3.78765495,  5.96358567,  8.39334343, 11.0356244 , 11.92004333],
       [11.3998171 ,  8.00722187,  5.52730486,  5.92562748,  3.78765495,
         0.        ,  3.83270662,  5.44332031,  8.79259461, 10.24659724],
       [ 9.23535825,  5.47928225,  4.92545013,  5.68571447,  5.96358567,
         3.83270662,  0.        ,  3.78029734

In [15]:
DIST_MATRIX[2, 0]

6.468036564522496

## Gap avant et après frozen possible

In [64]:
class LowLevelMatrix(DynamicMatrix):
    aa_codes = {
    'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'C': 'CYS',
    'Q': 'GLN', 'E': 'GLU', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE',
    'L': 'LEU', 'K': 'LYS', 'M': 'MET', 'F': 'PHE', 'P': 'PRO',
    'S': 'SER', 'T': 'THR', 'W': 'TRP', 'Y': 'TYR', 'V': 'VAL'
    }
    
    def __init__(self, gap, frozen, distance, dope, sequence):
        lines = len(sequence)
        columns = len(distance)
        
        DynamicMatrix.__init__(self, lines, columns, gap)

        # Vérification du blocage de la case
        if (frozen['seq_id'] >= lines) or (frozen['seq_id'] < 0):
            raise ValueError("Frozen line index out of matrix.")
        if (frozen['pos_id'] >= columns) or (frozen['pos_id'] < 0):
            raise ValueError("Frozen column index out of matrix")

        # Récupération du résidu fixé
        frozen['seq_res'] = sequence[frozen['seq_id']]
        
        self.frozen = frozen
        self.distance = distance
        self.dope = dope
        self.sequence = sequence

    def round_distance(self, dist):
        # arrondi au quart le plus proche
        rounded_value = round(dist * 4) / 4
        
        # ne garde que 0.25 ou 0.75
        decimal = rounded_value % 1
        if decimal == 0.0:
            return rounded_value + 0.25
        elif decimal == 0.5:
            return rounded_value + 0.25
        else:
            return rounded_value
    
    def get_score(self, i, j):
        dist = self.distance[j, self.frozen["pos_id"]]
        closest_dist = self.round_distance(dist)

        score = self.dope.loc[(self.dope['res1'] == self.aa_codes[self.frozen['seq_res']]) & 
                              (self.dope['res2'] == self.aa_codes[self.sequence[i]]), 
                              closest_dist]
        
        return float(score.values[0])
    
    def fill_matrix(self):
        # Partie supérieure gauche
        self.initialize_matrix(self.get_score(0, 0), [0, 0], 
                               [self.frozen['seq_id'], self.frozen['pos_id']],
                               self.get_score)

        for i in range(1, self.frozen['seq_id'] + 1):
            for j in range(1, self.frozen['pos_id'] + 1):
                score = self.get_score(i, j)
                self.matrix[i, j] = score + min(self.matrix[i - 1, j - 1],
                                                self.matrix[i - 1, j] + self.gap,
                                                self.matrix[i, j - 1] + self.gap
                                               )
        # Partie inférieure droite
        self.initialize_matrix(self.matrix[self.frozen['seq_id'], self.frozen['pos_id']],
                               [self.frozen['seq_id'], self.frozen['pos_id']],
                               [self.lines - 1, self.columns - 1],
                               self.get_score)

        for i in range(self.frozen['seq_id'] + 1, self.lines):
            for j in range(self.frozen['pos_id'] + 1, self.columns):
                score = self.get_score(i, j)
                self.matrix[i, j] = score + min(self.matrix[i - 1, j - 1],
                                                self.matrix[i - 1, j] + self.gap,
                                                self.matrix[i, j - 1] + self.gap
                                               )

        print(self.matrix)
        max_score = self.matrix[self.lines - 1, self.columns - 1]
        return max_score

In [84]:
FROZEN = {'seq_id': 5, 'pos_id': 5}
SEQUENCE = "YYDPETGTWY"
GAP = 0
    
LOW_TEST = LowLevelMatrix(GAP, FROZEN, DIST_MATRIX, DOPE_MATRIX, SEQUENCE)
MAX_SCORE = LOW_TEST.fill_matrix()
print(MAX_SCORE)

[[-3.000e-02  1.400e-01 -2.800e-01 -5.600e-01 -2.100e+00  7.900e+00
   0.000e+00  0.000e+00  0.000e+00  0.000e+00]
 [-6.000e-02  1.100e-01 -7.000e-01 -9.800e-01 -3.640e+00  6.360e+00
   0.000e+00  0.000e+00  0.000e+00  0.000e+00]
 [-2.000e-02  1.700e-01 -1.290e+00 -1.470e+00 -5.230e+00  4.770e+00
   0.000e+00  0.000e+00  0.000e+00  0.000e+00]
 [ 7.000e-02  1.000e-01 -1.700e+00 -2.100e+00 -6.960e+00  3.040e+00
   0.000e+00  0.000e+00  0.000e+00  0.000e+00]
 [ 1.200e-01  2.700e-01 -2.010e+00 -2.320e+00 -8.600e+00  1.400e+00
   0.000e+00  0.000e+00  0.000e+00  0.000e+00]
 [ 1.300e-01  2.500e-01 -2.390e+00 -2.680e+00 -1.015e+01 -1.500e-01
  -1.700e+00 -2.080e+00 -2.180e+00 -2.400e+00]
 [ 0.000e+00  0.000e+00  0.000e+00  0.000e+00  0.000e+00  9.850e+00
  -3.370e+00 -3.840e+00 -3.890e+00 -4.030e+00]
 [ 0.000e+00  0.000e+00  0.000e+00  0.000e+00  0.000e+00  1.985e+01
  -4.920e+00 -5.300e+00 -5.400e+00 -5.620e+00]
 [ 0.000e+00  0.000e+00  0.000e+00  0.000e+00  0.000e+00  2.985e+01
  -6.430e+00