# Ancien algo

In [1]:
import pandas as pd
import numpy as np

class CarboneAlpha:
    """Classe pour représenter un carbone alpha d'une protéine."""
    
    def __init__(self, number, x, y, z):
        self.number = number
        self.x = x
        self.y = y
        self.z = z
    
    def compute_distance(self, other):
        dist = ((other.x - self.x) ** 2 + (other.y - self.y) ** 2 + (other.z - self.z) ** 2) ** 0.5
        return dist

class Template:
    """Classe pour représenter le template utilisé."""

    def __init__(self, file):
        self.structure = self.build_template_from_pdb(file)
        self.length = len(self.structure)

    def build_template_from_pdb(self, filename):
        list_calpha = []
        with open(filename, "r") as pdb :
            for ligne in pdb:
                if ligne.startswith("ATOM") and (ligne[12:16].strip() == "CA"):
                    number = ligne[6:11].strip()
                    x = float(ligne[30:38].strip())
                    y = float(ligne[38:46].strip())
                    z = float(ligne[46:54].strip())
                                       
                    list_calpha.append(CarboneAlpha(number, x, y, z))
        return list_calpha
        
    def build_dist_matrix(self):
        dist_list = []
        
        for i, atom in enumerate(self.structure):
            dist_ligne = []
            for other in (self.structure):
                dist_ligne.append(atom.compute_distance(other))
            dist_list.append(dist_ligne)
            
        dist_matrix = np.array(dist_list)
        return dist_matrix
    
    def __str__(self):
        string = ""
        for i, ca in enumerate(self.structure):
            string += f"position {i}-{ca.number}, coor( {ca.x}, {ca.y}, {ca.z})\n"
        return string

def clean_DOPE_data(filename):
    ca_matrix = []
    
    with open(filename, "r") as dope :
        for ligne in dope:
            if ligne[3:7].strip() == "CA" and ligne[11:14].strip() == "CA":
                ca_matrix.append(ligne.split())
    
    columns = ['res1', 'temp1', 'res2', 'temp2'] + list(np.arange(0.25, 15, 0.5))
    dope_score = pd.DataFrame(ca_matrix, columns = columns) 
    dope_score = dope_score.drop(['temp1', 'temp2'], axis=1)
    
    return dope_score

def get_fasta_sequence(filename):
    sequence = ""
    with open(filename, "r") as fasta:
        for ligne in fasta:
            if ligne.startswith(">"):
                continue
            sequence += ligne.strip()
    return sequence

class DynamicMatrix:    
    def __init__(self, lines, columns, gap):
        self.matrix = np.zeros((lines, columns))
        self.lines = lines
        self.columns = columns
        self.gap = gap

    def initialize_matrix(self, first_val, start, end):
        if (start[0] < 0) or (start[1] < 0):
            raise ValueError("Start of initialization out of matrix.")
        if (end[0] >= self.lines) or (end[1] >= self.columns):
            raise ValueError("End of initialization out of matrix.")
        
        # Première case
        self.matrix[start[0], start[1]] = first_val
        
        # Remplissage de la première colonne jusqu'à la limite
        for i in range(start[0] + 1, end[0] + 1):
            self.matrix[i, start[1]] = self.matrix[i - 1, start[1]] + self.gap

        # Remplissage de la première ligne jusqu'à la limite
        for j in range(start[1] + 1, end[1] + 1):
            self.matrix[start[0], j] = self.matrix[start[0], j - 1] + self.gap

class LowLevelMatrix(DynamicMatrix):
    aa_codes = {
    'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'C': 'CYS',
    'Q': 'GLN', 'E': 'GLU', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE',
    'L': 'LEU', 'K': 'LYS', 'M': 'MET', 'F': 'PHE', 'P': 'PRO',
    'S': 'SER', 'T': 'THR', 'W': 'TRP', 'Y': 'TYR', 'V': 'VAL'
    }
    
    def __init__(self, gap, frozen, distance, dope, sequence):
        lines = len(sequence)
        columns = len(distance)
        
        DynamicMatrix.__init__(self, lines, columns, gap)

        # Vérification du blocage de la case
        if (frozen['seq_id'] >= lines) or (frozen['seq_id'] < 0):
            raise ValueError("Frozen line index out of matrix.")
        if (frozen['pos_id'] >= columns) or (frozen['pos_id'] < 0):
            raise ValueError("Frozen column index out of matrix")

        # Récupération du résidu fixé
        frozen['seq_res'] = sequence[frozen['seq_id']]
        
        self.frozen = frozen
        self.distance = distance
        self.dope = dope
        self.sequence = sequence

    def round_distance(self, dist):
        # arrondi au quart le plus proche
        rounded_value = round(dist * 4) / 4
        
        # ne garde que 0.25 ou 0.75
        decimal = rounded_value % 1
        if decimal == 0.0:
            return rounded_value + 0.25
        elif decimal == 0.5:
            return rounded_value + 0.25
        else:
            return rounded_value
    
    def get_score(self, i, j):
        # Cas du résidu bloqué avec sa propre position
        #if (i == self.frozen["seq_id"] or j == self.frozen["pos_id"]):
            #return 0
        
        dist = self.distance[self.frozen["pos_id"], j]
        closest_dist = self.round_distance(dist)
        score = self.dope.loc[(self.dope['res1'] == self.aa_codes[self.frozen['seq_res']]) & 
                              (self.dope['res2'] == self.aa_codes[self.sequence[i]]), 
                              closest_dist]
        
        return float(score.values[0])
    
    def fill_matrix(self):
        # Partie supérieure gauche
        self.initialize_matrix(self.get_score(0, 0), [0, 0], 
                               [self.frozen['seq_id'] - 1, self.frozen['pos_id'] - 1])
        
        for i in range(1, self.frozen['seq_id']):
            for j in range(1, self.frozen['pos_id']):
                score = self.get_score(i, j)
                self.matrix[i, j] = min(self.matrix[i - 1, j - 1] + score,
                                        self.matrix[i - 1, j] + self.gap,
                                        self.matrix[i, j - 1] + self.gap
                                       )

        # Case fixée
        self.matrix[self.frozen['seq_id'], self.frozen['pos_id']] = self.matrix[self.frozen['seq_id'] - 1, self.frozen['pos_id'] - 1]
        
        # Partie inférieure droite (si elle existe)
        if (self.frozen['pos_id'] == self.columns - 1 or self.frozen['seq_id'] == self.lines - 1):
            max_score = self.matrix[self.frozen['seq_id'], self.frozen['pos_id']]

        else:
            self.initialize_matrix(self.matrix[self.frozen['seq_id'], self.frozen['pos_id']],
                                   [self.frozen['seq_id'] + 1, self.frozen['pos_id'] + 1],
                                   [self.lines - 1, self.columns - 1])
    
            for i in range(self.frozen['seq_id'] + 1, self.lines):
                for j in range(self.frozen['pos_id'] + 1, self.columns):
                    score = self.get_score(i, j)
                    self.matrix[i, j] = min(self.matrix[i - 1, j - 1] + score,
                                            self.matrix[i - 1, j] + self.gap,
                                            self.matrix[i, j - 1] + self.gap
                                           )
    
            max_score = self.matrix[self.lines - 1, self.columns - 1]
        return max_score

class HighLevelMatrix(DynamicMatrix):
    def __init__(self, gap, query, template, dope):
        distance = template.build_dist_matrix()
        lines = len(query)
        columns = len(distance)

        DynamicMatrix.__init__(self, lines, columns, gap)

        self.sequence = query
        self.distance = distance
        self.dope = dope

        self.get_score_matrix()

    def get_score_matrix(self):
        self.score_matrix = np.zeros((self.lines, self.columns))
        for i in range(self.lines):
            for j in range(self.columns):
                frozen = {'seq_id': i, 'pos_id': j}
                low_level = LowLevelMatrix(self.gap, frozen, self.distance, self.dope, self.sequence)
                self.score_matrix[i, j] =  low_level.fill_matrix()

    def get_score(self, i, j):
        score = self.score_matrix[i, j]
        return score

    def fill_matrix(self):
        # Initialisation
        self.initialize_matrix(self.get_score(0, 0), [0, 0], 
                               [self.lines - 1, self.columns - 1])
        
        # Remplissage
        for i in range(1, self.lines):
            for j in range(1, self.columns):
                score = self.get_score(i, j)
                self.matrix[i, j] = min(self.matrix[i - 1, j - 1] + score,
                                        self.matrix[i - 1, j] + self.gap,
                                        self.matrix[i, j - 1] + self.gap
                                       )
        max_score = self.matrix[self.lines - 1, self.columns - 1]

        return max_score

    def get_alignment(self):
        structure_align = []
        sequence_align = []
        
        i = self.lines - 1
        j = self.columns - 1
        while not ((i == 0) and (j == 0)):
            print(i, j)
            square = self.matrix[i, j]
            score = self.score_matrix[i, j]
            # Match
            if (square == self.matrix[i - 1, j - 1] + score):
                print("match")
                structure_align.insert(0, j + 1)
                sequence_align.insert(0, self.sequence[i])
                i = i - 1
                j = j - 1
            # Gap
            else:
                if (square == self.matrix[i - 1, j] + self.gap):
                    print("gap structure")
                    structure_align.insert(0, '-')
                    sequence_align.insert(0, self.sequence[i])
                    i = i - 1
                elif (square == self.matrix[i, j - 1] + self.gap):
                    print("gap sequence")
                    structure_align.insert(0, j + 1)
                    sequence_align.insert(0, '-')
                    j = j - 1

        return ''.join(sequence_align), ''.join(str(x) for x in structure_align)

# Algo documenté

In [16]:
"""
Double Dynamic Programming algorithm to thread sequence onto template.

Author :
    Gloria BENOIT
Date :
    2024-09-12
"""
import sys
from pathlib import Path
from multiprocessing import Pool

import pandas as pd
import numpy as np


class AlphaCarbon:
    """
    Class used to represent a protein's alpha carbon.

    Instance Attributes
    -------------------
    number : int
        Position number in protein.
    x : float
        X position.
    y : float
        Y position.
    z : float
        Z position.

    Methods
    -------
    compute_distance(other)
        Compute distance between himself and another alpha carbon.
    """

    def __init__(self, number, x, y, z):
        """
        Construct an alpha carbon.

        Parameters
        ----------
        number : int
            Position number in protein.
        x : float
            X position.
        y : float
            Y position.
        z : float
            Z position.
        """
        self.number = number
        self.x = x
        self.y = y
        self.z = z

    def compute_distance(self, other):
        """
        Compute distance between himself and another alpha carbon.

        Parameters
        ----------
        other : AlphaCarbon
            Another alpha carbon.

        Returns
        -------
        float
            Distance between both alpha carbon.
        """
        dist = (
            (other.x - self.x) ** 2
            + (other.y - self.y) ** 2
            + (other.z - self.z) ** 2
        ) ** 0.5
        return dist


class Template:
    """
    Class used to represent a structural template.

    Instance Attributes
    -------------------
    structure : list of AlphaCarbon
        All alpha carbon found in structure.
    length : int
        Number of alpha carbon found.

    Methods
    -------
    build_from_pdb(filename)
        Retrieve structural template from PDB file.
    build_dist_matrix()
        Compute a distance matrix between all alpha carbon
        in the template.
    """

    def __init__(self, file):
        """
        Construct a structural template.

        Parameters
        ----------
        file : str
            PDB file to use.
        """
        self.structure = self.build_from_pdb(file)
        self.length = len(self.structure)

    def __str__(self):
        """
        Return information on the template's alpha carbons.

        Returns
        -------
        str
            Number, position and coordinates of each alpha carbon
            in the template.
        """
        string = ""
        for i, ca in enumerate(self.structure):
            string += (
                f"position {i}-{ca.number}, coor( {ca.x}, {ca.y}, {ca.z})\n"
            )
        return string

    def build_from_pdb(self, filename):
        """
        Retrieve structural template from PDB file.

        Parameters
        ----------
        filename : str
            PDB file.

        Returns
        -------
        list of AlphaCarbon
            List of all alpha carbon found in the template.
        """
        list_calpha = []
        with open(filename, "r", encoding='UTF-8') as pdb:
            for ligne in pdb:
                if ligne.startswith("ATOM") and (ligne[12:16].strip() == "CA"):
                    number = ligne[22:26].strip()
                    x = float(ligne[30:38].strip())
                    y = float(ligne[38:46].strip())
                    z = float(ligne[46:54].strip())

                    list_calpha.append(AlphaCarbon(number, x, y, z))

                # Conservation du premier modèle uniquement
                if ligne.startswith("MODEL        2"):
                    break
        return list_calpha

    def build_dist_matrix(self):
        """
        Compute a distance matrix between all alpha carbon in the template.

        Returns
        -------
        numpy.ndarray
            2D array of float, representing the distance between
            all pairs of alpha carbon in template.
        """
        dist_list = []

        for atom in self.structure:
            dist_ligne = []
            for other in self.structure:
                dist_ligne.append(atom.compute_distance(other))
            dist_list.append(dist_ligne)

        dist_matrix = np.array(dist_list)
        return dist_matrix


class DynamicMatrix:
    """
    Class used to represent a dynamic programming matrix.

    Instance Attributes
    -------------------
    matrix : numpy.ndarray
        2D array of float, representing the scores obtained.
    lines : int
        Number of lines in matrix.
    columns : int
        Number of columns in matrix.
    gap : int
        Gap penalty.

    Methods
    -------
    initialize_matrix(first_val, start, end)
        Initialize the first line and column of part of the matrix.
    """

    def __init__(self, lines, columns, gap):
        """
        Construct a dynamic programming matrix.

        Parameters
        ----------
        lines : int
            Number of lines in matrix.
        columns : int
            Number of columns in matrix.
        gap : int
            Gap penalty.
        """
        self.matrix = np.zeros((lines, columns))
        self.lines = lines
        self.columns = columns
        self.gap = gap

    def initialize_matrix(self, first_val, start, end):
        """
        Initialize the first line and column of part of the matrix.

        Parameters
        ----------
        first_val : float
            Value of the first square.
        start : list
            Line and column index of the first position to initialize.
        end : list
            Line anc column index of last position to initialize.

        Raises
        ------
        ValueError
            If the part to initialize is out of the matrix.
        """
        if (start[0] < 0) or (start[1] < 0):
            raise ValueError("Start of initialization out of matrix.")
        if (end[0] >= self.lines) or (end[1] >= self.columns):
            raise ValueError("End of initialization out of matrix.")

        # Première case
        self.matrix[start[0], start[1]] = first_val

        # Remplissage de la première colonne jusqu'à la limite
        for i in range(start[0] + 1, end[0] + 1):
            self.matrix[i, start[1]] = self.matrix[i - 1, start[1]] + self.gap

        # Remplissage de la première ligne jusqu'à la limite
        for j in range(start[1] + 1, end[1] + 1):
            self.matrix[start[0], j] = self.matrix[start[0], j - 1] + self.gap


class LowLevelMatrix(DynamicMatrix):
    """
    Class used to represent a low level matrix.

    Lines are the sequence and columns the structure.
    Inherits all attributes and methods from DynamicMatrix.

    Class Attributes
    ----------------
    aa_codes : dict
        Dictionnary of amino acides codes, with one letter code
        as keys and three letter code as values.

    Instance Attributes
    -------------------
    frozen : dict
        Dictionary containing information on the frozen square.
            - 'seq_id' : int
                Sequence index of frozen square.
            - 'pos_id' : int
                Structure index of frozen square.
            - 'seq_res' : str
                Amino acid of the frozen square.
    distance : numpy.ndarray
        2D array of float, representing the distance between
        all pairs of alpha carbon in template.
    dope : pandas.DataFrame
        Dataframe containing DOPE potentials based on amino acids
        and distance.
            - 'res1' : str
                Three letter code of the first amino acid.
            - 'res2' : str
                Three letter code of the second amino acid.
            - All other columns : float
                Column names are distance from 0.25 to 14.75 with 0.5
                incrementation. Values are the DOPE potentials for said
                distance between first and second amino acids.
    sequence : str
        Sequence to thread on template.

    Methods
    -------
    round_distance(dist)
        Round value to closest 0.25 or 0.75 decimal.
    get_score(i, j)
        Compute score for i residue and j position.
    fill_matrix()
        Compute low level matrix by dynamic programming.
    """

    aa_codes = {
        "A": "ALA", "R": "ARG", "N": "ASN", "D": "ASP", "C": "CYS",
        "Q": "GLN", "E": "GLU", "G": "GLY", "H": "HIS", "I": "ILE",
        "L": "LEU", "K": "LYS", "M": "MET", "F": "PHE", "P": "PRO",
        "S": "SER", "T": "THR", "W": "TRP", "Y": "TYR", "V": "VAL"
    }

    def __init__(self, gap, frozen, distance, dope, sequence):
        """
        Construct a low level matrix.

        Parameters
        ----------
        gap : int
            Gap penalty.
        frozen : dict
            Dictionary containing information on the frozen square.
                - 'seq_id' : int
                    Sequence index of frozen square.
                - 'pos_id' : int
                    Structure index of frozen square.
        distance : numpy.ndarray
            2D array of float, representing the distance between
            all pairs of alpha carbon in template.
        dope : pandas.DataFrame
            Dataframe containing DOPE potentials based on amino acids
            and distance.
                - 'res1' : str
                    Three letter code of the first amino acid.
                - 'res2' : str
                    Three letter code of the second amino acid.
                - All other columns : float
                    Column names are distance from 0.25 to 14.75 with 0.5
                    incrementation. Values are the DOPE potentials for said
                    distance between first and second amino acids.
        sequence : str
            Sequence to thread on template.

        Raises
        ------
        ValueError
            If frozen square is out of matrix.
        """
        lines = len(sequence)
        columns = np.shape(distance)[1]

        DynamicMatrix.__init__(self, lines, columns, gap)

        # Vérification du blocage de la case
        if (frozen["seq_id"] >= lines) or (frozen["seq_id"] < 0):
            raise ValueError("Frozen line index out of matrix.")
        if (frozen["pos_id"] >= columns) or (frozen["pos_id"] < 0):
            raise ValueError("Frozen column index out of matrix")

        # Récupération du résidu fixé
        frozen["seq_res"] = sequence[frozen["seq_id"]]

        self.frozen = frozen
        self.distance = distance
        self.dope = dope
        self.sequence = sequence

    def round_distance(self, dist):
        """
        Round value to closest 0.25 or 0.75 decimal.

        If value is equally closer to 0.25 and 0.75,
        it is rounded to the upper.

        Parameters
        ----------
        dist : float
            Distance between two alpha carbon.

        Returns
        -------
        float
            Rounded distance.
        """
        # Cas de la distance trop grande
        if dist > 14.75:
            return 14.75
        
        # arrondi au quart le plus proche
        rounded_value = round(dist * 4) / 4

        # ne garde que 0.25 ou 0.75
        decimal = rounded_value % 1
        if decimal == 0.0:
            return rounded_value + 0.25
        if decimal == 0.5:
            return rounded_value + 0.25

        return rounded_value

    def get_score(self, i, j):
        """
        Compute score for i residue and j position.

        Parameters
        ----------
        i : int
            Residue index.
        j : int
            Position index.

        Returns
        -------
        float
            DOPE potential for said residue at said position,
            with distance to frozen square.
        """
        dist = self.distance[self.frozen["pos_id"], j]
        closest_dist = self.round_distance(dist)
        score = self.dope.loc[
            (self.dope["res1"] == self.aa_codes[self.frozen["seq_res"]])
            & (self.dope["res2"] == self.aa_codes[self.sequence[i]]),
            closest_dist,
        ]

        return float(score.values[0])

    def fill_matrix(self):
        """
        Compute low level matrix by dynamic programming.

        Returns
        -------
        float
            Final score of low level matrix.
        """
        # Partie supérieure gauche
        self.initialize_matrix(
            self.get_score(0, 0),
            [0, 0],
            [self.frozen["seq_id"] - 1, self.frozen["pos_id"] - 1],
        )

        for i in range(1, self.frozen["seq_id"]):
            for j in range(1, self.frozen["pos_id"]):
                score = self.get_score(i, j)
                self.matrix[i, j] = min(
                    self.matrix[i - 1, j - 1] + score,
                    self.matrix[i - 1, j] + self.gap,
                    self.matrix[i, j - 1] + self.gap,
                )

        # Case fixée
        self.matrix[self.frozen["seq_id"], self.frozen["pos_id"]] = (
            self.matrix[self.frozen["seq_id"] - 1, self.frozen["pos_id"] - 1]
        )

        # Partie inférieure droite (si elle existe)
        if (
            self.frozen["pos_id"] == self.columns - 1
            or self.frozen["seq_id"] == self.lines - 1
        ):
            max_score = self.matrix[
                self.frozen["seq_id"], self.frozen["pos_id"]
            ]

        else:
            self.initialize_matrix(
                self.matrix[self.frozen["seq_id"], self.frozen["pos_id"]],
                [self.frozen["seq_id"] + 1, self.frozen["pos_id"] + 1],
                [self.lines - 1, self.columns - 1],
            )

            for i in range(self.frozen["seq_id"] + 1, self.lines):
                for j in range(self.frozen["pos_id"] + 1, self.columns):
                    score = self.get_score(i, j)
                    self.matrix[i, j] = min(
                        self.matrix[i - 1, j - 1] + score,
                        self.matrix[i - 1, j] + self.gap,
                        self.matrix[i, j - 1] + self.gap,
                    )

            max_score = self.matrix[self.lines - 1, self.columns - 1]
        return max_score


class HighLevelMatrix(DynamicMatrix):
    """
    Class used to represent a high level matrix.

    Lines are the sequence and columns the structure.
    Inherits all attributes and methods from DynamicMatrix.

    Instance Attributes
    -------------------
    sequence : str
        Sequence to thread on template.
    template : Template
        Structural template.
    distance : numpy.ndarray
        2D array of float, representing the distance between
        all pairs of alpha carbon in template.
    dope : pandas.DataFrame
        Dataframe containing DOPE potentials based on amino acids
        and distance.
            - 'res1' : str
                Three letter code of the first amino acid.
            - 'res2' : str
                Three letter code of the second amino acid.
            - All other columns : float
                Column names are distance from 0.25 to 14.75 with 0.5
                incrementation. Values are the DOPE potentials for said
                distance between first and second amino acids.
    score_matrix : numpy.ndarray
        2D array of float, representing the maximum scores obtained in all
        low level matrixes.

    Methods
    -------
    compute_low_level(args)
        Construct a low level matrix.
    get_score_matrix()
        Construct score matrix based on maximum scores obtained in
        low level matrixes.
    get_score(i, j)
        Compute score for i residue and j position.
    fill_matrix()
        Compute high level matrix by dynamic programming.
    get_alignment()
        Find optimal alignement.
    print_alignment(score, sequence_align, structure_align, max_char=50)
        Displays an alignment.
    """

    def __init__(self, gap, query, template, dope):
        """
        Construct a low level matrix.

        Parameters
        ----------
        gap : int
            Gap penalty.
        query : str
            Sequence to thread on template.
        template : Template
            Structural template.
        dope : pandas.DataFrame
            Dataframe containing DOPE potentials based on amino acids
            and distance.
                - 'res1' : str
                    Three letter code of the first amino acid.
                - 'res2' : str
                    Three letter code of the second amino acid.
                - All other columns : float
                    Column names are distance from 0.25 to 14.75 with 0.5
                    incrementation. Values are the DOPE potentials for said
                    distance between first and second amino acids.
        """
        distance = template.build_dist_matrix()
        lines = len(query)
        columns = len(distance)

        # Ajout d'une colonne et ligne au départ
        DynamicMatrix.__init__(self, lines + 1, columns + 1, gap)

        self.sequence = query
        self.template = template
        self.distance = distance
        self.dope = dope

        self.get_score_matrix()

    def compute_low_level(self, args):
        """
        Construct a low level matrix.

        Parameters
        ----------
        args : tuple
            Contains the following :
                - gap : int
                    Gap penalty.
                - distance : numpy.ndarray
                    2D array of float, representing the distance between
                    all pairs of alpha carbon in template.
                - dope : pandas.DataFrame
                    Dataframe containing DOPE potentials based on amino acids
                    and distance.
                        - 'res1' : str
                            Three letter code of the first amino acid.
                        - 'res2' : str
                            Three letter code of the second amino acid.
                        - All other columns : float
                            Column names are distance from 0.25 to 14.75
                            with 0.5 incrementation. Values are the DOPE
                            potentials for said distance between first and
                            second amino acids.
                - sequence : str
                    Sequence to thread on template.
                - i : int
                    Residue index to freeze.
                - j : int
                    Position index to freeze.

        Returns
        -------
        int
            Frozen residue index.
        int
            Frozen position index.
        float
            Final score obtained.
        """
        gap, distance, dope, sequence, i, j = args
        frozen = {"seq_id": i, "pos_id": j}
        low_level = LowLevelMatrix(gap, frozen, distance, dope, sequence)
        max_score = low_level.fill_matrix()

        return i, j, max_score

    def get_score_matrix(self):
        """
        Construct score matrix.

        Score are based on maximum scores obtained in low level matrixes.
        The construction of all low level matrixes is parallelized.
        """
        self.score_matrix = np.zeros((self.lines - 1, self.columns - 1))

        # Parallélisation
        args = [
            (self.gap, self.distance, self.dope, self.sequence, i, j)
            for i in range(self.lines - 1)
            for j in range(self.columns - 1)
        ]

        with Pool() as pool:
            low_levels = pool.map(self.compute_low_level, args)

        for i, j, value in low_levels:
            self.score_matrix[i, j] = value

    def get_score(self, i, j):
        """
        Compute score for i residue and j position.

        Parameters
        ----------
        i : int
            Residue index.
        j : int
            Position index.

        Returns
        -------
        float
            Maximal score of low level matrix where (i, j) is frozen.
        """
        score = self.score_matrix[i, j]
        return score

    def fill_matrix(self):
        """
        Compute high level matrix by dynamic programming.

        Returns
        -------
        float
            Final score of high level matrix.
        """
        # Initialisation
        self.initialize_matrix(0, [0, 0], [self.lines - 1, self.columns - 1])

        # Remplissage
        for i in range(1, self.lines):
            for j in range(1, self.columns):
                score = self.get_score(i - 1, j - 1)
                self.matrix[i, j] = min(
                    self.matrix[i - 1, j - 1] + score,
                    self.matrix[i - 1, j] + self.gap,
                    self.matrix[i, j - 1] + self.gap,
                )
        max_score = self.matrix[self.lines - 1, self.columns - 1]

        return max_score

    def get_alignment(self):
        """
        Find optimal alignement.

        Returns
        -------
        list
            Aligned sequence.
        list
            Aligned structure.
        """
        structure_align = []
        sequence_align = []

        i = self.lines - 1
        j = self.columns - 1
        while (i, j) != (0, 0):
            print(i, j)
            pos_nb = self.template.structure[j - 1].number

            # Bordures de matrice
            if (i == 0):
                print("gap sequence")
                structure_align.insert(0, pos_nb)
                sequence_align.insert(0, "-")
                j = j - 1
                continue
            if (j == 0):
                print("gap structure")
                structure_align.insert(0, "-")
                sequence_align.insert(0, self.sequence[i - 1])
                i = i - 1
                continue

            square = self.matrix[i, j]
            score = self.score_matrix[i - 1, j - 1]

            # Match
            if square == self.matrix[i - 1, j - 1] + score:
                print("match")
                structure_align.insert(0, pos_nb)
                sequence_align.insert(0, self.sequence[i - 1])
                i = i - 1
                j = j - 1
            # Gap
            else:
                if square == self.matrix[i - 1, j] + self.gap:
                    print("gap structure")
                    structure_align.insert(0, "-")
                    sequence_align.insert(0, self.sequence[i - 1])
                    i = i - 1
                elif square == self.matrix[i, j - 1] + self.gap:
                    print("gap sequence")
                    structure_align.insert(0, pos_nb)
                    sequence_align.insert(0, "-")
                    j = j - 1

        return sequence_align, structure_align

    def print_alignment(self, score, sequence_align,
                        structure_align, max_char=50
                        ):
        """
        Display an alignment.

        Parameters
        ----------
        score : float
            Score of alignment.
        sequence_align : list
            Aligned sequence.
        structure_align : list
            Aligned structure.
        max_char : int
            Max characters to show in one line.
            Default is 50.
        """
        # Mise en forme du résultat
        f_seq_align = ""
        f_struct_align = ""
        for index, value in enumerate(sequence_align):
            max_len = len(str(structure_align[index]))
            f_seq_align += f"{value:^{max_len}} "
            f_struct_align += f"{structure_align[index]} "

        # Affichage
        print(f"Optimized alignment, score= {score:.2f}:")
        i = 0
        while i < min(len(f_struct_align), len(f_seq_align)):
            j = i + max_char
            while (j < min(len(f_struct_align), len(f_seq_align))
                   and (f_struct_align[j] != ' ' or f_seq_align[j] != ' ')
                   ):
                j += 1
            if j < min(len(f_struct_align), len(f_seq_align)):
                j = j + 1

            print(f_struct_align[i:j])
            print(f_seq_align[i:j])
            if len(f_struct_align) > max_char:
                print()  # Ligne vide pour espacer les alignements
            i = j


def clean_dope_data(filename):
    """Clean DOPE potentials file to keep only alpha carbon information.

    Parameters
    ----------
    filename : str
        File of DOPE potentials.

    Returns
    -------
    pandas.DataFrame
        Dataframe containing DOPE potentials based on amino acids
        and distance.
            - 'res1' : str
                Three letter code of the first amino acid.
            - 'res2' : str
                Three letter code of the second amino acid.
            - All other columns : float
                Column names are distance from 0.25 to 14.75 with 0.5
                incrementation. Values are the DOPE potentials for said
                distance between first and second amino acids.
    """
    ca_matrix = []

    with open(filename, "r", encoding='UTF-8') as dope:
        for ligne in dope:
            if ligne[3:7].strip() == "CA" and ligne[11:14].strip() == "CA":
                ca_matrix.append(ligne.split())

    columns = ["res1", "temp1", "res2", "temp2"] + list(
        np.arange(0.25, 15, 0.5)
    )
    dope_score = pd.DataFrame(ca_matrix, columns=columns)
    dope_score = dope_score.drop(["temp1", "temp2"], axis=1)

    return dope_score


def get_fasta_sequence(filename):
    """Retrieve sequence from FASTA file.

    Parameters
    ----------
    filename : str
        FASTA file.

    Returns
    -------
    str
        Obtained sequence.
    """
    sequence = ""
    with open(filename, "r", encoding='UTF-8') as fasta:
        for ligne in fasta:
            if ligne.startswith(">"):
                continue
            sequence += ligne.strip()
    return sequence

In [17]:
i = 1
j = 0
print((i, j) not in [(0, 0), (0, 1), (1, 0)])

False


# Tests

In [18]:
# Informations générales
GAP = 0
DOPE_FILE = "../data/dope.par"
if not Path(DOPE_FILE).exists():
    sys.exit("dope.par missing. DOPE potentials cannot be retrieved.")
DOPE_MATRIX = clean_dope_data(DOPE_FILE)

# Récupération des arguments
if len(sys.argv) < 3:
    sys.exit("Missing arguments: Template and/or Query")

# Vérification du template
PDB_FILE = "../data/1bw6.pdb"
if PDB_FILE.split('.')[-1] != 'pdb':
    sys.exit("Wrong argument: Template is not a PDB file.")
elif not Path(PDB_FILE).exists():
    sys.exit("Wrong argument: Template does not exist.")

TEMPLATE = Template(PDB_FILE)

# Construction du stockage des résultats
RESULTS = []

# Vérification des query
FASTA_FILE = "../data/rcsb_pdb_1BK8.fasta"
if FASTA_FILE.split('.')[-1] != 'fasta':
    sys.exit("Wrong argument: Query is not a fasta file.")
elif not Path(FASTA_FILE).exists():
    sys.exit("Wrong argument: Query does not exist.")

QUERY = get_fasta_sequence(FASTA_FILE)

In [19]:
# Algorithme principal
HIGH_LEVEL = HighLevelMatrix(GAP, QUERY, TEMPLATE, DOPE_MATRIX)
MAX_SCORE = HIGH_LEVEL.fill_matrix()

In [20]:
print(HIGH_LEVEL.lines)
print(HIGH_LEVEL.columns)
print(HIGH_LEVEL.matrix)
print(HIGH_LEVEL.score_matrix)

51
57
[[   0.      0.      0.   ...    0.      0.      0.  ]
 [   0.     -4.27   -4.56 ...   -8.25   -8.25   -8.25]
 [   0.     -5.04  -11.73 ...  -23.46  -23.46  -23.46]
 ...
 [   0.     -5.04  -12.49 ... -429.69 -429.69 -429.69]
 [   0.     -5.04  -12.49 ... -433.93 -434.35 -434.35]
 [   0.     -5.04  -12.49 ... -439.22 -439.98 -442.72]]
[[ -4.27  -4.56  -6.47 ...  -2.26  -1.74   0.  ]
 [ -5.04  -7.46 -11.82 ...  -4.12  -1.84  -0.02]
 [ -3.14  -4.88  -6.09 ...  -4.    -3.22  -1.48]
 ...
 [ -1.94  -3.27  -3.89 ...  -7.19  -5.06  -4.7 ]
 [ -1.46  -3.02  -3.17 ...  -6.29  -4.66  -4.25]
 [  0.    -1.44  -2.   ... -10.09  -6.05  -8.37]]


In [21]:
ALIGN_SEQ, ALIGN_STRUCT = HIGH_LEVEL.get_alignment()
HIGH_LEVEL.print_alignment(MAX_SCORE, ALIGN_SEQ, ALIGN_STRUCT)

# Stockage des résultats
RESULTS.append([FASTA_FILE, MAX_SCORE, ALIGN_STRUCT, ALIGN_SEQ])

RESULTS = pd.DataFrame(RESULTS, columns=['QUERY', 'MAX_SCORE',
                                             'ALIGN_SEQ', 'ALIGN_STRUCT'])

# Organiser selon le score croissant
RESULTS = RESULTS.sort_values(by='MAX_SCORE', ascending=True)

# Sauvegarde des résultats
TEMPLATE_NAME = PDB_FILE.split('.')[-2].split('/')[-1]
RESULTS.to_csv(f"../results/ddt_{TEMPLATE_NAME}.csv", sep=';', index=False)

50 56
match
49 55
match
48 54
match
47 53
match
46 52
match
45 51
match
44 50
match
43 49
match
42 48
match
41 47
match
40 46
match
39 45
match
38 44
match
37 43
match
36 42
match
35 41
match
34 40
match
33 39
match
32 38
match
31 37
match
30 36
match
29 35
match
28 34
match
27 33
match
26 32
match
25 31
match
24 30
match
23 29
match
22 28
match
21 27
match
20 26
gap sequence
20 25
gap sequence
20 24
gap sequence
20 23
match
19 22
match
18 21
match
17 20
match
16 19
match
15 18
match
14 17
match
13 16
match
12 15
match
11 14
match
10 13
match
9 12
match
8 11
match
7 10
gap sequence
7 9
match
6 8
match
5 7
match
4 6
gap sequence
4 5
match
3 4
match
2 3
match
1 2
match
0 1
gap sequence
Optimized alignment, score= -442.72:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 
- L C N E - R P S -  Q  T  W  S  G  N  C  G  N  T  

21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 
A  H  C  -  -  -  D  K  Q  C  Q  D  W  E  K  A  S  

38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 
H  G  A 

In [9]:
ALIGN_SEQ, ALIGN_STRUCT = HIGH_LEVEL.get_alignment()
HIGH_LEVEL.print_alignment(MAX_SCORE, ALIGN_SEQ, ALIGN_STRUCT)

# Stockage des résultats
RESULTS.append([FASTA_FILE, MAX_SCORE, ALIGN_STRUCT, ALIGN_SEQ])

RESULTS = pd.DataFrame(RESULTS, columns=['QUERY', 'MAX_SCORE',
                                             'ALIGN_SEQ', 'ALIGN_STRUCT'])

# Organiser selon le score croissant
RESULTS = RESULTS.sort_values(by='MAX_SCORE', ascending=True)

# Sauvegarde des résultats
TEMPLATE_NAME = PDB_FILE.split('.')[-2].split('/')[-1]
RESULTS.to_csv(f"../results/ddt_{TEMPLATE_NAME}.csv", sep=';', index=False)

50 56
match
49 55
match
48 54
match
47 53
match
46 52
match
45 51
match
44 50
match
43 49
match
42 48
match
41 47
match
40 46
match
39 45
match
38 44
match
37 43
match
36 42
match
35 41
match
34 40
match
33 39
match
32 38
match
31 37
match
30 36
match
29 35
match
28 34
match
27 33
match
26 32
match
25 31
match
24 30
match
23 29
match
22 28
match
21 27
match
20 26
gap sequence
20 25
gap sequence
20 24
gap sequence
20 23
match
19 22
match
18 21
match
17 20
match
16 19
match
15 18
match
14 17
match
13 16
match
12 15
match
11 14
match
10 13
match
9 12
match
8 11
match
7 10
gap sequence
7 9
match
6 8
match
5 7
match
4 6
gap sequence
4 5
match
3 4
match
2 3
match
1 2
match
0 1
match
-1 0
gap structure
-2 0
gap structure
-3 0
gap structure
-4 0
gap structure
-5 0
gap structure
-6 0
gap structure
-7 0
gap structure
-8 0
gap structure
-9 0
gap structure
-10 0
gap structure
-11 0
gap structure
-12 0
gap structure
-13 0
gap structure
-14 0
gap structure
-15 0
gap structure
-16 0
gap structure
-17

IndexError: index -51 is out of bounds for axis 0 with size 50

In [45]:
print(TEMPLATE)

position 0-1, coor( 34.174, -6.97, 4.915)
position 1-2, coor( 34.906, -3.22, 5.235)
position 2-3, coor( 36.94, -1.489, 2.512)
position 3-4, coor( 37.135, -3.558, -0.757)
position 4-5, coor( 35.59, -0.585, -2.651)
position 5-6, coor( 36.948, 2.96, -1.989)
position 6-7, coor( 38.725, 3.544, 1.386)
position 7-8, coor( 39.704, 1.842, 4.702)
position 8-9, coor( 41.092, 2.575, 8.238)
position 9-10, coor( 44.525, 4.242, 8.774)
position 10-11, coor( 45.689, 1.19, 10.824)
position 11-12, coor( 45.228, -1.41, 8.027)
position 12-13, coor( 46.325, 1.252, 5.467)
position 13-14, coor( 49.867, 1.149, 6.946)
position 14-15, coor( 50.065, -2.611, 6.184)
position 15-16, coor( 49.063, -1.849, 2.556)
position 16-17, coor( 52.017, 0.588, 2.211)
position 17-18, coor( 54.399, -2.149, 3.49)
position 18-19, coor( 53.181, -4.764, 0.954)
position 19-20, coor( 52.86, -2.261, -1.961)
position 20-21, coor( 56.285, -0.533, -1.652)
position 21-22, coor( 58.289, -3.667, -0.641)
position 22-23, coor( 57.883, -5.178, -4

In [14]:
# Séquence 'query'
FASTA_FILE = "../data/5AWL.fasta"
QUERY = get_fasta_sequence(FASTA_FILE)

# Structure 'template'
PDB_FILE = "../data/5awl_fake.pdb"
TEMPLATE = Template(PDB_FILE)

# Matrice DOPE
DOPE_FILE = "../data/dope.par"
DOPE_MATRIX = clean_DOPE_data(DOPE_FILE)

# Information(s) supplémentaire(s)
GAP = 0

In [4]:
DIST_MATRIX = TEMPLATE.build_dist_matrix()
FROZEN = {'seq_id': 0, 'pos_id': 0}
SEQUENCE = "YYDPETGTWY"
GAP = 0
    
LOW_TEST = LowLevelMatrix(GAP, FROZEN, DIST_MATRIX, DOPE_MATRIX, SEQUENCE)
MAX_SCORE = LOW_TEST.fill_matrix()
print(MAX_SCORE)
print(LOW_TEST.matrix)

-3.0
[[ 0.    0.    0.    0.    0.    0.    0.    0.    0.    0.  ]
 [ 0.   -1.41 -1.41 -1.41 -1.41 -1.41 -1.41 -1.41 -1.41 -1.41]
 [ 0.   -1.69 -1.69 -1.69 -1.69 -1.69 -1.69 -1.69 -1.85 -1.85]
 [ 0.   -1.69 -1.85 -1.88 -1.88 -1.88 -1.88 -1.88 -2.1  -2.26]
 [ 0.   -1.69 -1.85 -1.88 -1.92 -1.92 -1.92 -1.92 -2.35 -2.57]
 [ 0.   -1.69 -1.85 -1.9  -1.92 -1.95 -1.97 -1.97 -2.35 -2.77]
 [ 0.   -1.69 -1.85 -1.9  -1.92 -1.95 -1.98 -1.98 -2.39 -2.77]
 [ 0.   -1.69 -1.85 -1.9  -1.94 -1.95 -2.   -2.   -2.4  -2.81]
 [ 0.   -1.69 -1.85 -1.97 -1.99 -1.99 -2.07 -2.07 -2.48 -2.88]
 [ 0.   -1.69 -1.85 -1.97 -1.99 -2.06 -2.07 -2.07 -2.59 -3.  ]]


In [5]:
print(LOW_TEST.lines)

10


In [6]:
# Si pas de message, ça tourne
for i in range(10):
    for j in range(10):
        FROZEN = {'seq_id': i, 'pos_id': j}
            
        LOW_TEST = LowLevelMatrix(GAP, FROZEN, DIST_MATRIX, DOPE_MATRIX, SEQUENCE)
        MAX_SCORE = LOW_TEST.fill_matrix()

In [15]:
# Algorithme principal
HIGH_LEVEL = HighLevelMatrix(GAP, QUERY, TEMPLATE, DOPE_MATRIX)
MAX_SCORE = HIGH_LEVEL.fill_matrix()
print(MAX_SCORE)
ALIGN_SEQ, ALIGN_STRUCT = HIGH_LEVEL.get_alignment()
print(ALIGN_SEQ)
print(ALIGN_STRUCT)

-45.35
10 10
match
9 9
match
8 8
match
7 7
match
6 6
match
5 5
match
4 4
match
3 3
match
2 2
match
1 1
match
['Y', 'Y', 'D', 'P', 'E', 'T', 'G', 'T', 'W', 'Y']
['99', '2', '3', '4', '5', '6', '7', '8', '9', '10']


In [None]:
def afficher_alignements(f_struct_align, f_seq_align, max_chars=50):
    i = 0
    while i < min(len(f_struct_align), len(f_seq_align)):
        # Cherche le prochain espace commun après max_chars
        j = i + max_chars
        while j < min(len(f_struct_align), len(f_seq_align)) and (f_struct_align[j] != ' ' or f_seq_align[j] != ' '):
            j += 1
        j = j + 1 if j < min(len(f_struct_align), len(f_seq_align)) else j  # Inclure l'espace
        print(f_struct_align[i:j])
        print(f_seq_align[i:j])
        print()  # Ligne vide pour espacer les alignements
        i = j

In [31]:
def print_alignment(score, sequence_align, structure_align, max_char=50):
    """
    Displays an alignment.

    Parameters
    ----------
    score : float
        Score of alignment.
    sequence_align : list
        Aligned sequence.
    structure_align : list
        Aligned structure.
    max_char : int
        Max characters to show in one line.
        Default is 50.
    """
    # Mise en forme du résultat
    f_seq_align = ""
    f_struct_align = ""
    for index in range(len(sequence_align)):
        max_len = len(str(structure_align[index]))
        f_seq_align += f"{sequence_align[index]:^{max_len}} "
        f_struct_align += f"{structure_align[index]} "

    # Affichage
    print(f"Optimized alignment, score= {score:.2f}:")
    i = 0
    while i < min(len(f_struct_align), len(f_seq_align)):
        j = i + max_char
        while j < min(len(f_struct_align), len(f_seq_align)) and (f_struct_align[j] != ' ' or f_seq_align[j] != ' '):
            j += 1
        if j < min(len(f_struct_align), len(f_seq_align)):
            j = j + 1
        else:
            j = j
            
        print(f_struct_align[i:j])
        print(f_seq_align[i:j])
        print()  # Ligne vide pour espacer les alignements
        i = j

# Algorithme principal
HIGH_LEVEL = HighLevelMatrix(GAP, QUERY, TEMPLATE, DOPE_MATRIX)
MAX_SCORE = HIGH_LEVEL.fill_matrix()
ALIGN_SEQ, ALIGN_STRUCT = HIGH_LEVEL.get_alignment()
print_alignment(MAX_SCORE, ALIGN_SEQ, ALIGN_STRUCT)

10 10
match
9 9
match
8 8
match
7 7
match
6 6
match
5 5
match
4 4
match
3 3
match
2 2
match
1 1
match
Optimized alignment, score= -45.35:
99 2 3 4 5 6 7 8 9 10 
Y  Y D P E T G T W Y  



In [17]:
def afficher_alignements(f_struct_align, f_seq_align, max_chars=50):
    # Diviser les chaînes en morceaux de 'max_chars' caractères
    struct_chunks = [f_struct_align[i:i+max_chars] for i in range(0, len(f_struct_align), max_chars)]
    seq_chunks = [f_seq_align[i:i+max_chars] for i in range(0, len(f_seq_align), max_chars)]

    # Afficher les morceaux l'un au-dessus de l'autre
    for struct, seq in zip(struct_chunks, seq_chunks):
        print(struct)
        print(seq)

# Exemples de chaînes
f_struct_align = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
f_seq_align = "123456789012345678901234567890123456789012345678901234567890"

# Appel de la fonction
afficher_alignements(f_struct_align, f_seq_align)


ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWX
12345678901234567890123456789012345678901234567890
YZABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890


In [19]:
def couper_au_prochain_espace(struct, seq, max_chars):
    """
    Coupe les deux chaînes au prochain espace commun trouvé après max_chars caractères.
    """
    # Commence la recherche à partir de max_chars
    for i in range(max_chars, min(len(struct), len(seq))):
        if struct[i] == ' ' and seq[i] == ' ':
            return i + 1  # Inclure l'espace dans la coupure
    # Si aucun espace commun trouvé, retourner max_chars par défaut
    return max_chars

def afficher_alignements(f_struct_align, f_seq_align, max_chars=50):
    """
    Affiche f_struct_align et f_seq_align l'un au-dessus de l'autre en coupant à la position d'un espace commun
    sur les deux chaînes, avec une limite de max_chars par ligne.
    """
    struct_index = 0
    seq_index = 0
    while struct_index < len(f_struct_align) and seq_index < len(f_seq_align):
        # Trouver la prochaine coupure en fonction de l'espace commun
        cut_position = couper_au_prochain_espace(
            f_struct_align[struct_index:], 
            f_seq_align[seq_index:], 
            max_chars
        )
        # Afficher les morceaux coupés
        print(f_struct_align[struct_index:struct_index + cut_position])
        print(f_seq_align[seq_index:seq_index + cut_position])

        # Mettre à jour les indices
        struct_index += cut_position
        seq_index += cut_position

# Exemples de chaînes avec des espaces
f_struct_align = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A B C D E F G H I J K L M N O P Q R S T U V W X"
f_seq_align = "1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0"

# Appel de la fonction
afficher_alignements(f_struct_align, f_seq_align)

A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 
A B C D E F G H I J K L M N O P Q R S T U V W X
7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0


In [21]:
def afficher_alignements(f_struct_align, f_seq_align, max_chars=50):
    i = 0
    while i < min(len(f_struct_align), len(f_seq_align)):
        # Cherche le prochain espace commun après max_chars
        j = i + max_chars
        while j < min(len(f_struct_align), len(f_seq_align)) and (f_struct_align[j] != ' ' or f_seq_align[j] != ' '):
            j += 1
        j = j + 1 if j < min(len(f_struct_align), len(f_seq_align)) else j  # Inclure l'espace
        print(f_struct_align[i:j])
        print(f_seq_align[i:j])
        print()  # Ligne vide pour espacer les alignements
        i = j

# Exemples de chaînes avec des espaces
f_struct_align = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A B C D E F G H I J K L M N O P Q R S T U V W X"
f_seq_align = "1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0"

# Appel de la fonction
afficher_alignements(f_struct_align, f_seq_align)


A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 

A B C D E F G H I J K L M N O P Q R S T U V W X
7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0



In [9]:
print(HIGH_LEVEL.score_matrix)

[[-3.   -3.52 -3.92 -2.99 -2.41 -2.52 -2.08 -1.85 -1.69  0.  ]
 [-3.   -4.93 -3.96 -3.01 -2.39 -2.59 -2.1  -1.71 -2.21 -0.52]
 [-3.11 -5.16 -5.73 -4.6  -3.91 -4.41 -3.81 -3.38 -4.03 -2.13]
 [-2.94 -4.91 -5.48 -5.5  -4.86 -4.97 -4.76 -4.33 -4.59 -2.75]
 [-2.54 -4.83 -5.27 -4.27 -4.36 -4.71 -4.77 -4.33 -4.08 -2.49]
 [-2.7  -4.5  -4.78 -4.66 -4.53 -5.05 -5.44 -4.99 -4.26 -2.61]
 [-2.39 -4.07 -4.16 -4.32 -3.98 -4.64 -5.   -4.72 -4.21 -2.57]
 [-1.93 -3.67 -3.86 -4.05 -4.21 -4.62 -5.41 -4.67 -4.31 -2.85]
 [-1.45 -2.9  -3.02 -3.66 -3.72 -4.05 -4.9  -4.39 -4.09 -2.74]
 [ 0.   -1.41 -1.73 -2.04 -2.23 -2.61 -3.46 -3.18 -2.77 -3.02]]


In [10]:
# Algorithme principal
test_seq = "WWWWWWWWWW"

HIGH_LEVEL = HighLevelMatrix(GAP, test_seq, TEMPLATE, DOPE_MATRIX)
MAX_SCORE = HIGH_LEVEL.fill_matrix()
print(MAX_SCORE)
ALIGN_SEQ, ALIGN_STRUCT = HIGH_LEVEL.get_alignment()
print(ALIGN_SEQ)
print(ALIGN_STRUCT)

-35.449999999999996
10 10
match
9 9
match
8 8
match
7 7
match
6 6
match
5 5
match
4 4
match
3 3
match
2 2
match
1 1
match
WWWWWWWWWW
992345678910
