# Multiple Sequence Alignment using Genetic Algorithms

In [1]:
import random
from dataclasses import dataclass
from random import randint, uniform

import numpy as np
import pandas as pd
from Bio.Align import PairwiseAligner
from Bio.pairwise2 import align
from Bio.SubsMat.MatrixInfo import blosum62
from vose_sampler import VoseAlias

from ga import GA_MSA
from utils import Utils

In [2]:
# Smoke-test the gap-interval helper. The recorded output for this call is
# [1, 2, 3, 4, 5], which are the indices of the five-dash run in the string.
# presumably the second argument is a position inside that run — TODO confirm.
gapped_sequence = "A-----BC--"
Utils.get_interval_gaps(gapped_sequence, 3)

[1, 2, 3, 4, 5]

In [3]:
# Two toy strings of different lengths (12 and 5 characters) that share the
# prefix "CART".
seq1, seq2 = "CARTABLANCHE", "CARTE"

In [4]:
# Four unaligned strings over the A/C/G/T alphabet; they are the input to the
# pairwise-alignment and genetic-algorithm cells below.
sequences = [
    "ACTGCAACG",
    "ATCTGCTAG",
    "ACCCGAGACTG",
    "CGTAAACGT",
]

In [5]:
# Align the input sequences against each other. The recorded output is a 4x4
# object array whose diagonal entries are the input sequences unchanged.
# presumably entry [i][j] is sequence i gapped against sequence j — TODO confirm.
pairwise_alignments = GA_MSA.compute_pairwise_alignments(sequences)
pairwise_alignments

array([['ACTGCAACG', 'A-CTGCAACG', 'ACTGC-A-AC-G', 'ACTG-CAACG-'],
       ['ATCTGCTA-G', 'ATCTGCTAG', 'ATCTG---CTAG', 'ATCTGCTA---G-'],
       ['ACC-CGAGACTG', 'ACCCGAGACT-G', 'ACCCGAGACTG', 'ACCCG-AGACTG-'],
       ['-C-GTAAACGT', '--C-G-TAAACGT', '---CGTAAAC-GT', 'CGTAAACGT']],
      dtype=object)

In [6]:
# Fix the global RNG state so that Restart & Run All reproduces the same
# populations and fitness scores instead of a different run each time.
# NOTE(review): this only takes effect if GA_MSA draws from the global
# `random` / `numpy.random` generators — confirm against ga.py.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Run the genetic algorithm for 5 generations with a mutation rate of 0.5.
# `run` prints the input matrix, the populations and the per-organism fitness
# scores shown in the recorded output below.
ga = GA_MSA(generations=5, mutation_rate=0.5)
ga.run(sequences=sequences)

Input matrix:
ACTGCAACG
ATCTGCTAG
ACCCGAGACTG
CGTAAACGT

Population 1:
A-CTGCAACG
ATCTGCTAG
ACC-CGAGACTG
CGTAAACGT

Population 2:
ACTGC-A-AC-G
ATCTGCTAG
ACCCGAGACT-G
--C-G-TAAACGT

Population 3:
ACTG-CAACG-
ATCTGCTA-G
ACCCG-AGACTG-
---CGTAAAC-GT

Population 4:
ACTG-CAACG-
ATCTGCTA-G
ACCCGAGACT-G
-C-GTAAACGT

Population 5:
ACTGC-A-AC-G
ATCTGCTA---G-
ACCCGAGACT-G
--C-G-TAAACGT

Population 6:
ACTGCAACG
ATCTGCTA---G-
ACC-CGAGACTG
CGTAAACGT

Population 7:
ACTGCAACG
ATCTG---CTAG
ACCCG-AGACTG-
---CGTAAAC-GT

Population 8:
ACTG-CAACG-
ATCTGCTA---G-
ACC-CGAGACTG
-C-GTAAACGT

Population 9:
ACTG-CAACG-
ATCTGCTAG
ACCCGAGACT-G
CGTAAACGT

Population 10:
ACTGC-A-AC-G
ATCTGCTAG
ACCCGAGACT-G
-C-GTAAACGT


Population Fitness Score: 649.0999999999999

Organism #1
Fitness Score: 35.6
--A-CTGCAACG
A-TCTG-CTAG-
ACC-CGAGACTG
-CGTAAACG-T-

Organism #2
Fitness Score: 46.8
ACT-GC-A-AC-G
-AT-CTG--CTAG
ACCC-GAGACT-G
--C-G-TAAACGT

Organism #3
Fitness Score: 101.3
ACTG--CAACG--
ATC-TGCTA--G-
ACCCG-AGACTG-
---CGTAA

119.2

In [None]:
# NOTE(review): unexecuted scratch cell (it has no execution count) that does
# not use anything defined above. The built-in round(0.51) evaluates to 1.
# Consider removing it before sharing the notebook.
round(0.51)