# Multiple Sequence Alignment using Genetic Algorithms

In [5]:
import random
from dataclasses import dataclass
from random import randint, uniform

import numpy as np
import pandas as pd
from Bio.Align import PairwiseAligner
from Bio.pairwise2 import align
from Bio.SubsMat.MatrixInfo import blosum62
from vose_sampler import VoseAlias

from ga import GA_MSA
from utils import Utils

In [6]:
# Spot-check Utils.get_interval_gaps on a hand-written gapped string.
# The recorded result for this call is [1, 2, 3, 4, 5], i.e. the indices of
# the first run of '-' characters in "A-----BC--".
# NOTE(review): presumably the second argument (3) is a position inside the
# gap run whose full index interval is returned -- confirm against utils.py.
Utils.get_interval_gaps("A-----BC--", 3)

[1, 2, 3, 4, 5]

In [7]:
# Two toy strings of different lengths (12 and 5 characters).
# NOTE(review): neither name is referenced by the cells visible here --
# presumably scratch input for a pairwise-alignment experiment; confirm
# before removing.
seq1, seq2 = "CARTABLANCHE", "CARTE"

In [8]:
# Unaligned input for the cells below: four A/C/G/T strings of
# unequal length (9, 9, 11 and 9 characters).
sequences = [
    "ACTGCAACG",
    "ATCTGCTAG",
    "ACCCGAGACTG",
    "CGTAAACGT",
]

In [9]:
# Show the pairwise alignments GA_MSA derives from `sequences`.
# The recorded result is a 4x4 object array of gapped strings: every entry in
# row i is sequences[i] with '-' characters inserted, and the diagonal holds
# the unmodified inputs.
# NOTE(review): presumably entry [i][j] is sequence i as aligned against
# sequence j -- confirm against ga.py.
GA_MSA.compute_pairwise_alignments(sequences)

array([['ACTGCAACG', 'A-CTGCAACG', 'ACTGC-A-AC-G', 'ACTG-CAACG-'],
       ['ATCTGCTA-G', 'ATCTGCTAG', 'ATCTG---CTAG', 'ATCTGCTA---G-'],
       ['ACC-CGAGACTG', 'ACCCGAGACT-G', 'ACCCGAGACTG', 'ACCCG-AGACTG-'],
       ['-C-GTAAACGT', '--C-G-TAAACGT', '---CGTAAAC-GT', 'CGTAAACGT']],
      dtype=object)

In [10]:
# Run the genetic algorithm on `sequences` for 5 generations with a
# mutation rate of 0.5.
#
# The recorded populations and fitness scores come from a randomised search,
# so seed the global RNGs first; otherwise Restart & Run All yields different
# output on every execution.
# NOTE(review): this assumes GA_MSA (and the sampler it uses) draw from the
# global `random` / `numpy.random` state -- confirm against ga.py. If it owns
# a private generator, the seed has to be passed to it instead.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

ga = GA_MSA(generations=5, mutation_rate=0.5)
# Last expression of the cell: in the recorded run it printed the input
# matrix, the populations and per-organism fitness scores, and the cell
# displayed 130.5 as its final value.
ga.run(sequences=sequences)

Input matrix:
ACTGCAACG
ATCTGCTAG
ACCCGAGACTG
CGTAAACGT

Population 1:
ACTGC-A-AC-G
ATCTGCTA---G-
ACCCG-AGACTG-
CGTAAACGT

Population 2:
A-CTGCAACG
ATCTG---CTAG
ACCCG-AGACTG-
-C-GTAAACGT

Population 3:
A-CTGCAACG
ATCTGCTAG
ACCCG-AGACTG-
-C-GTAAACGT

Population 4:
A-CTGCAACG
ATCTGCTA-G
ACCCG-AGACTG-
CGTAAACGT

Population 5:
ACTGCAACG
ATCTG---CTAG
ACCCGAGACT-G
CGTAAACGT

Population 6:
ACTGCAACG
ATCTGCTA-G
ACCCG-AGACTG-
-C-GTAAACGT

Population 7:
ACTGCAACG
ATCTGCTA-G
ACCCGAGACTG
---CGTAAAC-GT

Population 8:
A-CTGCAACG
ATCTGCTA---G-
ACCCGAGACTG
---CGTAAAC-GT

Population 9:
A-CTGCAACG
ATCTGCTA---G-
ACCCG-AGACTG-
-C-GTAAACGT

Population 10:
ACTG-CAACG-
ATCTGCTA-G
ACCCGAGACTG
---CGTAAAC-GT


Population Fitness Score: 543.6999999999999

Organism #1
Fitness Score: 92.6
-ACTGC-A-AC-G
ATCTGCTA---G-
ACCCG-AGACTG-
C-GT--AAAC-GT

Organism #2
Fitness Score: 75.2
A--CT-GCAACG-
AT-CTG---CTAG
ACCCG-AGACTG-
-C-GTAAA--CGT

Organism #3
Fitness Score: 68.3
A-CT-GC--AACG
-ATCTGC-TA-G-
ACCCG-AGACTG-
--C--GTAA

130.5