In [1]:
from processing.DataLoader import DataLoaderFactory
from processing.AlignerBuilder import *
from processing.Optimizer import *

In [2]:
import numpy as np

## Generación aleatoria de secuencias de aminoácidos

In [3]:
# Ask the factory for the loader registered under the "random" key.
# NOTE(review): no random seed is set anywhere visible in this notebook, so the
# sequences printed below will presumably change on every run — confirm and seed if needed.
sequenceLoader = DataLoaderFactory.get_loader("random")

In [4]:
# Request two sequences from the random loader; the argument is the requested
# length (the printed results have 70 and 65 residues respectively).
sequence1 = sequenceLoader.load(70)
sequence2 = sequenceLoader.load(65)

# Show both sequences so the alignments below can be checked by eye.
print(f"Sequence 1: {sequence1}")
print(f"Sequence 2: {sequence2}")

Sequence 1: DHCPFAAMEPFNNERLLMYSHTVLITSKSIFRMRQCSIQLGWYHAPNWRSPQLKCHRSLRHDDGTRMPWI
Sequence 2: NTTSIISDRHRVGFYPVAGQCSSNSYDDCTLFSGQQHYRDYTIIYCPLHDVLGSWGIECKQYPLL


### Alineamiento de secuencias con ajuste manual

In [5]:
# Hand-tuned scoring: only the match/mismatch scores and the target's internal
# gap penalties are set explicitly; every other AlignerArgs field is left at
# its default.
aligner = AlignerBuilder().build(
    AlignerArgs(
        match_score=3,
        mismatch_score=-1,
        target_internal_extend_gap_score=-3,
        target_internal_open_gap_score=-5,
    )
)

In [6]:
# Print the full argument set of the aligner, including the fields not set explicitly above.
print(aligner.args())

AlignerArgs(match_score=3.0, 
                    mismatch_score=-1.0, 
                    target_internal_open_gap_score=-5.0, 
                    target_internal_extend_gap_score=-3.0, 
                    target_left_open_gap_score=0.0, 
                    target_left_extend_gap_score=0.0, 
                    target_right_open_gap_score=0.0, 
                    target_right_extend_gap_score=0.0, 
                    query_internal_open_gap_score=0.0, 
                    query_internal_extend_gap_score=0.0, 
                    query_left_open_gap_score=0.0, 
                    query_left_extend_gap_score=0.0, 
                    query_right_open_gap_score=0.0, 
                    query_right_extend_gap_score=0.0)


In [7]:
# Align the two sequences with the manually configured aligner.
alignments = aligner.align(sequence1, sequence2)

In [8]:
def get_matches(alignment):
    """Count the columns in which both aligned rows hold the same character.

    The two rows are read as ``alignment[0]`` and ``alignment[1]`` and compared
    index by index over the length of the first row.

    Args:
        alignment: Any object whose items 0 and 1 are indexable sequences.

    Returns:
        int: Number of positions where the two rows are equal.
    """
    first_row, second_row = alignment[0], alignment[1]
    return sum(
        1
        for position in range(len(first_row))
        if first_row[position] == second_row[position]
    )

In [9]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 14
Score: 36.0
Alignment:
target            2 CPFAAMEPFNNERLLMYSHTVLITSKSIFRMRQCSIQLGWYHAPNWRSPQLKCHRSL 59
                  0 |------|------|---|.||-.|..|----.|-.|---|-------|-|-----| 57
query            45 C------P------L---HDVL-GSWGI----EC-KQ---Y-------P-L-----L 65



### Alineamiento de secuencias con algoritmo genético

In [10]:
from processing.Optimizer import *

In [11]:
def fitness_function(aligner):
    """Fitness of a candidate aligner: identical columns in its first alignment.

    Aligns the notebook-level ``sequence1`` and ``sequence2`` with ``aligner``,
    takes the first alignment returned, and delegates the counting to
    ``get_matches`` so the fitness and the "Matches" figure printed elsewhere
    are always computed the same way.

    Args:
        aligner: Object exposing ``align(seq_a, seq_b)`` whose result can be
            indexed with ``[0]``.

    Returns:
        int: Number of columns where both aligned rows hold the same character.
    """
    # sequence1 / sequence2 are looked up in the notebook's global namespace at
    # call time, so re-assigning them later changes what is being optimised.
    return get_matches(aligner.align(sequence1, sequence2)[0])

In [12]:
# Search for aligner parameters with the genetic algorithm, scored by fitness_function.
# run() returns an aligner, which replaces the manually configured one bound to `aligner`.
# NOTE(review): 1200 and 5 are unexplained positional arguments (presumably
# population size / number of generations — confirm against GeneticAlgorithm).
# NOTE(review): no seed is set here, so the parameters printed below may not be
# reproducible — TODO confirm.
geneticAlgorithm = GeneticAlgorithm(1200, 5, sequence1, sequence2, fitness_function)
aligner = geneticAlgorithm.run()

In [13]:
# Print the argument set of the aligner returned by the genetic algorithm.
print(aligner.args())

AlignerArgs(match_score=9.229707884146155, 
                    mismatch_score=-8.756488005715608, 
                    target_internal_open_gap_score=-3.224088645118397, 
                    target_internal_extend_gap_score=-0.4563923477015086, 
                    target_left_open_gap_score=-1.7963988690511445, 
                    target_left_extend_gap_score=-9.717966725467305, 
                    target_right_open_gap_score=-0.9989004029054005, 
                    target_right_extend_gap_score=-7.451833494085273, 
                    query_internal_open_gap_score=-1.0838475742028097, 
                    query_internal_extend_gap_score=-0.15781246997672915, 
                    query_left_open_gap_score=-3.505197032576329, 
                    query_left_extend_gap_score=-8.056491461288694, 
                    query_right_open_gap_score=-3.628544636973369, 
                    query_right_extend_gap_score=-3.1911940736988385)


In [14]:
# Align with the aligner returned by the genetic algorithm. The result is bound
# to `alignments` (plural) so the collection and the loop variable no longer
# share one name.
alignments = aligner.align(sequence1, sequence2)

# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(f"Alignment: {alignment}")
    print(f"Matches: {get_matches(alignment)}")
    print(f"Score: {alignment.score}")
    break

Alignment: target           12 NERLLMYSHTVLITSKSI---FR-MR--------QCS--------IQL--G---WYHAPN
                  0 |--------|---||--|----|--|--------|||----------|--|----|----
query             0 N--------T---TS--IISD-RH-RVGFYPVAGQCSSNSYDDCT--LFSGQQH-Y----

target           47 WR-------SPQLKCHRSLRHD--DG-TRMPW-I 70
                 60 -|--------|-------|-||---|-----|-| 94
query            38 -RDYTIIYC-P-------L-HDVL-GS----WGI 57

Matches: 21
Score: 131.83926033156794


### Alineamiento de secuencias con matriz de puntuación

In [15]:
# Build an aligner without passing any AlignerArgs; this replaces the
# genetic-algorithm aligner bound to `aligner`.
aligner = AlignerBuilder().build()

- Para la matriz de puntuación BLOSUM62

In [16]:
# Align the two sequences, requesting the "blosum62" scoring matrix.
alignments = aligner.align(sequence1, sequence2, matrix="blosum62")

In [17]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 20
Score: 133.0
Alignment:
target            0 D-HCP---FAAME-PF-----N-NERLLM-YSH---TVLITSKSIFRMRQCSIQLGWY--
                  0 |-|-----|-----|------.-|------|-----|-|------|-----|---|----
query             7 DRH--RVGF----YP-VAGQCSSN-----SY--DDCT-L------F-----S---G--QQ

target           43 HAPNWR--SPQLK--CHRS-LRHDD--GTRMPW-I 70
                 60 |---.|--.--.---|----|-||---|.---|-| 95
query            36 H---YRDYT--I-IYC---PL-HD-VLGS---WGI 57



- Para la matriz de puntuación PAM250

In [18]:
# Align the two sequences, requesting the "pam250" scoring matrix.
alignments = aligner.align(sequence1, sequence2, matrix="pam250")

In [19]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 15
Score: 160.0
Alignment:
target            0 D-H----------CP---F---AAMEPFN-NE--RLLM-YSHTVLITSKSIFRMRQCSIQ
                  0 |-|----------|.---.---.-.--|.-..--|----|--|--|----|.----|...
query             7 DRHRVGFYPVAGQCSSNSYDDCT-L--FSGQQHYR---DY--T--I----IY----CPLH

target           39 --LGWYHAPNWRSPQLK-CHR--SLRHDDGTRM 67
                 60 --||---.--|-.--.--|-.--.|-------. 93
query            49 DVLG---S--W-G--I-EC-KQYPL-------L 65



- Para la matriz de puntuación BLOSUM50

In [20]:
# Align the two sequences, requesting the "blosum50" scoring matrix.
alignments = aligner.align(sequence1, sequence2, matrix="blosum50")

In [21]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 19
Score: 174.0
Alignment:
target            0 D-HCP---FAAME-PFNN---ERLLMY-SHTVLITSK-SIFRMRQ--CSIQL--GWY--H
                  0 |-|-----|-----|------.------|------|--|-.------|.--|--|----|
query             7 DRH--RVGF----YP---VAGQ-----CS------S-NS-Y----DDCT--LFSG--QQH

target           44 APNWR--SPQLK--CHRS-LRHDD--GTRMPW-I 70
                 60 ---.|--.--.---|----|-||---|.---|-| 94
query            37 ---YRDYT--I-IYC---PL-HD-VLGS---WGI 57



- Para la matriz de puntuación BLOSUM80

In [22]:
# Align the two sequences, requesting the "blosum80" scoring matrix.
alignments = aligner.align(sequence1, sequence2, matrix="blosum80")

In [23]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 20
Score: 210.0
Alignment:
target            0 D-HCP---FAAME-PF-----N-NERLLM-YSH---TVLITSKSIFRMR--QCSIQLGWY
                  0 |-|-----|-----|------.-|------|-----|-|------|-----|---|----
query             7 DRH--RVGF----YP-VAGQCSSN-----SY--DDCT-L------F---SGQ---Q----

target           43 HAPNWR--SPQLK--CHRS-LRHDD--GTRMPW-I 70
                 60 |---.|--.--.---|----|-||---|.---|-| 95
query            36 H---YRDYT--I-IYC---PL-HD-VLGS---WGI 57



- Para la matriz de puntuación PAM30

In [24]:
# Align the two sequences, requesting the "pam30" scoring matrix.
alignments = aligner.align(sequence1, sequence2, matrix="pam30")

In [25]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 21
Score: 170.0
Alignment:
target           11 NNERLLMYSHTVLITSKSIF---RM-RQCSIQLGW-YHAP-------NWRSPQLK---C-
                  0 |---------|---||--|----|--|---.--|--|--|-------|--|-------|-
query             0 N---------T---TS--I-ISDR-HR---V--G-FY--PVAGQCSSN--S----YDDCT

target           55 ------H-RS--------LRHDD--GTRMP-W-I 70
                 60 ------|-|---------|-||---|-----|-| 94
query            30 LFSGQQHYR-DYTIIYCPL-HD-VLG----SWGI 57



## Descarga de ficheros FASTA

In [26]:
# Switch to the loader registered under the "api" key; this rebinds
# `sequenceLoader`, replacing the random loader used so far.
sequenceLoader = DataLoaderFactory.get_loader("api")

In [27]:
# load() is given a one-element tuple of identifiers and returns an indexable
# collection; the first record's `.seq` is kept.
# NOTE(review): the identifiers look like NCBI protein accessions and the call
# presumably needs network access — confirm.
# This rebinds sequence1 / sequence2, which fitness_function reads as globals.
sequence1 = sequenceLoader.load(('ABG47031.1',))[0].seq
sequence2 = sequenceLoader.load(('AUJ50941.1',))[0].seq

# Show both downloaded sequences.
print(f"Sequence 1: {sequence1}")
print(f"Sequence 2: {sequence2}")

Sequence 1: MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFR
Sequence 2: MKYNEINNEGVEKLMDIFYAKIRTHEQLGPIFNGAVGIDDASWERHKEKIAKFWKTMLLNENLYMGNPVQPHINLLPFDIKLFDVWLDLFKECLDQVFEEKASEHFYEVACNIAKNFKAVLFQQ


### Alineamiento de secuencias con algoritmo genético

In [28]:
# Run the genetic algorithm again, now on the downloaded sequences; run()
# returns an aligner, which is bound to `aligner`.
# NOTE(review): 1400 and 20 are unexplained positional arguments (presumably
# population size / number of generations — confirm against GeneticAlgorithm).
# NOTE(review): no seed is set here, so the parameters printed below may not be
# reproducible — TODO confirm.
geneticAlgorithm = GeneticAlgorithm(1400, 20, sequence1, sequence2, fitness_function)
aligner = geneticAlgorithm.run()

In [29]:
# Print the argument set of the aligner returned by the genetic algorithm.
print(aligner.args())

AlignerArgs(match_score=4.045293730863058, 
                    mismatch_score=-4.715424683647572, 
                    target_internal_open_gap_score=-0.04160746112998104, 
                    target_internal_extend_gap_score=-0.68089970660967, 
                    target_left_open_gap_score=-2.5207757657596774, 
                    target_left_extend_gap_score=-3.8349560404192173, 
                    target_right_open_gap_score=-4.776379546439612, 
                    target_right_extend_gap_score=-0.9534176940238615, 
                    query_internal_open_gap_score=-0.4137592881127883, 
                    query_internal_extend_gap_score=-0.08009058594149465, 
                    query_left_open_gap_score=-4.603482442746141, 
                    query_left_extend_gap_score=-6.286504483987569, 
                    query_right_open_gap_score=-3.724635985595305, 
                    query_right_extend_gap_score=-4.338137490820411)


In [30]:
# Align with the aligner returned by the genetic algorithm. The result is bound
# to `alignments` (plural) so the collection and the loop variable no longer
# share one name.
alignments = aligner.align(sequence1, sequence2)

# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(f"Alignment: {alignment}")
    print(f"Matches: {get_matches(alignment)}")
    print(f"Score: {alignment.score}")
    break

Alignment: target            0 M--V-H-L-T-P-E--EK-----S-A--V-T--A-LWG-K-V-N--V--D----E-V-GG
                  0 |------------|--||-------|----|----|-|-----|--|--|----|-----
query             0 MKY-N-E-I-N-NEGVEKLMDIF-YAKI-RTHE-QL-GP-I-FNGAVGIDDASWER-H--

target           26 -E--A-LGRLLVVYP-W-T-Q-RF-F-E-SFGDL-STPDAVMGNPKV-KA-H-GKKV-L-
                 60 -|--|-----------|-|--------|-----|-------||||-|----|------|-
query            46 KEKIAK---------FWKTM-L--L-NEN----LY------MGNP-VQ--PHI----NLL

target           69 GA-FSD-G-L--A-H-LDNL-K----G-T-F---ATLSELH-----C--D-KLHVDPENF
                120 ---|-|---|------||-|-|--------|---|--||-|-----|----|------||
query            76 --PF-DI-KLFD-V-WLD-LFKECLD-Q-VFEEKA--SE-HFYEVACNI-AK------NF

target          104 
                180 
query           117 

Matches: 41
Score: 130.32212699437665


### Alineamiento de secuencias con matriz de puntuación

In [31]:
# Build an aligner without passing any AlignerArgs; this replaces the
# genetic-algorithm aligner bound to `aligner`.
aligner = AlignerBuilder().build()

- Para la matriz de puntuación BLOSUM62

In [32]:
# Align the two downloaded sequences, requesting the "blosum62" scoring matrix.
alignments = aligner.align(sequence1, sequence2, matrix="blosum62")

In [33]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 40
Score: 248.0
Alignment:
target            0 MV-H--LTP--E--EKS------A-V-TA---LWGK-V-NVDEVGGEALGRLLVVYP---
                  0 |--.--.----|--||-------|-.-|----|-|--.-|----|--|.|---.------
query             0 M-KYNEI--NNEGVEK-LMDIFYAKIRT-HEQL-G-PIFN----G--AVG---I---DDA

target           37 -WTQRFF--E--S-FGDLS--TP----DA-V-MGNPKVKA-HGKKV-LGA--FSDG--LA
                 60 -|-.|----|--.-|------|-----.--.-||||-|.--|---.-|----|-|---|-
query            41 SW-ER--HKEKIAKF----WKT-MLLNE-NLYMGNP-VQ-PH---INL--LPF-D-IKL-

target           77 H----LDNL-KGT------F---ATLSELH-----CD--KLHVDPENFR 105
                120 -----||-|-|--------|---|--||-|-----|.--|------||. 169
query            82 -FDVWLD-LFK--ECLDQVFEEKA--SE-HFYEVACNIAK------NFK 118



- Para la matriz de puntuación PAM250

In [34]:
# Align the two downloaded sequences, requesting the "pam250" scoring matrix.
alignments = aligner.align(sequence1, sequence2, matrix="pam250")

In [35]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 38
Score: 267.0
Alignment:
target            0 M----VHLTP-E--EKSAVTALW-G----KV--NVDEVG----GEALGRLLVVY--P-WT
                  0 |----..----|--||-----|--.----|.--.-...|----|-|.|---.----.-|-
query             0 MKYNEIN---NEGVEK-----L-MDIFYAKIRTH-EQLGPIFNG-AVG---I--DDASW-

target           39 QRFF--E--S-FGD----L-STPDA-V-MGNPKVKAHGKKV-LG-AFSDG--LA-H--LD
                 60 .|----|--.-|------|-.--.--.-||||-|..|---.-|--.|-|---|--.--||
query            43 ER--HKEKIAKF--WKTMLLN--E-NLYMGNP-VQPH---INL-LPF-D-IKL-FDVWLD

target           80 NL-K---GT--F---ATLSELH-----CD--KLH---VDPEN-FR 105
                120 -|-|---.---|---|--||-|-----|.--|-.---|-----|. 165
query            88 -LFKECLD-QVFEEKA--SE-HFYEVACNIAK-NFKAV----LFQ 123



- Para la matriz de puntuación BLOSUM50

In [36]:
# Align the two downloaded sequences, requesting the "blosum50" scoring matrix.
alignments = aligner.align(sequence1, sequence2, matrix="blosum50")

In [37]:
# Report only the first alignment yielded; the loop exits right after it.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 40
Score: 318.0
Alignment:
target            0 M----VHLTP-E--EKS------A-V-TA---LWGK-V-NVDEVGGEALGRLLVVYP---
                  0 |----..----|--||-------|-.-|----|-|--.-|----|--|.|---.------
query             0 MKYNEIN---NEGVEK-LMDIFYAKIRT-HEQL-G-PIFN----G--AVG---I---DDA

target           37 -WTQRFF--E--S-FGD----L-STPDA-V-MGNPKVKA-HGKKV-LGA--FSDG--LAH
                 60 -|-.|----|--.-|------|-.--.--.-||||-|.--|---.-|----|-|---|--
query            41 SW-ER--HKEKIAKF--WKTMLLN--E-NLYMGNP-VQ-PH---INL--LPF-D-IKL--

target           78 ----LDNL-KGT------F---ATLSELH-----CD--KLHVDPENFR 105
                120 ----||-|-|--------|---|--||-|-----|.--|------||. 168
query            82 FDVWLD-LFK--ECLDQVFEEKA--SE-HFYEVACNIAK------NFK 118

