In [56]:
from processing.DataLoader import DataLoaderFactory
from processing.AlignerBuilder import *
from processing.Optimizer import *

In [57]:
import numpy as np

In [58]:
# Request the "random" loader from the factory; it supplies both sequences used below.
sequenceLoader = DataLoaderFactory.get_loader("random")

In [59]:
# Load two sequences, passing 70 and 65 to the loader (presumably the requested
# lengths -- TODO confirm against the loader implementation).
# NOTE(review): the loader was requested as "random" and no seed is set in this
# notebook, so a re-run presumably yields different sequences and the saved
# outputs will not be reproduced -- confirm, and seed the generator if possible.
sequence1, sequence2 = (sequenceLoader.load(size) for size in (70, 65))

print(f"Sequence 1: {sequence1}", f"Sequence 2: {sequence2}", sep="\n")

Sequence 1: PPVQWWMCAPTGHCFLATHLSLQLWMLMDAKINGFQEEENRWLLITLWDYAKWNTHINKSSLFKWQCLCR
Sequence 2: ERIGGICHQRHIDSWDPVSNFSDPMMDLLNHVVFGPKPKPKEMPLPCHVWQETRKFTCDNRCLDT


### Alineamiento de secuencias con ajuste manual

In [60]:
# Hand-picked scoring: only match/mismatch and the target's internal gap scores
# are set explicitly; every other argument keeps the AlignerArgs default.
aligner = AlignerBuilder().build(
    AlignerArgs(
        match_score=3,
        mismatch_score=-1,
        target_internal_open_gap_score=-5,
        target_internal_extend_gap_score=-3,
    )
)

In [21]:
# Print the aligner's complete argument set, including the scores that were not set explicitly.
print(aligner.args())

AlignerArgs(match_score=3.0, 
                    mismatch_score=-1.0, 
                    target_internal_open_gap_score=-5.0, 
                    target_internal_extend_gap_score=-3.0, 
                    target_left_open_gap_score=0.0, 
                    target_left_extend_gap_score=0.0, 
                    target_right_open_gap_score=0.0, 
                    target_right_extend_gap_score=0.0, 
                    query_internal_open_gap_score=0.0, 
                    query_internal_extend_gap_score=0.0, 
                    query_left_open_gap_score=0.0, 
                    query_left_extend_gap_score=0.0, 
                    query_right_open_gap_score=0.0, 
                    query_right_extend_gap_score=0.0)


In [22]:
# Align the two loaded sequences with the manually configured aligner.
alignments = aligner.align(sequence1, sequence2)

In [23]:
def get_matches(alignment):
    """Count the positions at which the two aligned rows hold the same symbol.

    Args:
        alignment: Indexable object whose items ``0`` and ``1`` are the two
            aligned rows. The second row is indexed at every position of the
            first one.

    Returns:
        int: Number of positions ``i`` with ``alignment[0][i] == alignment[1][i]``.
    """
    top_row, bottom_row = alignment[0], alignment[1]
    # Index the second row explicitly (instead of zip) so that a shorter
    # second row still raises IndexError rather than being silently truncated.
    return sum(
        1 for pos, symbol in enumerate(top_row) if symbol == bottom_row[pos]
    )

In [24]:
# Report on the first alignment only: the loop exits after one iteration.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 17
Score: 34.0
Alignment:
target            4 RMGRFTMPYRRQHLYREQSTGAWWPAWLDQEGVERVAVMDNIQGGFRHADPGQECTHYQV
                  0 |-|----|-------||---|--.|---.|..|-....|..|---....|-|--.|-|..
query             3 R-G----P-------RE---G--NP---NQKIV-YPSNMGDI---LYIED-G--YT-YWK

target           64 SEFFH 69
                 60 |---| 65
query            35 S---H 37



### Alineamiento de secuencias con algoritmo genético

In [25]:
from processing.Optimizer import *

In [26]:
def fitness_function(aligner):
    """Score a candidate aligner by the number of matching positions it yields.

    Aligns the notebook-level ``sequence1`` and ``sequence2`` with ``aligner``
    and counts the matches in the first alignment returned.

    Args:
        aligner: Object exposing ``align(seq_a, seq_b)`` whose result can be
            indexed with ``[0]``.

    Returns:
        int: Match count of the first alignment, as computed by ``get_matches``.
    """
    first_alignment = aligner.align(sequence1, sequence2)[0]
    # Delegate to the notebook's existing match counter instead of repeating
    # its loop, so both code paths always count matches the same way.
    return get_matches(first_alignment)

In [30]:
# Search for aligner parameters with the genetic algorithm, using fitness_function as the objective.
# NOTE(review): 1200 and 5 are unnamed positional values (presumably population size
# and generation count, or similar) -- confirm against GeneticAlgorithm's signature
# and consider naming them as constants.
geneticAlgorithm = GeneticAlgorithm(1200, 5, sequence1, sequence2, fitness_function)
# run() returns the aligner that replaces the manually configured one from here on.
aligner = geneticAlgorithm.run()

In [32]:
# Print the argument set of the aligner returned by the genetic algorithm.
print(aligner.args())

AlignerArgs(match_score=8.533147827570538, 
                    mismatch_score=-1.9268962573879844, 
                    target_internal_open_gap_score=-0.4058873786894135, 
                    target_internal_extend_gap_score=-2.158757823173969, 
                    target_left_open_gap_score=-5.571837805668731, 
                    target_left_extend_gap_score=-6.36657008289916, 
                    target_right_open_gap_score=-3.988510482895383, 
                    target_right_extend_gap_score=-7.07467582878499, 
                    query_internal_open_gap_score=-1.9226480932711587, 
                    query_internal_extend_gap_score=-2.215102699217634, 
                    query_left_open_gap_score=-9.99125959848215, 
                    query_left_extend_gap_score=-1.444828052277225, 
                    query_right_open_gap_score=-5.507051270807932, 
                    query_right_extend_gap_score=-4.271952470364822)


In [31]:
# Align with the aligner produced by the genetic algorithm.
# The result is named `alignments` (as in the other cells) so the loop variable
# no longer shadows the collection it iterates over.
alignments = aligner.align(sequence1, sequence2)

# Report on the first alignment only: the loop exits after one iteration.
for alignment in alignments:
    print(f"Alignment: {alignment}")
    print(f"Matches: {get_matches(alignment)}")
    print(f"Score: {alignment.score}")
    break

Alignment: target            4 RMG-RFTMPYRRQ-HLYREQS-TGAWWPAWLDQE-G------V-ERVAVMDNIQGGFRHA
                  0 |-|-|...|-.-|-..|-.-|-.|--.-.-|..|-|------.-|-|...||---.-|--
query             3 R-GPREGNP-N-QKIVY-P-SNMG--D-I-LYIEDGYTYWKSHCE-VCGKDN---K-R--

target           53 DPGQECT----HYQVSEF 67
                 60 -|----|----|...|.| 78
query            47 -P----TLNIAHKWPSWF 60

Matches: 21
Score: 75.90476193134981


### Alineamiento de secuencias con matriz de puntuación

In [45]:
# Build an aligner without explicit arguments; the scoring matrix is passed per call to align() in the cells below.
aligner = AlignerBuilder().build()

- Para la matriz de puntuación BLOSUM62

In [46]:
# Align the same pair of sequences, selecting the BLOSUM62 scoring matrix by name.
alignments = aligner.align(sequence1, sequence2, matrix="blosum62")

In [44]:
# Report on the first alignment only: the loop exits after one iteration.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 15
Score: 89.0
Alignment:
target            4 RMGRFTMPYRRQHLYREQSTGAWWPAWLDQEGVERVAVMDNIQGGFRHADPGQECTHYQV
                  0 |-|----|-------||---|.--|---.|.-.--|-...|.-|.......|.--|...-
query             3 R-G----P-------RE---GN--P---NQK-I--V-YPSNM-GDILYIEDGY--TYWK-

target           64 SEFFH 69
                 60 |---| 65
query            35 S---H 37



- Para la matriz de puntuación PAM250

In [48]:
# Align the same pair of sequences, selecting the PAM250 scoring matrix by name.
alignments = aligner.align(sequence1, sequence2, matrix="pam250")

In [49]:
# Report on the first alignment only: the loop exits after one iteration.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 18
Score: 145.0
Alignment:
target            2 QK--RMG-RFTM---P-----YRRQ--H----LYR-EQSTG-A-WW-PAWLDQ-E--GV-
                  0 .---|-|-|------|-----|-----.----||--|.--|-.-|--.----.-|--|--
query             0 D-FDR-GPR---EGNPNQKIVY---PSNMGDILY-IED--GYTYW-KS----HCEVCG-K

target           37 E--RVAV-MDNIQGGFRHAD--PGQECTH-Y---QV 64
                 60 .--|-.--..-|-.---|----|.------.---|| 96
query            43 DNKR-P-TLN-I-A---H--KWPS-----WFALAQV 65



- Para la matriz de puntuación BLOSUM50

In [50]:
# Align the same pair of sequences, selecting the BLOSUM50 scoring matrix by name.
alignments = aligner.align(sequence1, sequence2, matrix="blosum50")

In [51]:
# Report on the first alignment only: the loop exits after one iteration.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 20
Score: 175.0
Alignment:
target            4 RMGRFTMPYRRQHLYREQSTGAWW-PAWLDQEGVERVAV----M-DNI---Q-GGF---R
                  0 |-|----|-|------|---|----|---.|----..-|----|-|-|---.-|-.---.
query             3 R-G----P-R------E---G---NP---NQ----KI-VYPSNMGD-ILYIEDG-YTYWK

target           51 -HADPGQ-E-C-------T----HYQV--SEFF---H 69
                 60 -|------|-|-------|----|-.---|-.|---. 97
query            35 SH-----CEVCGKDNKRPTLNIAH-K-WPS-WFALAQ 64



- Para la matriz de puntuación BLOSUM80

In [52]:
# Align the same pair of sequences, selecting the BLOSUM80 scoring matrix by name.
alignments = aligner.align(sequence1, sequence2, matrix="blosum80")

In [53]:
# Report on the first alignment only: the loop exits after one iteration.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 20
Score: 213.0
Alignment:
target            4 RMGRFTMPYRRQHLYREQSTGAWW-PAWLDQEGVERVAV----M-DNI---Q-GGF---R
                  0 |-|----|-|------|---|----|---.|----..-|----|-|-|---.-|-.---.
query             3 R-G----P-R------E---G---NP---NQ----KI-VYPSNMGD-ILYIEDG-YTYWK

target           51 -HA------D---PGQECT----H-YQV-SE-FF---H 69
                 60 -|-------|---|----|----|-.---|--|----. 98
query            35 SH-CEVCGKDNKRP----TLNIAHKW--PS-WF-ALAQ 64



- Para la matriz de puntuación PAM30

In [54]:
# Align the same pair of sequences, selecting the PAM30 scoring matrix by name.
alignments = aligner.align(sequence1, sequence2, matrix="pam30")

In [55]:
# Report on the first alignment only: the loop exits after one iteration.
for alignment in alignments:
    print(
        f"Matches: {get_matches(alignment)}",
        f"Score: {alignment.score}",
        "Alignment:",
        alignment,
        sep="\n",
    )
    break

Matches: 20
Score: 167.0
Alignment:
target            4 RMGRFTMPYRRQHLYREQSTGAWW-PAWLDQEG-VERVAV----M-DNI---Q-GGFR--
                  0 |-|----|-|------|---|----|---.|---.--|------|-|-|---.-|-.---
query             3 R-G----P-R------E---G---NP---NQ--KI--V--YPSNMGD-ILYIEDG-Y-TY

target           51 ---HADPGQ-E-C-------T----H-----Y---QV 64
                 60 ---|------|-|-------|----|-----.---|| 97
query            33 WKSH-----CEVCGKDNKRPTLNIAHKWPSWFALAQV 65

