In [1]:
import global_align as ga

In [2]:
# Gap penalties shared by the cells below.
gap_open_cost = 3
gap_extension_cost = 2

# Similarity scores: a per-base reward on the diagonal, one flat mismatch
# score off the diagonal, and -gap_extension_cost wherever a base faces "-".
bases = ("A", "C", "G", "T")
match_scores = {"A": 2, "C": 3, "G": 1, "T": 1}
mismatch_score = -1
gap_score = -gap_extension_cost

scoring_mat = {
    row_base: {
        **{
            col_base: (match_scores[row_base] if col_base == row_base else mismatch_score)
            for col_base in bases
        },
        "-": gap_score,
    }
    for row_base in bases
}
# The "-" row mirrors the "-" column; the "-"/"-" pair itself scores 1.
scoring_mat["-"] = {**dict.fromkeys(bases, gap_score), "-": 1}

# Turn similarities into costs. The recorded output is consistent with
# cost = max_score - score (e.g. C/C: 3 -> 0) -- confirm against the ga module.
max_score = ga.get_max_similarity_score(scoring_mat=scoring_mat)
cost_mat = ga.transform_scoring_mat_to_cost_mat(scoring_mat=scoring_mat, max_score=max_score)
cost_mat

{'A': {'A': 1, 'C': 4, 'G': 4, 'T': 4, '-': 5},
 'C': {'A': 4, 'C': 0, 'G': 4, 'T': 4, '-': 5},
 'G': {'A': 4, 'C': 4, 'G': 2, 'T': 4, '-': 5},
 'T': {'A': 4, 'C': 4, 'G': 4, 'T': 2, '-': 5},
 '-': {'A': 5, 'C': 5, 'G': 5, 'T': 5, '-': 2}}

In [3]:
# Draw a random pair of sequences over the DNA alphabet, with separate
# min/max length bounds for each sequence.
# NOTE(review): no random seed is set in the visible cells, so the pair shown
# in the recorded output is presumably not reproducible -- confirm whether
# ga.draw_two_random_seqs seeds internally.
seq_1, seq_2 = ga.draw_two_random_seqs(
    alphabet=list("ACGT"),
    min_len_seq_1=7,
    max_len_seq_1=7,
    min_len_seq_2=8,
    max_len_seq_2=9,
    divergence=0.2,
)
print(seq_1, seq_2, sep="\n")

TAATTGA
AAATTGAAT


In [4]:
# Allocate the best-paths matrix for rows 0..middle_row_index only
# (middle_row_index + 1 rows) and one column more than len(seq_2).
middle_row_index = 4
best_paths_mat = ga.init_best_paths_matrix(
    dynamic_prog_num_cols=1 + len(seq_2),
    dynamic_prog_num_rows=1 + middle_row_index,
)
best_paths_mat

[[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
 [4, 2, 2, 2, 2, 2, 2, 2, 2, 2],
 [4, 2, 2, 2, 2, 2, 2, 2, 2, 2],
 [4, 2, 2, 2, 2, 2, 2, 2, 2, 2],
 [4, 2, 2, 2, 2, 2, 2, 2, 2, 2]]

In [5]:
# Initialise the partial dynamic-programming matrix; it has one column more
# than len(seq_2). The recorded output shows two rows being returned.
dynamic_prog_num_cols = 1 + len(seq_2)
partial_dp_mat = ga.init_partial_dynamic_prog_matrix_2(
    dynamic_prog_num_cols=dynamic_prog_num_cols,
    gap_open_cost=gap_open_cost,
    cost_mat=cost_mat,
    seq_2=seq_2,
    seq_1=seq_1,
)
partial_dp_mat

[[0, 8, 13, 18, 23, 28, 33, 38, 43, 48], [8, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [6]:
# Move codes passed to ga.do_core_align_2; judging by the names, these are the
# codes for which a gap-open penalty applies when coming from the left / from
# above -- confirm against the ga module.
moves_for_gap_open_penalty_from_left = {0, 3, 4, 11, 12, 14}
moves_for_gap_open_penalty_from_up = {0, 1, 2, 9, 10, 13}

# Three-element patterns in the order: 3-way tie, 2-way ties for low,
# 2-way ties for high, no ties.
# NOTE(review): presumably these rank three candidate costs (0 = lowest); only
# one "no ties" ordering, (0, 1, 2), is mapped -- verify the others are never
# looked up.
_tie_patterns = (
    (0, 0, 0),
    (0, 0, 2), (0, 2, 0), (2, 0, 0),
    (0, 1, 1), (1, 0, 1), (1, 1, 0),
    (0, 1, 2),
)

# One row per (from_left_best_path_type, from_up_best_path_type) pair; the
# codes line up position-by-position with _tie_patterns.
_codes_by_path_types = (
    ((1, 3), (15, 3, 1, 0, 5, 11, 9, 3)),
    ((1, 4), (16, 4, 1, 0, 6, 12, 9, 4)),
    ((2, 3), (17, 3, 2, 0, 7, 11, 10, 3)),
    ((2, 4), (18, 4, 2, 0, 8, 12, 10, 4)),
)

# Key: (pattern, (from_left_best_path_type, from_up_best_path_type)) -> code.
situation_mapper = {
    (pattern, path_types): code
    for path_types, codes in _codes_by_path_types
    for pattern, code in zip(_tie_patterns, codes)
}

In [7]:
# Run the core alignment on the matrices prepared in the cells above.
# middle_row_index is passed by name rather than repeated as a literal so it
# cannot drift from the value that sized best_paths_mat.
# NOTE(review): the recorded output of this cell is
# "IndexError: string index out of range"; the cause is inside the ga module
# and is not visible from this notebook.
ga.do_core_align_2(
    seq_1=seq_1,
    seq_2=seq_2,
    middle_row_index=middle_row_index,
    best_paths_mat=best_paths_mat,
    partial_dp_mat=partial_dp_mat,
    gap_open_cost=gap_open_cost,
    gap_extension_cost=gap_extension_cost,
    cost_mat=cost_mat,
    moves_for_gap_open_penalty_from_left=moves_for_gap_open_penalty_from_left,
    moves_for_gap_open_penalty_from_up=moves_for_gap_open_penalty_from_up,
    situation_mapper=situation_mapper
)

IndexError: string index out of range

In [6]:
# Alternative hand-picked inputs, all disabled; with every line commented out
# seq_1 / seq_2 keep whatever values an earlier cell assigned.
# seq_1 = "CATGGG"
# seq_1 = "C"
# seq_2 = "ACTG"
# seq_2 = "TATT"
# seq_1 = "ACACAACTAGTGCTACGTAT"
# seq_2 = "T"
# seq_1 = "TC"
# seq_2 = "T"
# seq_1 = "GTCAGCAT"
# seq_2 = "CTCTGAACACG"
# seq_1 = "CGCCTC"
# seq_2 = "GTCG"
# seq_1 = "CGCCT"
# seq_2 = "GTCG"
# seq_1 = "CATGGG"
# seq_2 = "ACTG"

# Full DP grid: one row more than len(seq_1), one column more than len(seq_2).
dynamic_prog_num_rows, dynamic_prog_num_cols = 1 + len(seq_1), 1 + len(seq_2)

In [None]:
# partial_A_mat, partial_B_mat, partial_C_mat = (ga.init_partial_dynamic_prog_matrix(
#     gap_existence_cost=gap_existence_cost,
#     seq_1=seq_1,
#     seq_2=seq_2,
#     scoring_mat=scoring_mat,
#     dynamic_prog_num_cols=dynamic_prog_num_cols
# ) for u in range(3)) 

# best_paths_mat = ga.init_best_paths_matrix(
#     dynamic_prog_num_rows=dynamic_prog_num_rows,
#     dynamic_prog_num_cols=dynamic_prog_num_cols
# )

In [None]:
# ga.do_core_align(
#     seq_1=seq_1,
#     seq_2=seq_2,
#     scoring_mat=scoring_mat,
#     gap_existence_cost=gap_existence_cost,
#     dynamic_prog_num_rows=dynamic_prog_num_rows,
#     dynamic_prog_num_cols=dynamic_prog_num_cols,
#     partial_A_mat=partial_A_mat,
#     partial_B_mat=partial_B_mat,
#     partial_C_mat=partial_C_mat,
#     best_paths_mat=best_paths_mat
# )

In [None]:
# partial_A_mat, partial_B_mat, partial_C_mat, best_paths_mat, score = ga.warmup_align(
#     seq_1=seq_1,
#     seq_2=seq_2,
#     scoring_mat=scoring_mat,
#     gap_existence_cost=gap_existence_cost,
#     dynamic_prog_num_cols=dynamic_prog_num_cols,
#     partial_A_mat=partial_A_mat,
#     partial_B_mat=partial_B_mat,
#     partial_C_mat=partial_C_mat,
#     best_paths_mat=best_paths_mat
# )

In [None]:
# ga.traceback(
#     best_paths_mat=best_paths_mat,
#     seq_1=seq_1[0],
#     seq_2=seq_2
# )

In [None]:
# Align the two sequences end to end and pretty-print the result.
# gap_existence_cost was never assigned in any visible cell, so this call
# raised NameError on a fresh kernel. gap_open_cost is passed instead.
# NOTE(review): this assumes "existence" means the one-time charge for opening
# a gap (BLAST terminology) -- confirm against ga.align.
alignment = ga.align(
    seq_1=seq_1,
    seq_2=seq_2,
    scoring_mat=scoring_mat,
    gap_existence_cost=gap_open_cost
)
ga.print_alignment(
    *alignment,
    chars_per_line=70
)

In [3]:
# Hand-written best-paths matrix (7 rows x 5 columns) fed to ga.traceback.
# NOTE(review): this rebinds best_paths_mat from the earlier cell, and its
# shape would fit sequences of length 6 and 4 if it follows the (len + 1)
# sizing used above -- not the 7/9-symbol pair drawn earlier. Confirm which
# seq_1 / seq_2 it is meant to pair with.
best_paths_mat = [
    [1, 1, 1, 1, 1],
    [2, 0, 1, 0, 0],
    [2, 0, 0, 1, 0],
    [2, 0, 0, 0, 1],
    [2, 2, 0, 0, 0],
    [2, 2, 0, 2, 0],
    [2, 2, 2, 0, 2],
]
best_paths_mat

[[1, 1, 1, 1, 1],
 [2, 0, 1, 0, 0],
 [2, 0, 0, 1, 0],
 [2, 0, 0, 0, 1],
 [2, 2, 0, 0, 0],
 [2, 2, 0, 2, 0],
 [2, 2, 2, 0, 2]]

In [None]:
# Walk best_paths_mat back into an alignment of seq_1 / seq_2 and print it
# with print_alignment's default line width.
alignment = ga.traceback(seq_2=seq_2, seq_1=seq_1, best_paths_mat=best_paths_mat)
ga.print_alignment(*alignment)