# Advanced multiple sequence alignment - multiple tracks

This example is identical to the "MSABasic" example, except that it uses two additional tracks in the scoring:
* A track containing a motif prediction for the N-linked glycosylation (N-glycosylation) motif (`N-{P}-[ST]-{P}`).
* A track containing the predicted (3-state) secondary structure.

## Import and setup PRALINE 2

In [None]:
import praline
import praline.container
import praline.component
from praline.core import *

# Create the type index and let it automatically load (register) all
# components which are installed.
index = TypeIndex()
index.autoregister()

# Will be used to actually run the PRALINE alignments: the manager is passed
# to every Execution context created further down.
manager = Manager(index)

## Create custom alphabets

In [None]:
# Alphabet for the 3-state secondary structure track. The symbols C, H and E
# are numbered 0, 1 and 2 in that order (presumably coil, helix and strand --
# the meaning of the letters is not spelled out here).
ALPHABET_3STATE_SS = praline.container.Alphabet(
    'praline.example.ThreeStateSecondaryStructure',
    list(zip('CHE', range(3)))
)

# Alphabet for the motif track: '*' is numbered 0 and 'M' is numbered 1.
ALPHABET_MOTIF = praline.container.Alphabet(
    'praline.example.SimpleMotifMatch',
    list(zip('*M', range(2)))
)

## Load/create score matrices and setup environment

To see more options you can configure in the alignment environment, see the comments at the top of every Component subclass.

In [None]:
# Load the built-in BLOSUM62 score matrix, used for the amino acid track.
# The amino acid alphabet is taken from praline.container, the same module
# the sequence loading code further down takes it from, so the alphabet is
# referenced consistently throughout this example.
with praline.open_builtin('matrices/blosum62') as matrix_file:
    score_matrix = praline.load_score_matrix(
        matrix_file,
        alphabet=praline.container.ALPHABET_AA
    )

# Very simple scoring matrices. Only the score values below are rewarded;
# every other symbol pair scores NO_BONUS.
MOTIF_MATCH = 15   # both positions carry a motif match ('M')
SS_MATCH = 3       # both positions have the same secondary structure symbol
NO_BONUS = 0

# Motif track: only an M/M pair is rewarded.
MOTIF_SCORES = {
    ('M', 'M'): MOTIF_MATCH,
    ('M', '*'): NO_BONUS,
    ('*', 'M'): NO_BONUS,
    ('*', '*'): NO_BONUS,
}

# Secondary structure track: identical symbols are rewarded, mismatches are not.
SS_SCORES = {
    ('C', 'C'): SS_MATCH,
    ('C', 'H'): NO_BONUS,
    ('C', 'E'): NO_BONUS,
    ('H', 'H'): SS_MATCH,
    ('H', 'C'): NO_BONUS,
    ('H', 'E'): NO_BONUS,
    ('E', 'E'): SS_MATCH,
    ('E', 'C'): NO_BONUS,
    ('E', 'H'): NO_BONUS,
}

# Wrap the score tables in ScoreMatrix containers; each matrix scores a
# symbol of its alphabet against a symbol of that same alphabet.
score_matrix_motif = praline.container.ScoreMatrix(
    MOTIF_SCORES,
    [ALPHABET_MOTIF, ALPHABET_MOTIF]
)
score_matrix_ss = praline.container.ScoreMatrix(
    SS_SCORES,
    [ALPHABET_3STATE_SS, ALPHABET_3STATE_SS]
)

# Setup environment: all alignment settings are collected in one dictionary
# and handed to the Environment in a single step.
keys = {
    # NOTE(review): presumably the gap open / gap extension penalties -- confirm.
    'gap_series': [-11., -1.],
    'linkage_method': 'average',
    # Type id of the component that performs the pairwise alignments.
    'aligner': praline.component.PairwiseAligner.tid,
    'merge_mode': 'global',
    'dist_mode': 'global',
}

env = Environment(keys=keys)

# Initialize root node for output
root_node = TaskNode("__ROOT__")

## Load sequences & attach additional tracks

In [None]:
# Identifiers under which the extra tracks are attached to each sequence.
TRACK_ID_SS = 'praline.example.SecondaryStructureTrack'
TRACK_ID_MOTIF = 'praline.example.MotifTrack'

# Load the amino acid sequences plus, from separate FASTA files, the motif
# and secondary structure annotations, each with its own alphabet.
seqs = praline.load_sequence_fasta('./data/BBA0184.tfa', praline.container.ALPHABET_AA)
seqs_motif = praline.load_sequence_fasta('./data/BBA0184.motif.tfa', ALPHABET_MOTIF)
seqs_ss = praline.load_sequence_fasta('./data/BBA0184.ss.tfa', ALPHABET_3STATE_SS)

# We assume the motif & secondary structure sequences are given in the same order as the AA sequences.
# If not, we'd have to key them by the sequence name or something similar.
# zip() stops at the shortest input, so a file with too few or too many
# entries would otherwise leave some sequences without their extra tracks
# without any warning. Fail loudly instead.
if not (len(seqs) == len(seqs_motif) == len(seqs_ss)):
    raise ValueError(
        'Expected the same number of amino acid, motif and secondary '
        'structure sequences, but got {0}, {1} and {2}.'.format(
            len(seqs), len(seqs_motif), len(seqs_ss)
        )
    )

for seq, seq_ss, seq_motif in zip(seqs, seqs_ss, seqs_motif):
    # The annotation files were loaded as ordinary sequences, so their
    # content lives in the input track; copy it onto the AA sequence under
    # the dedicated track ids.
    track_motif = seq_motif.get_track(praline.container.TRACK_ID_INPUT)
    track_ss = seq_ss.get_track(praline.container.TRACK_ID_INPUT)

    seq.add_track(TRACK_ID_MOTIF, track_motif)
    seq.add_track(TRACK_ID_SS, track_ss)

# Display the loaded sequences as the notebook cell output.
seqs

## Build preprofiles

In [None]:
# A single execution context collects all master slave alignment tasks.
execution = Execution(manager, "__ROOT__")

# Every input sequence takes a turn as the master; all remaining sequences
# are aligned to it as slaves.
all_seqs = set(seqs)
for master in seqs:
    slaves = list(all_seqs.difference([master]))

    task = execution.add_task(praline.component.GlobalMasterSlaveAligner)
    task.inputs(
        master_sequence=master,
        slave_sequences=slaves,
        # One track id set per score matrix, in matching order.
        track_id_sets=[[praline.container.TRACK_ID_INPUT], [TRACK_ID_MOTIF], [TRACK_ID_SS]],
        score_matrices=[score_matrix, score_matrix_motif, score_matrix_ss]
    )
    task.environment(env)

# Run all tasks (draining the messages they produce) and gather the
# resulting master slave alignments.
messages = list(execution.run())
alignments = [output['alignment'] for output in execution.outputs]

In [None]:
# A fresh execution context with one profile building task per master slave
# alignment.
execution = Execution(manager, "__ROOT__")
for master_slave_alignment in alignments:
    task = execution.add_task(praline.component.ProfileBuilder)
    task.inputs(
        alignment=master_slave_alignment,
        track_id=praline.container.TRACK_ID_INPUT
    )
    task.environment(env)

# Run the tasks, then attach each resulting profile to the input sequence at
# the same position as a preprofile track.
messages = list(execution.run())
for position, result in enumerate(execution.outputs):
    seqs[position].add_track(
        praline.container.TRACK_ID_PREPROFILE,
        result['profile_track']
    )

## Build guide tree with preprofiles, extra tracks

In [None]:
# The guide tree is scored on the preprofile track plus the two extra
# tracks; each track id set is paired with the score matrix at the same
# position.
guide_tree_track_sets = [
    [praline.container.TRACK_ID_PREPROFILE],
    [TRACK_ID_MOTIF],
    [TRACK_ID_SS],
]
guide_tree_matrices = [score_matrix, score_matrix_motif, score_matrix_ss]

# Single-task execution context that builds the guide tree.
execution = Execution(manager, "__ROOT__")
task = execution.add_task(praline.component.GuideTreeBuilder)
task.inputs(
    sequences=seqs,
    track_id_sets=guide_tree_track_sets,
    score_matrices=guide_tree_matrices
)
task.environment(env)

# Run the task and pick the tree from its (only) output.
messages = list(execution.run())
tree = execution.outputs[0]['guide_tree']

## Use guide tree, preprofiles and extra tracks to progressively construct MSA

In [None]:
# The final alignment uses the same three tracks as the guide tree:
# preprofiles, motif matches and secondary structure, each paired with the
# score matrix at the same position.
msa_track_sets = [
    [praline.container.TRACK_ID_PREPROFILE],
    [TRACK_ID_MOTIF],
    [TRACK_ID_SS],
]
msa_matrices = [score_matrix, score_matrix_motif, score_matrix_ss]

# Single-task execution context that builds the MSA along the guide tree.
execution = Execution(manager, "__ROOT__")
task = execution.add_task(praline.component.TreeMultipleSequenceAligner)
task.inputs(
    sequences=seqs,
    guide_tree=tree,
    track_id_sets=msa_track_sets,
    score_matrices=msa_matrices
)
task.environment(env)

# Run the task and pick the alignment from its (only) output.
messages = list(execution.run())
msa = execution.outputs[0]['alignment']

## Write MSA to disk

In [None]:
# Write the input track of the final alignment to disk in FASTA format.
msa_output_path = 'data/BBA0184.multitrack.aln'
praline.write_alignment_fasta(msa_output_path, msa, praline.container.TRACK_ID_INPUT)