# Basic multiple sequence alignment example

This example shows how to align the sequences in a FASTA file. No extra tracks are used, and BLOSUM62 is used to score amino acid substitutions. The gap open penalty is 11, and the gap extension penalty is 1.

## Import and set up PRALINE 2

In [None]:
import praline
import praline.container
import praline.component
from praline.core import *

# Build the component type index. autoregister() automatically loads all
# components which are installed, so nothing has to be registered by hand.
index = TypeIndex()
index.autoregister()

# The manager is used to actually run the PRALINE alignments; it is handed
# to every Execution context created further down in this notebook.
manager = Manager(index)

## Load score matrix and set up environment

For the other options that can be configured in the alignment environment, see the comments at the top of every Component subclass.

In [None]:
# Load the built-in BLOSUM62 score matrix for the amino acid alphabet.
# The alphabet constant is read from praline.container, the same module the
# sequence-loading cell uses, so the matrix and the sequences are guaranteed
# to refer to the same alphabet object.
with praline.open_builtin('matrices/blosum62') as matrix_file:
    score_matrix = praline.load_score_matrix(
        matrix_file, alphabet=praline.container.ALPHABET_AA
    )

# Environment settings shared by every task in this example.
keys = {
    # Gap open penalty of 11 followed by a gap extension penalty of 1,
    # expressed as negative scores.
    'gap_series': [-11., -1.],
    'linkage_method': 'average',
    # Components are referred to by their type id.
    'aligner': praline.component.PairwiseAligner.tid,
    'merge_mode': 'global',
    'dist_mode': 'global',
}

env = Environment(keys=keys)

# Initialize root node for output
root_node = TaskNode("__ROOT__")

## Load sequences

In [None]:
# Read the input sequences from the FASTA file, using the amino acid alphabet.
seqs = praline.load_sequence_fasta('./data/BBA0184.tfa', praline.container.ALPHABET_AA)
# Bare expression: makes the notebook display the loaded sequences as the
# output of this cell.
seqs

## Build preprofiles

In [None]:
# Create an execution context for performing the master slave alignments.
execution = Execution(manager, "__ROOT__")

# Queue one master slave alignment per input sequence: every sequence gets
# its turn as the master, with all the other sequences as its slaves.
for master in seqs:
    # Pick the slaves by identity and keep them in input order. Going through
    # a set here would make the slave order depend on object hashes (and so
    # differ from run to run) and would require the sequences to be hashable.
    slaves = [other for other in seqs if other is not master]

    task = execution.add_task(praline.component.GlobalMasterSlaveAligner)
    task.inputs(
        master_sequence=master,
        slave_sequences=slaves,
        track_id_sets=[[praline.container.TRACK_ID_INPUT]],
        score_matrices=[score_matrix]
    )
    task.environment(env)

# Execute the tasks and collect the resulting master slave alignments.
# run() is drained into a list so that all tasks have completed before the
# outputs are read.
messages = list(execution.run())
alignments = [output['alignment'] for output in execution.outputs]

In [None]:
# Fresh execution context: one ProfileBuilder task per master slave alignment.
execution = Execution(manager, "__ROOT__")
for master_slave_alignment in alignments:
    task = execution.add_task(praline.component.ProfileBuilder)
    task.inputs(alignment=master_slave_alignment,
                track_id=praline.container.TRACK_ID_INPUT)
    task.environment(env)

# Run everything, then attach output number k to input sequence number k as
# its preprofile track.
# NOTE(review): this pairing assumes execution.outputs is ordered like the
# tasks were added (and alignments like seqs) -- confirm against PRALINE.
messages = list(execution.run())
for position, result in enumerate(execution.outputs):
    seqs[position].add_track(praline.container.TRACK_ID_PREPROFILE,
                             result['profile_track'])

## Build guide tree with preprofiles

In [None]:
# A single GuideTreeBuilder task, fed with the preprofile tracks that were
# attached to the sequences, produces the guide tree.
execution = Execution(manager, "__ROOT__")
task = execution.add_task(praline.component.GuideTreeBuilder)
task.inputs(
    sequences=seqs,
    track_id_sets=[[praline.container.TRACK_ID_PREPROFILE]],
    score_matrices=[score_matrix]
)
task.environment(env)

# Only one task was queued, so the tree is in the first (and only) output.
messages = list(execution.run())
tree = execution.outputs[0]['guide_tree']

## Use guide tree and sequences with preprofiles to progressively construct MSA

In [None]:
# A single TreeMultipleSequenceAligner task builds the final MSA from the
# sequences, their preprofile tracks and the guide tree.
execution = Execution(manager, "__ROOT__")
task = execution.add_task(praline.component.TreeMultipleSequenceAligner)
task.inputs(
    sequences=seqs,
    guide_tree=tree,
    track_id_sets=[[praline.container.TRACK_ID_PREPROFILE]],
    score_matrices=[score_matrix]
)
task.environment(env)

# Only one task was queued, so the MSA is in the first (and only) output.
messages = list(execution.run())
msa = execution.outputs[0]['alignment']

## Write MSA to disk

In [None]:
# Save the finished alignment as FASTA. TRACK_ID_INPUT is passed as the track
# id (presumably selecting the original input sequence track -- confirm).
praline.write_alignment_fasta(
    'data/BBA0184.aln',
    msa,
    praline.container.TRACK_ID_INPUT
)