# Visualize DAG1 multiple sequence alignments

In [None]:
# Imports
import os
import yaml
import warnings
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from pymsaviz import MsaViz 
from Bio import SeqIO, AlignIO, Phylo

# Custom 13-colour palette; registered below as the default seaborn palette
tol_muted_adjusted = [
    "#000000", "#CC6677", "#1f78b4", "#DDCC77", "#117733",
    "#882255", "#88CCEE", "#44AA99", "#999933", "#AA4499",
    "#EE7733", "#CC3311", "#DDDDDD",
]

# Matplotlib rc overrides: 300 dpi for on-screen and saved figures, and
# "svg.fonttype": "none" so SVG output keeps text as text instead of paths
_figure_rc = {
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "svg.fonttype": "none",
}

# Apply the seaborn theme first, then the axes style, then the palette
# (order matters: each call overrides parts of the previous one)
sns.set(rc=_figure_rc)
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Silence every warning for the rest of the session
warnings.simplefilter("ignore")

In [None]:
# Open config file (path is relative to the project root, i.e. the working
# directory when the notebook is run non-interactively)
config_path = "Configure/config.yml"
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding
with open(config_path, encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Set paths
# Input alignments (a missing key raises KeyError here rather than later)
reduced_alignment_file_path = config["Reduced_protein_alignment"]
alignment_file_path = config["Protein_alignment"]
# Directory that is created before figures are written
out_dir = "Results/Figures/"
# Output figure paths
consensus_glycosylation_site = config["Consensus_glycosylation_site"]
human_mastomys_msa = config["Human_and_mastomys_msa"]

In [None]:
# # Uncomment to run interactively
# # Open config file
# config_path = "../Configure/config.yml"
# with open(config_path) as f:
#     config = yaml.safe_load(f)

# # Set paths
# reduced_alignment_file_path = "../" + config["Reduced_protein_alignment"]
# alignment_file_path = "../" + config["Protein_alignment"]
# out_dir = "../Results/Figures/"
# consensus_glycosylation_site = "../" + config["Consensus_glycosylation_site"]
# human_mastomys_msa = "../" + config["Human_and_mastomys_msa"]

In [None]:
# Count, for each position of the five-residue motif, how many sequences in
# the alignment carry the expected (consensus) amino acid.
aa_identities = ["T", "P", "T", "P", "V"]
DAG1_alignment = AlignIO.read(alignment_file_path, "fasta")
position_counts = [0] * len(aa_identities)

# Columns 710:715 (0-based, end-exclusive) are alignment positions 711-715
# in 1-based coordinates; each row of the slice is one sequence record.
for record in DAG1_alignment[:, 710:715]:
    for idx, expected_residue in enumerate(aa_identities):
        if record.seq[idx] == expected_residue:
            position_counts[idx] += 1

# Create dataframe from data
consensus_df = pd.DataFrame({"amino acid": aa_identities, "counts": position_counts})

# Normalise by the actual number of sequences in the alignment instead of a
# hard-coded count, so the result stays correct if the alignment changes.
# NOTE: despite the column name, the value is a fraction in [0, 1], not a
# percentage; the name is kept so downstream uses are unaffected.
n_sequences = len(DAG1_alignment)
consensus_df["percent consensus"] = consensus_df["counts"] / n_sequences
consensus_df

In [None]:
# Visualize alignment positions 711-715 of the full alignment, with the
# consensus track shown above the sequences
mv = MsaViz(
    alignment_file_path,
    color_scheme="Identity",
    consensus_color="#CC3311",
    wrap_length=5,
    show_grid=True,
    show_consensus=True,
    start=711,
    end=715,
)

mv.set_plot_params(
    grid_color="black",
    identity_color="lightgrey",
)

fig = mv.plotfig()

# Make output dir if it doesn't exist; makedirs also creates missing parent
# directories and exist_ok avoids the check-then-create race
os.makedirs(out_dir, exist_ok=True)

# Save the figure returned by plotfig() explicitly rather than relying on
# pyplot's implicit "current figure"
fig.savefig(consensus_glycosylation_site)

In [None]:
# Visualize the reduced alignment in full, wrapped at 75 columns per row and
# without a consensus track
mv = MsaViz(
    reduced_alignment_file_path,
    color_scheme="Identity",
    consensus_color="lightgrey",
    wrap_length=75,
    show_grid=True,
    show_consensus=False,
)

mv.set_plot_params(
    grid_color="black",
    identity_color="lightgrey",
)

# Add text annotations marking the two subunit ranges
mv.add_text_annotation((1, 653), "alpha-dystroglycan", text_color="#AA4499", range_color="#AA4499")
# Label spelling corrected (was "beta-dystrolgycan")
mv.add_text_annotation((654, 896), "beta-dystroglycan", text_color="#88CCEE", range_color="#88CCEE")

fig = mv.plotfig()

# Make output dir if it doesn't exist; makedirs also creates missing parent
# directories and exist_ok avoids the check-then-create race
os.makedirs(out_dir, exist_ok=True)

# Save the figure returned by plotfig() explicitly rather than relying on
# pyplot's implicit "current figure"
fig.savefig(human_mastomys_msa)