# Visualize Lassa virus natural diversity

In [None]:
# Imports
import os
import yaml
import math
import warnings
import pandas as pd
import seaborn as sns
import dna_features_viewer
from Bio import SeqIO
from matplotlib import pyplot as plt
from matplotlib import patches as mpatches

# Global plotting defaults: 300 dpi for on-screen and saved figures, and
# "svg.fonttype" set to "none"
plot_rc = {
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "svg.fonttype": "none",
}
sns.set(rc=plot_rc)
sns.set_style("ticks")

# Silence all warnings for the rest of the session
warnings.simplefilter("ignore")

In [None]:
# Open config file
config_path = "Configure/config.yml"
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default locale encoding.
with open(config_path, encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Set paths (all taken from the config file)
variation_path = config["GPC_protein_variation"]  # CSV read with pd.read_csv
protein_feature_file = config["Protein_features"]  # GenBank file read with SeqIO
figure_dir = config["Figures_dir"]  # directory created before saving
GPC_diversity_figure_path = config["GPC_diversity_figure"]  # output figure path

In [None]:
# # Uncomment to run interactively
# # Open config file
# config_path = "../Configure/config.yml"
# with open(config_path) as f:
#     config = yaml.safe_load(f)

# # Set paths
# variation_path = "../" + config["GPC_protein_variation"]
# protein_feature_file = "../" + config["Protein_features"]
# figure_dir = "../" + config["Figures_dir"]
# GPC_diversity_figure_path = "../" + config["GPC_diversity_figure"]

In [None]:
# Read the protein variation table (CSV) into a DataFrame. The plotting code
# further down uses its "site" and "n_effective" columns.
variation_df = pd.read_csv(variation_path)

In [None]:
class MyCustomTranslator(dna_features_viewer.BiopythonTranslator):
    """
    Translator applying a fixed colour and label theme keyed on feature type.

    Feature types listed in the tables below get the corresponding colour and
    label. Any other feature type is coloured black and labelled by the
    parent ``BiopythonTranslator``.
    """

    # Colour returned for each known feature type.
    _FEATURE_COLORS = {
        "SSP": "#AA4499",
        "Glycoprotein-1": "#88CCEE",
        "Glycoprotein-2": "#EE7733",
        "N-glycan": "#1f78b4",
        "aDG": "#117733",
        "LAMP1": "#999933",
    }

    # Label returned for each known feature type. None is returned for the
    # site-level types (presumably this suppresses their label -- confirm
    # against the dna_features_viewer documentation).
    _FEATURE_LABELS = {
        "SSP": "SSP",
        "Glycoprotein-1": "GP1",
        "Glycoprotein-2": "GP2",
        "N-glycan": None,
        "aDG": None,
        "LAMP1": None,
    }

    def compute_feature_color(self, feature):
        """Return the themed colour for ``feature.type``, or "black" if unlisted."""
        return self._FEATURE_COLORS.get(feature.type, "black")

    def compute_feature_label(self, feature):
        """Return the themed label for ``feature.type``; defer to the parent if unlisted."""
        if feature.type in self._FEATURE_LABELS:
            return self._FEATURE_LABELS[feature.type]
        return super().compute_feature_label(feature)

# Two stacked panels sharing one x axis: the upper panel (feature track) is
# given twice the height of the lower panel (per-site diversity)
fig, (ax1, ax2) = plt.subplots(
    nrows=2,
    ncols=1,
    figsize=(6, 3),
    sharex=True,
    gridspec_kw=dict(height_ratios=[2, 1], wspace=0),
)

# Upper panel: protein features read from the GenBank file, themed by
# MyCustomTranslator
record = SeqIO.read(protein_feature_file, "genbank")
graphic_record = MyCustomTranslator().translate_record(record)
ax = graphic_record.plot(ax=ax1, with_ruler=False, strand_in_label_threshold=4)

# Lower panel: n effective amino acids per site, plotted against a 0-based
# site index stored in a new "site0" column
variation_df["site0"] = variation_df["site"].sub(1)
neffective_plot = sns.barplot(
    x="site0",
    y="n_effective",
    data=variation_df,
    color="black",
    ax=ax2,
)
ax2.fill_between(
    variation_df["site0"],
    variation_df["n_effective"],
    color="black",
)

# Titles, limits and ticks. Tick positions are 0-based (start at 0 to align
# with the feature track) while the displayed labels are 1-based.
x_tick_positions = list(range(0, 491, 100))
x_tick_labels = [position + 1 for position in x_tick_positions]
y_tick_values = list(range(1, 5))
neffective_plot.set_title("Lassa GPC amino acid diversity", fontsize=8)
neffective_plot.set_ylim(1, 4)
neffective_plot.set_xticks(x_tick_positions)
neffective_plot.set_xticklabels(x_tick_labels, fontsize=8)
neffective_plot.set_yticks(y_tick_values)
neffective_plot.set_yticklabels(y_tick_values, fontsize=8)
neffective_plot.set_ylabel("effective\namino acids", fontsize=8)
neffective_plot.set_xlabel("site", fontsize=8)
sns.despine()

# Hand-built legend entries; the colours repeat the ones MyCustomTranslator
# assigns to the aDG, LAMP1 and N-glycan feature types
DG_patch, LAMP1_patch, glycan_patch = (
    mpatches.Patch(color=patch_color, label=patch_label)
    for patch_color, patch_label in (
        ("#117733", "\u03B1-DG binding sites"),
        ("#999933", "LAMP1 binding sites"),
        ("#1f78b4", "N-glycosylation sites"),
    )
)

fig.legend(
    handles=[DG_patch, LAMP1_patch, glycan_patch],
    fontsize=8,
    frameon=False,
)

# Make sure the output directory exists. Unlike an exists()/mkdir() pair,
# makedirs(..., exist_ok=True) also creates missing parent directories and
# does not fail if the directory already exists or is created concurrently.
os.makedirs(figure_dir, exist_ok=True)

# Write the figure to the configured path at 300 dpi
fig.savefig(GPC_diversity_figure_path, dpi=300)