# Visualize full GPC phylogeny

This notebook renders the final figure of the full GPC codon phylogeny as a circular tree using `ete3`, with tips colored by country.

In [None]:
# Imports
import os
import yaml
import ete3
import warnings
# Need this line to render ete3 trees
# https://github.com/etetoolkit/ete/issues/296
os.environ["QT_QPA_PLATFORM"]= "offscreen"
import pandas as pd
import numpy as np
import matplotlib.colors
import seaborn as sns
import matplotlib.pyplot as plt
from pymsaviz import MsaViz 
from Bio import SeqIO, AlignIO

# Color palette shared by the seaborn settings below and by the
# country-to-color mapping built later in the notebook.
# NOTE(review): the name suggests Paul Tol's "muted" scheme with some
# extra colors added — confirm the provenance.
# Rearranged to make the tree look nicer
# re-ordered
tol_muted_adjusted = [
    "#000000",  # black; skipped by the country mapping, which uses tol_muted_adjusted[1:]
    "#CC6677", 
    "#1f78b4", 
    "#88CCEE",
    "#DDDDDD",
    "#882255",
    "#117733", 
    "#DDCC77",
    "#44AA99", 
    "#EE7733",
    "#AA4499",
    "#999933", 
    "#CC3311",
]

# Seaborn style settings
# The three calls below mutate global plotting state and are order
# dependent: the base settings (with 300 dpi for display and saved figures)
# are applied first, then the "ticks" style, then the custom palette.
sns.set(rc={"figure.dpi":300, "savefig.dpi":300})
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Suppress warnings
# This installs a blanket "ignore" filter, so every warning category is
# hidden in all cells that run after this one.
warnings.simplefilter("ignore")

In [None]:
# Read the workflow configuration; the path is relative to the current
# working directory
config_path = "Configure/config.yml"
with open(config_path) as config_handle:
    config = yaml.safe_load(config_handle)

# Input files: codon tree (prefix + ".treefile"), codon alignment, metadata
codon_tree_file_path = config["GPC_codon_tree_prefix"] + ".treefile"
alignment_file_path = config["GPC_codon_alignment"]
metadata_file_path = config["Metadata"]

# Output locations: figure directory and the rendered tree figure
figure_dir = config["Figures_dir"]
full_tree_figure_file_path = config["GPC_codon_full_tree_figure"]

In [None]:
# # Uncomment to run interactively
# # Open config file
# config_path = "../Configure/config.yml"
# with open(config_path) as f:
#     config = yaml.safe_load(f)

# # Set paths
# codon_tree_file_path = "../" + config["GPC_codon_tree_prefix"] + ".treefile"
# alignment_file_path = "../" + config["GPC_codon_alignment"]
# metadata_file_path = "../" + config["Metadata"]
# figure_dir = "../" + config["Figures_dir"]
# full_tree_figure_file_path = "../" + config["GPC_codon_full_tree_figure"]

In [None]:
# Load metadata as dataframe (tab-separated)
all_metadata = pd.read_csv(metadata_file_path, sep="\t")

# Load list of sequence names from alignment
strains = [
    str(curr_fasta.id)
    for curr_fasta in SeqIO.parse(alignment_file_path, "fasta")
]

# Filter metadata to the strains present in the alignment.
# The filtered copy keeps the row labels of `all_metadata`.
GPC_reduced_metadata = (
    all_metadata.loc[all_metadata["strain"].isin(strains)].copy()
)

# Add Sierra Leone for Josiah strain.
# The Josiah record is addressed by row label 0 (presumably the first row
# of the metadata file — verify against the metadata). Assigning through
# `.at` with a label that is absent does not fail: pandas appends a new,
# otherwise empty row instead. Check that the row survived the filter so
# that case is reported instead of silently corrupting the table.
josiah_row_label = 0
if josiah_row_label not in GPC_reduced_metadata.index:
    raise KeyError(
        f"Row label {josiah_row_label} (expected to be the Josiah strain) is "
        "not present in the filtered metadata; cannot set its country."
    )
GPC_reduced_metadata.at[josiah_row_label, "country"] = "Sierra Leone"

# Replace the "MISSING" placeholder with "N/A"
GPC_reduced_metadata["country"] = GPC_reduced_metadata["country"].replace(["MISSING"], ["N/A"])

In [None]:
# Map country and strain name to color
# Countries are taken in order of first appearance in the metadata; the
# first palette entry (black) is skipped because black is used for the
# internal branches of the tree.
countries = GPC_reduced_metadata["country"].unique().tolist()
country_palette = tol_muted_adjusted[1:]

# `zip` stops at the shorter input, so extra countries would silently get
# no color and only fail later as a bare KeyError when strains from those
# countries are looked up. Fail here with an explicit message instead.
if len(countries) > len(country_palette):
    raise ValueError(
        f"{len(countries)} countries in the metadata but only "
        f"{len(country_palette)} colors available in tol_muted_adjusted[1:]; "
        "add more colors to the palette."
    )

country_to_color = dict(zip(countries, country_palette))

# Map name to country
name_to_country = GPC_reduced_metadata.set_index("strain")["country"].to_dict()

## Create a codon tree

In [None]:
# Function to create and style tree
def get_pretty_codon_tree(treefile, country_to_color, name_to_country):
    """
    Build a styled ete3 tree together with the tree style used to render it.
    This function is modified from Equia et al analysis.

    Parameters
    ----------
    treefile
        Tree input handed to ``ete3.Tree`` with ``format=1``.
    country_to_color : dict
        Maps each country to a color. Also defines the legend entries,
        which are added in the iteration order of this dict.
    name_to_country : dict
        Maps each leaf name to its country.

    Returns
    -------
    tuple
        ``(tree, tree_style)``: the midpoint-rooted, ladderized tree and
        the circular ``ete3.TreeStyle`` carrying the legend and scale bar.

    Raises
    ------
    KeyError
        If a country in ``name_to_country`` has no entry in
        ``country_to_color``, or a leaf name is missing from
        ``name_to_country``.
    """
    # Load the tree, root it on its midpoint outgroup, then ladderize
    tree = ete3.Tree(treefile, format=1)
    tree.set_outgroup(tree.get_midpoint_outgroup())
    tree.ladderize()

    # Circular layout with tips packed tightly; leaf names are not drawn
    tree_style = ete3.TreeStyle()
    tree_style.show_leaf_name = False
    # tree_style.scale = 1200
    tree_style.branch_vertical_margin = 0
    tree_style.min_leaf_separation = 0
    tree_style.allow_face_overlap = True
    tree_style.mode = "c"

    # Resolve every strain name directly to the color of its country
    name_to_color = {}
    for leaf_name, leaf_country in name_to_country.items():
        name_to_color[leaf_name] = country_to_color[leaf_country]

    # Style every node: leaves get a small dot and branch lines in their
    # country color, internal nodes get no dot and black branch lines
    for node in tree.traverse():
        node_style = ete3.NodeStyle()
        node_style["hz_line_width"] = 1
        node_style["vt_line_width"] = 1

        if not node.is_leaf():
            node_size = 0
            line_color = "black"
        else:
            node_size = 1
            line_color = name_to_color[node.name]
            node_style["fgcolor"] = line_color

        node_style["size"] = node_size
        node_style["hz_line_color"] = line_color
        node_style["vt_line_color"] = line_color
        node.set_style(node_style)

    # Legend: one colored circle (column 0) and one label (column 1)
    # per country
    for legend_country, legend_color in country_to_color.items():
        swatch = ete3.CircleFace(5, color=legend_color)
        swatch.margin_bottom = 1
        swatch.margin_right = 8

        label = ete3.TextFace(legend_country, ftype="Arial", fsize=8)
        label.margin_bottom = 1

        tree_style.legend.add_face(swatch, column=0)
        tree_style.legend.add_face(label, column=1)
    tree_style.legend_position = 2

    # Scale bar; its text cannot be edited, see
    # https://github.com/etetoolkit/ete/issues/266
    tree_style.show_scale = True
    tree_style.scale_length = 0.5

    return tree, tree_style

In [None]:
# Create codon tree
t, ts = get_pretty_codon_tree(codon_tree_file_path, country_to_color, name_to_country)

# # Uncomment to display in notebook
# display(
#     t.render(
#         "%%inline", 
#         tree_style=ts,
#         # units="in",
#         # h=6.5,
#         # w=3.5,
#         dpi=300,
#     )
# )

# Make output dir if doesn't exist.
# `os.makedirs(..., exist_ok=True)` also creates missing parent directories
# and does not fail if the directory already exists (or is created by
# another process between a check and the creation).
os.makedirs(figure_dir, exist_ok=True)

# The figure path comes from its own config entry, so make sure its parent
# directory exists as well in case it is not `figure_dir`.
figure_parent_dir = os.path.dirname(full_tree_figure_file_path)
if figure_parent_dir:
    os.makedirs(figure_parent_dir, exist_ok=True)

# Render image
t.render(
    full_tree_figure_file_path, 
    tree_style=ts,
    # units="in",
    # h=6.5,
    # w=3.5,
    dpi=300,
)