In [18]:
from pathlib import Path # type: ignore
from freyja_plot import FreyjaPlotter
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import pandas as pd
import numpy as np
import sys

if (module_path:=str(Path(".").absolute().resolve().parent)) not in sys.path:
    sys.path.insert(0, module_path)
from sample_info import colormap, summary_dict, expected, artic_runs as runs, plotting_dir, renameSamples, p_value_table

In [19]:
outdir = plotting_dir / "percent_of_expected/observed_vs_expected"
outdir.mkdir(exist_ok=True)

In [20]:
file_map_freyja = {runs["Freyja"]["WB"]: "Freyja", expected:"Expected"}
freyja_plotter = FreyjaPlotter(file_map=file_map_freyja,summary_dict=summary_dict,colormap=colormap)
freyja_plotter = renameSamples(freyja_plotter)

### Plot O/E ratio (y-xis) as function of Expected relative abundance (x-axis) (freyja only)

In [21]:
df = freyja_plotter.plotPercentExpectedBox(summarized=True,return_df=True)
print(df[["percent_of_expected"]].min())
df["log2OE"] = np.log2(list(df["percent_of_expected"]))
df

percent_of_expected    3.506093
dtype: object


Unnamed: 0,Sample name,lineages,abundances,scheme,expected_abundance,percent_of_expected,log2OE
29,0adgio1,Alpha,0.266966,Freyja,0.250000,106.786544,6.738586
72,0adgio1-2,Alpha,0.145397,Freyja,0.142857,101.777984,6.669282
11,0adgio1o2o3o4o5,Alpha,0.065740,Freyja,0.066667,98.61048,6.623669
47,0agio1o2,Alpha,0.065981,Freyja,0.062500,105.570064,6.722057
113,0aio1o2o3o4o5,Alpha,0.132063,Freyja,0.125000,105.650432,6.723155
...,...,...,...,...,...,...,...
124,0-3,Wuhan-hu-1,0.492819,Freyja,1.000000,49.281895,5.622986
127,0-4,Wuhan-hu-1,0.986922,Freyja,1.000000,98.692177,6.624864
78,0adgio1-2,Wuhan-hu-1,0.010017,Freyja,0.285714,3.506093,1.809864
16,0adgio1o2o3o4o5,Wuhan-hu-1,0.018629,Freyja,0.133333,13.971592,3.804425


In [22]:
px.scatter(df, x="expected_abundance", y="log2OE", color=None, hover_name="Sample name",
           marginal_y='violin',
           marginal_x='violin',
           )

In [23]:
px.scatter(df, x="expected_abundance", y="percent_of_expected", color=None, hover_name="Sample name",
           marginal_y='violin',
           marginal_x='violin',
           )

In [24]:
px.scatter(df, x="expected_abundance", 
           y="abundances", 
        #    y="percent_of_expected", 
           color="lineages", 
         #   color=None, 
           hover_name="Sample name",
           marginal_y='violin',
           marginal_x='violin',
           log_x=True,
           log_y=True,
           )

In [25]:
px.scatter(df, x="expected_abundance", y="abundances", color=None, hover_name="Sample name",
           marginal_y='violin',
           marginal_x='violin',
           )

In [26]:
fig = go.Figure()
lineages = ["Wuhan-hu-1", "Alpha", "Gamma", "Delta", "Iota", "BA.1.X", "BA.2.X", "BG.X", "BA.4.X", "BA.5.X"]
df["O/E"] = df["percent_of_expected"] / 100
for lineage in lineages:
    fig.add_trace(
        go.Box(
            y=df.loc[df["lineages"]==lineage, "O/E"], 
            name=lineage,
            showlegend=False,
            marker_color='rgb(7,40,89)',
            line_color='rgb(7,40,89)',
            boxpoints="all",
        ),
    )
fig.update_layout(title_text="O/E distribution by lineage", yaxis_title="O/E ratio", xaxis_title="Lineage")
fig.add_hline(y=1, line_width=3, line_dash="dash", line_color="grey", opacity=0.5)
fig

In [27]:
# anova for the above content
from sample_info import get_stats
p_table = p_value_table(get_stats(df,batch_col="lineages",value_col="O/E",p_min=0.01))
p_table

ANOVA for None samples comparing O/E across batches
p-value: 2.5467540136388177e-14	f-value: 15.135982156137947
The O/E was significantly different across Alpha (mean=1.0470767181481484, std. dev.=0.12735924863058914), BA.1.X (mean=0.49783806481481485, std. dev.=0.20693141059387227), BA.2.X (mean=0.7766597744242424, std. dev.=0.27197307579680513), BA.4.X (mean=1.5386482530303027, std. dev.=0.3989253765296749), BA.5.X (mean=0.9025189561515151, std. dev.=0.14057502466569102), BG.X (mean=0.7397575511538462, std. dev.=0.20347987880509336), Delta (mean=0.66341127, std. dev.=0.2628793670682751), Gamma (mean=0.8672375829166665, std. dev.=0.12886517203851586), Iota (mean=0.8699792544444443, std. dev.=0.1266435396962675), and Wuhan-hu-1 (mean=0.5021624290476191, std. dev.=0.3717420863078513), as determined by one-way ANOVA (F=15.135982156137947, p=2.5467540136388177e-14<0.01).


The O/E for Alpha differed significantly from BA.1.X with p-value 0.0.

Tukey's HSD results:


In [28]:
px.box(df, x="lineages", y="percent_of_expected", color=None, hover_name="Sample name")
# sort by time


In [29]:
# d = df.sort_values(by=["lineages", "percent_of_expected"])
d = df.sort_values(by=["Sample name"], ascending=False)
d[d["lineages"] == "BG.X"]

Unnamed: 0,Sample name,lineages,abundances,scheme,expected_abundance,percent_of_expected,log2OE,O/E
92,o3-4,BG.X,0.988411,Freyja,1.0,98.841121,6.627039,0.988411
60,o3-3,BG.X,0.988528,Freyja,1.0,98.852796,6.62721,0.988528
110,o3-2,BG.X,0.988631,Freyja,1.0,98.863098,6.62736,0.988631
27,o3,BG.X,0.988896,Freyja,1.0,98.88962,6.627747,0.988896
108,o2o3o4o5-3,BG.X,0.153964,Freyja,0.25,61.5854,5.944516,0.615854
37,o2o3o4o5-2,BG.X,0.192906,Freyja,0.25,77.162588,6.26983,0.771626
57,o2o3o4o5,BG.X,0.198967,Freyja,0.25,79.586988,6.314461,0.79587
24,o1o2o3o4o5,BG.X,0.062171,Freyja,0.125,49.736792,5.636242,0.497368
101,agio3o4o5,BG.X,0.068491,Freyja,0.125,54.79272,5.775912,0.547927
63,adgio1o2o3,BG.X,0.189621,Freyja,0.25,75.84854,6.24505,0.758485


In [30]:
fig = px.line(
    df.sort_values(by="Sample name"),
    x="Sample name", 
    y="percent_of_expected", 
    color="lineages", 
    markers=True,
)
fig.update_xaxes(categoryorder='category ascending')

fig