In [None]:
import pickle as pk

import altair as alt
import pandas as pd
import polars as pl

# Input location.
# The Snakemake-provided values are kept below for reference; while they are
# commented out the notebook reads from a fixed absolute path instead.
#html = snakemake.output.html
#pickle = snakemake.input.pickle
pickle = "/master/abagwell/variant-analysis/results/rhesus/scikit-allel/divergence/WGS/SNPRC_WGS_WES.SNP.pickle"

# Deserialize the divergence table.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# this must only ever be pointed at pickles produced by a trusted pipeline.
with open(pickle, mode="rb") as handle:
    df = pk.load(handle)

In [None]:
# Display the object unpickled above as this cell's output (quick sanity check).
df

In [None]:
# Expression producing a "chr<chrom>:<start>-<stop>" label for every row,
# stored under the column name "interval".
interval_label = pl.concat_str(
    [
        pl.lit("chr"),
        pl.col("chrom"),
        pl.lit(":"),
        pl.col("start"),
        pl.lit("-"),
        pl.col("stop"),
    ]
).alias("interval")

# Convert the pandas frame to polars and append the label column.
# Possible extra column, left disabled: the average may matter more since dxy
# is correlated with the number of variants.
#   (pl.col("dxy") / pl.col("counts")).alias("avg_dxy")
pl_df = pl.from_pandas(df).with_columns(interval_label)

In [None]:
# Keep only rows whose divergence is strictly greater than 0.003.
pl_df = pl_df.filter(pl.col("dxy").gt(0.003))

In [None]:
# Display the filtered polars frame as this cell's output.
pl_df

In [None]:
# Window sizes of interest, listed from largest to smallest.
window_sizes = [
    250,
    100,
    50,
    25,
]

In [None]:
# Altair plot: one circle per interval showing its divergence (dxy), coloured
# by chromosome, with a radio-button selector that filters on `window_size`.
#
# NOTE(review): the radio options below are strings, whereas `window_sizes`
# earlier in this notebook holds ints. If the `window_size` column is numeric
# the selection may never match any row -- confirm the column dtype.
select_window_size = alt.selection_point(
    name="Window",
    fields=["window_size"],
    bind=alt.binding_radio(options=["250", "100", "50", "25"]),
    #value={"window_size": "250"},
)

# Turn off Altair's max-rows check so frames above the default row limit can
# still be plotted.
alt.data_transformers.disable_max_rows()

alt.Chart(pl_df.to_arrow().to_pandas()).mark_circle().encode(
    alt.X(
        "interval",
        title="Intervals",
        axis=alt.Axis(labels=False, tickSize=0),
        # Keep intervals in data order. The previous sort list held chromosome
        # names ('1'..'5'), which can never equal a "chr<chrom>:<start>-<stop>"
        # label, so every value already fell back to data order; `None` states
        # that directly.
        sort=None,
    ),
    alt.Y("dxy", title="Divergence"),
    color=alt.Color("chrom", title="Chrom", sort=['1','2','3','4','5']),
    tooltip=[
        alt.Tooltip("interval", title="Interval"),
        alt.Tooltip("counts", title="Variant Count"),
    ],
).properties(
    title="Divergence between RPL and non-RPL Rhesus Macaques",
    width=300,
).add_params(
    select_window_size
).transform_filter(
    select_window_size
)#.save("/master/abagwell/figures/allel/divergence_all_windows.html")