In [None]:
import tszip
import sc2ts
import pandas as pd

In [None]:
# Load the tree sequence with tszip from the first Snakemake input and display it.
# NOTE(review): `snakemake` is never defined in this notebook; presumably it is
# injected by Snakemake's notebook runner — confirm before running standalone.
ts = tszip.load(snakemake.input[0])
ts

In [None]:
# Wrap the tree sequence in an sc2ts TreeInfo object (progress display disabled)
# and show its repr; every plot and summary below is produced from `ti`.
ti = sc2ts.TreeInfo(ts, show_progress=False)
ti

In [None]:
# Plot the distribution of mutations per node (going by the sc2ts method name).
# The trailing ";" keeps the call's return value out of the cell output.
ti.plot_mutations_per_node_distribution();

In [None]:
# Plot the distribution of mutations per site (going by the sc2ts method name).
# The trailing ";" keeps the call's return value out of the cell output.
ti.plot_mutations_per_site_distribution();

In [None]:
# Plot mutations per site across all mutations.
# NOTE(review): annotate_threshold=0.65 presumably selects which sites get
# labelled on the plot — confirm its exact meaning against the sc2ts docs.
ti.plot_mutations_per_site(annotate_threshold=0.65);

In [None]:
# Same per-site plot, restricted via `select` to ti.mutations_is_reversion.
# NOTE(review): presumably a per-mutation boolean mask flagging reversions, with a
# lower annotation threshold (0.10) because fewer mutations are counted — confirm.
ti.plot_mutations_per_site(annotate_threshold=0.10, select=ti.mutations_is_reversion);

In [None]:
# Plot deletion overlaps (going by the sc2ts method name).
# NOTE(review): the meaning of annotate_threshold=0.1 is not visible here — confirm.
ti.plot_deletion_overlaps(annotate_threshold=0.1);

In [None]:
# Tabulate the deletions reported by TreeInfo. Later cells use this result as a
# DataFrame with at least the columns `length` and `max_inheritors`.
df = ti.deletions_summary()
df

In [None]:
# Persist the deletions summary to the first Snakemake output path.
# to_csv returns None when given a path, so no output suppression is needed.
df.to_csv(path_or_buf=snakemake.output[0])

In [None]:
# How often each deletion length occurs across all summarised deletions.
df["length"].value_counts()

In [None]:
# Deletions with more than 100 max_inheritors, sorted ascending by that column.
dfl = df.loc[df["max_inheritors"] > 100].sort_values(by="max_inheritors")
dfl

In [None]:
# Length frequencies among the deletions kept in `dfl`.
dfl["length"].value_counts()

In [None]:
# Last 20 rows after an ascending sort on max_inheritors.
df.sort_values(by="max_inheritors").tail(20)

# Examine the mutations

In [None]:
# For each site whose sc2ts metadata carries "original_mutations", record that
# original list next to the mutations currently attached to the site.
data = []
for site in ts.sites():
    original_mutations = site.metadata["sc2ts"].get("original_mutations")
    if original_mutations is None:
        # Site has no recorded original mutations; nothing to compare.
        continue
    new_mutations = [
        {
            "node": mut.node,
            "derived_state": mut.derived_state,
            "metadata": mut.metadata,
        }
        for mut in site.mutations
    ]
    data.append(
        {
            "site": int(site.id),
            "position": int(site.position),
            "original_mutations": original_mutations,
            "new_mutations": new_mutations,
        }
    )

In [None]:
# For every recorded site, treat the original and the current mutations as sets
# of (node, derived_state) pairs, then count each set and their overlap.
summary_columns = ["original_mutations", "new_mutations", "intersection", "position"]
df_data = []
for row in data:
    original_pairs = {(d["node"], d["derived_state"]) for d in row["original_mutations"]}
    new_pairs = {(d["node"], d["derived_state"]) for d in row["new_mutations"]}
    df_data.append(
        {
            "original_mutations": len(original_pairs),
            "new_mutations": len(new_pairs),
            "intersection": len(original_pairs & new_pairs),
            "position": row["position"],
        }
    )

# Explicit columns and an integer dtype keep the schema stable even when `data`
# is empty. Without them an empty list yields a column-less frame, and the
# later `dfm.original_mutations` style lookups raise AttributeError.
dfm = pd.DataFrame(df_data, columns=summary_columns).astype("int64")
dfm

In [None]:
# Sites that originally carried more than 500 distinct mutations.
dfm.loc[dfm["original_mutations"] > 500]

In [None]:
# Histogram of per-site original mutation counts (";" hides the Axes repr).
dfm["original_mutations"].hist();

In [None]:
# Histogram of per-site current mutation counts (";" hides the Axes repr).
dfm["new_mutations"].hist();

In [None]:
# Summary statistics for the per-site original mutation counts.
dfm["original_mutations"].describe()

In [None]:
# Summary statistics for the per-site current mutation counts.
dfm["new_mutations"].describe()

In [None]:
# Summary statistics for how many (node, derived_state) pairs each site retained.
dfm["intersection"].describe()