In [None]:
import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
import altair as alt

import ipywidgets as widgets

from pygtftk.plugins.get_example import get_example


In [None]:
# Python counterpart of the CLI call: gtftk get_example -q -d simple -f '*'
# repeated once per example dataset listed below (all formats, quiet mode).
datasets = [
    "simple", "mini_real", "hg38_chr1",
    "ologram_1", "simple_07", "ologram_2",
]

for dataset in datasets:
    # format="*" requests every file of the dataset; quiet=True silences logging.
    # NOTE(review): presumably the files land in the current directory — confirm.
    get_example(quiet=True, format="*", dataset=dataset)
    

In [None]:
def feature_graph(tsv_file):
    """Build a layered Altair bar chart from an OLOGRAM statistics table.

    Parameters
    ----------
    tsv_file : str or path-like
        Tab-separated file (e.g. one of the ``*_ologram_stats.tsv`` files)
        that provides at least the columns ``feature_type``,
        ``nb_intersections_true``, ``nb_intersections_expectation_shuffled``,
        ``nb_intersections_variance_shuffled`` and ``nb_intersections_pvalue``.

    Returns
    -------
    altair.LayerChart
        Four superimposed layers sharing the ``feature_type`` x axis:

        * grey bars: expected value over the shuffles;
        * rules: expectation +/- sqrt(variance) of the shuffles;
        * blue bars (shifted by one bar width): observed value;
        * text: the p-value, drawn above the observed value and coloured
          by significance (see ``signif_color`` below).
    """
    ## I'll put this in a separate module soon
    result = pd.read_csv(tsv_file, sep='\t')

    expected = result["nb_intersections_expectation_shuffled"]
    std_dev = result["nb_intersections_variance_shuffled"] ** 0.5
    pvalue = result["nb_intersections_pvalue"]

    # Ratio observed / expected; the +1 keeps the ratio finite when the
    # expectation is 0.
    fc = result["nb_intersections_true"] / (expected + 1)

    # Significance colour: grey by default, orange shades for depletion
    # (fc < 1), green shades for enrichment (fc > 1). The darker shade is
    # assigned last so that it overrides the lighter one for p < 1e-10.
    result["signif_color"] = "#b3b3b3"
    result.loc[(pvalue < 0.05) & (fc < 1), "signif_color"] = '#ffa64d'
    result.loc[(pvalue < 0.05) & (fc > 1), "signif_color"] = '#6cc67b'
    result.loc[(pvalue < 1e-10) & (fc < 1), "signif_color"] = '#cc6600'
    result.loc[(pvalue < 1e-10) & (fc > 1), "signif_color"] = '#3c9040'

    # Bounds of the error rule: one standard deviation around the expectation.
    result["err_min"] = expected - std_dev
    result["err_max"] = expected + std_dev

    # NOTE(review): the title and y-axis label talk about overlap length in
    # base pairs, whereas every plotted column is an ``nb_intersections_*``
    # count. Confirm which one is intended and align labels or columns.
    chart = alt.Chart(
        result, width=600, height=400, title="Total overlap length per region type")

    x = alt.X('feature_type:O', title="Feature type")
    y_expected = alt.Y('nb_intersections_expectation_shuffled', title='Nb of overlapping base pairs')
    y_true = alt.Y('nb_intersections_true')

    bar_width = 15

    expected_bars = chart.mark_bar(
        color="#cccccc",
        width=bar_width,
    ).encode(
        x=x,
        y=y_expected,
        tooltip="nb_intersections_pvalue"
    )

    error_rules = chart.mark_rule().encode(
        x=x,
        y="err_min",
        y2="err_max",
    )

    # Shifted by one bar width so that it sits next to the expectation bar.
    true_bars = chart.mark_bar(
        color="#67b7e3",
        width=bar_width,
        xOffset=bar_width
    ).encode(
        x=x,
        y=y_true,
        tooltip="nb_intersections_pvalue"
    )

    # scale=None makes Altair use the hex codes stored in ``signif_color``
    # as literal colours instead of mapping them through a colour scale.
    pvals = chart.mark_text(yOffset=-20).encode(
        x=x,
        y=y_true,
        text="nb_intersections_pvalue",
        color=alt.Color("signif_color:N", scale=None),
    )

    return expected_bars + error_rules + true_bars + pvals


## Bar graph of overlap length

In [None]:

# Collect the OLOGRAM result tables present in the working directory.
# Sorted so that the dropdown order (and its default selection) is stable
# from one run to the next.
available_tsvs = sorted(
    tsv_path.as_posix() for tsv_path in Path('.').glob("*_ologram_stats.tsv")
)

if available_tsvs:
    # A list passed to interact becomes a dropdown; the chart is redrawn
    # whenever another file is selected.
    f = widgets.interact(feature_graph, tsv_file=available_tsvs)
else:
    # Without any file, interact would be given an empty dropdown and
    # feature_graph would have no file to read.
    f = None
    print("No '*_ologram_stats.tsv' file found in the current directory.")

## Computing new data (TODO)

```sh
gtftk ologram -i hg38_chr1.gtf.gz\
                -p ENCFF112BHN_H3K4me3_chr1.bed\
                -c hg38_chr1.genome\
                -u 1500 -d 1500 -D  \
                -pf example_pa_01.pdf -k 8 \
                -j summed_bp_overlaps_pvalue
```

I wanted to call the Python function directly and avoid a call to `subprocess.run`, as this can be insecure, by replicating the first example in OLOGRAM's documentation like this:

```py
from pygtftk.plugins.ologram import ologram

ologram(
    inputfile="hg38_chr1.gtf.gz",
    peak_file="ENCFF112BHN_H3K4me3_chr1.bed",
    chrom_info="hg38_chr1.genome",
    upstream=1500,
    downstream=1500,
    sort_features="summed_bp_overlaps_pvalue",
    nb_threads=8
)
```

But this results in an error because the function arguments need to pass through `ArgFormatter`. 

For now, I'll just construct a call to `subprocess.run`, but it would be worthwhile to separate the preprocessing from the argument parsing (i.e. extract the preprocessing functions from the `ArgFormatter` objects and expose them in the API).

