In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as pg
from plotly.subplots import make_subplots
import scipy.stats

In [None]:
def create_df(filepath):
    """Summarise every column of a comma-separated file with per-column statistics.

    Parameters
    ----------
    filepath : str or path-like
        CSV file to read; each column is treated as one group of observations.

    Returns
    -------
    pandas.DataFrame
        One row per CSV column (fresh 0..n-1 index) with the columns:

        - ``N``: the column label as read from the CSV header.
        - ``mean`` / ``std``: column mean and standard deviation
          (pandas defaults, which skip missing values).
        - ``p``: p-value of a one-sided one-sample t-test of the column
          against a population mean of 0 (``alternative='greater'``).
        - ``cfint``: distance from the column mean to the upper bound of the
          central 95% interval of a normal distribution parameterised by the
          column mean and standard deviation. Note the scale is the standard
          deviation itself, not the standard error of the mean.
        - ``p_single``: CDF at 0 of that same normal distribution.
    """
    df = pd.read_csv(filepath, sep=",")

    p_values = []
    half_widths = []
    p_at_or_below_zero = []
    for col in df.columns:
        values = df[col]
        # Compute the moments once per column instead of once per statistic.
        mu = values.mean()
        sigma = values.std()

        # mean()/std() skip NaN by default whereas ttest_1samp propagates it,
        # which would leave `p` as NaN next to otherwise valid statistics.
        # Drop missing values so every statistic describes the same sample.
        p_values.append(
            scipy.stats.ttest_1samp(values.dropna(), popmean=0, alternative='greater').pvalue
        )
        half_widths.append(scipy.stats.norm.interval(0.95, loc=mu, scale=sigma)[1] - mu)
        p_at_or_below_zero.append(scipy.stats.norm.cdf(0, mu, sigma))

    overview = pd.DataFrame.from_dict({
        'N': df.columns,
        'mean': df.mean(axis=0),
        'std': df.std(axis=0),
        'p': p_values,
        'cfint': half_widths,
        'p_single': p_at_or_below_zero
    }).reset_index(drop=True)
    return overview


def create_plot(df):
    """Build a dual-axis figure from an overview table.

    The primary y-axis carries a bar trace of ``df['mean']`` with error bars
    taken from ``df['cfint']``; the secondary y-axis carries a scatter trace of
    ``log10(df.p_single)``. Both traces share ``df.N`` as x values.

    Parameters
    ----------
    df : pandas.DataFrame
        Table providing the ``N``, ``mean``, ``cfint`` and ``p_single`` columns.

    Returns
    -------
    The figure produced by ``make_subplots`` with traces and styling applied.
    """
    text_size = 28/3
    # Axis tints; presumably chosen to match the template's default colours
    # for the first (bar) and second (scatter) trace - verify if traces change.
    bar_colour = '#1f77b4'
    line_colour = '#ff7f0e'

    def axis_text(title):
        # Title/tick typography shared by all three axes.
        return dict(
            title_text=title,
            title_font_family="Inter",
            title_font_size=text_size,
            tickfont_size=text_size,
        )

    def axis_tint(colour):
        # Paint title, tick labels, tick marks and axis line in one colour.
        return dict(
            title_font_color=colour,
            tickfont_color=colour,
            tickcolor=colour,
            linecolor=colour,
        )

    mean_bars = pg.Bar(
        x=df.N,
        y=df['mean'],
        error_y=dict(type='data', array=df['cfint'], visible=True),
    )
    log_p_points = pg.Scatter(x=df.N, y=np.log10(df.p_single))

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(mean_bars, secondary_y=False)
    fig.add_trace(log_p_points, secondary_y=True)

    fig.update_layout(
        template="simple_white",
        height=240,
        width=330,
        showlegend=False,
        margin=dict(l=0, r=40, t=5, b=0),
        font_family="Inter",
        legend_font_size=text_size,
    )
    fig.update_xaxes(**axis_text('Number of reads'))

    # Secondary axis: range and tick spacing are left to plotly's defaults.
    fig.update_yaxes(
        secondary_y=True,
        **axis_text('log(Probability of d≤0)'),
        **axis_tint(line_colour),
    )
    # Primary axis: fixed major/minor tick spacing, automatic range.
    fig.update_yaxes(
        secondary_y=False,
        dtick=0.1,
        minor_ticks="outside",
        minor_dtick=0.05,
        **axis_text('Distance (d)'),
        **axis_tint(bar_colour),
    )
    return fig

# Unconstrained case

In [None]:
# Summarise the unconstrained downsampling results, save the summary table
# alongside the input, and display it as the cell output.
unconstrained_input = "./results_downsampling/N30.csv"
unconstrained_output = "./results_downsampling/N30_analysis.csv"

overview = create_df(unconstrained_input)
overview.to_csv(unconstrained_output, index=False)
overview

In [None]:
# Plot the current `overview` table, show it inline, then export it as SVG.
unconstrained_svg = "./results_downsampling/unconstrained.svg"

fig = create_plot(overview)
fig.show()
fig.write_image(unconstrained_svg)

# Constrained case

In [None]:
# Summarise the constrained downsampling results, save the summary table
# alongside the input, and display it as the cell output.
# Note: this rebinds `overview`, replacing the unconstrained table.
constrained_input = "./results_downsampling/N30_constrained.csv"
constrained_output = "./results_downsampling/N30_constrained_analysis.csv"

overview = create_df(constrained_input)
overview.to_csv(constrained_output, index=False)
overview

In [None]:
# Plot the current `overview` table, show it inline, then export it as SVG.
constrained_svg = "./results_downsampling/constrained.svg"

fig = create_plot(overview)
fig.show()
fig.write_image(constrained_svg)