# results

In [None]:
import logging
import helpers

# Configure root logging with the project's format string.
# NOTE(review): this reaches `helpers.logging` through a plain `import helpers`,
# so it relies on the package exposing that submodule as an attribute — confirm.
logging.basicConfig(format=helpers.logging.format_string)
logger = logging.getLogger(__name__)
# Emit DEBUG-level messages from this notebook's logger and from the
# "helpers.deg_analysis" logger.
logger.setLevel("DEBUG")
logging.getLogger("helpers.deg_analysis").setLevel("DEBUG")

# Load the autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2

# Point the %sql / %%sql magics at DuckDB's `:default:` database and set the
# SqlMagic `autopandas` option to True.
# %load_ext sql  # is always already loaded somehow
%sql duckdb:///:default:
%config SqlMagic.autopandas = True

In [None]:
from helpers.running_cibersortx.loading_results import get_arrow_dataset_for_deg_analysis_results

# Open the DEG-analysis results stored under this GCS prefix (an Arrow dataset,
# judging by the helper's name). `gene_stats` is the name the SQL cell below
# selects FROM.
gene_stats = get_arrow_dataset_for_deg_analysis_results(
    "gs://liulab/differential_composition_and_expression/copied/20230505_21h41m44s/deg_analysis/"
)

In [None]:
%%sql df <<
SELECT
    -- The `df <<` on the magic line stores this query's result in the Python
    -- variable `df`, which the following cells consume.
    origin,
    malignant_means,
    log2_fc,
    run_id,
    gene_symbol,
    -- Output `perturbed` is true only when the source flag is set AND log2_fc is nonzero.
    perturbed AND log2_fc != 0 AS perturbed,
    log2_fold_change,
    "pval",
    "-log10_pval",
    "pval_adjusted_bh",
    -- Computed in the query as -log10 of the BH-adjusted p-value.
    -1.0 * log10("pval_adjusted_bh") as "-log10_pval_adjusted_bh",
    "significant_bh_fdr=0.10",
FROM gene_stats
WHERE 
    -- Keep only rows whose origin is 'malignant_cibersortx'. The commented-out
    -- predicates below are optional narrower filters that are currently disabled.
    origin = 'malignant_cibersortx'
    --AND malignant_means = 'None,None'
    --AND log2_fc in (-1.50, -1.00)
    --AND run_id = 0
;

In [None]:
from helpers.deg_analysis.postprocessing_gene_stats_fields import add_more_pval_fields


# Pass the query result through add_more_pval_fields; presumably this is where
# the "-log10_pval_adjusted_bh_signed_directional" column used further down
# comes from — TODO confirm.
# NOTE(review): `df` is rebound to the output, so re-running this cell feeds
# already-processed data back in; confirm the helper tolerates that.
df = add_more_pval_fields(df)

In [None]:
from helpers.deg_analysis.computing_classifier_roc_precion_recall import (
    compute_all_curves_and_metrics,
)
from helpers.deg_analysis.plotting_classifier_roc_precion_recall import make_score_table, make_score_table_with_stddev
from helpers.deg_analysis.plotting_classifier_roc_precion_recall import plot_metric_by_threshold

In [None]:
# One call yields three results, unpacked in order: ROC curves,
# precision-recall curves, and the scores used by the tables below.
df_roc_curves, df_precision_recall_curves, df_scores = compute_all_curves_and_metrics(
    df,
    signed_directional=True,
)

In [None]:
# Score table built from the "true_pos_count" entry of df_scores.
make_score_table(df_scores["true_pos_count"])

In [None]:
# Score table built from the "false_pos_count" entry of df_scores.
make_score_table(df_scores["false_pos_count"])

In [None]:
# Score table built from the "roc_auc" entry of df_scores.
make_score_table(df_scores["roc_auc"])

In [None]:
# The same "roc_auc" scores, rendered with the with-stddev table variant.
make_score_table_with_stddev(df_scores["roc_auc"])

In [None]:
# Score table built from the "precision" entry of df_scores.
make_score_table(df_scores["precision"])

In [None]:
# The same "precision" scores, rendered with the with-stddev table variant.
make_score_table_with_stddev(df_scores["precision"])

In [None]:
# Score table built from the "recall" entry of df_scores.
make_score_table(df_scores["recall"])

In [None]:
# TODO: What's going on at malignant_means = '0.71,0.71'?
# Why is the precision so low?


In [None]:
# Plot the "precision" metric by threshold from the precision-recall curves,
# using the signed-directional -log10 BH-adjusted p-value as the score column.
fig = plot_metric_by_threshold(
    df_precision_recall_curves,
    score_column="-log10_pval_adjusted_bh_signed_directional",
    metric_column="precision",
)
# Fix the figure size, then show it with the "png" renderer at scale 2.
fig.update_layout(width=1200, height=1600)
fig.show(renderer="png", scale=2)

In [None]:
from helpers.deg_analysis.plotting_volcanos_v1 import make_volcano_grid_scatter


# Volcano grid over the query results, using the signed-directional -log10
# BH-adjusted p-value as the p-value column and `perturbed` as the flag column.
fig = make_volcano_grid_scatter(
    df,
    groupby_cols=["origin", "malignant_means", "log2_fc", "gene_symbol", "perturbed"],
    pval_col="-log10_pval_adjusted_bh_signed_directional",
    perturbed_col="perturbed",
)
# Fix the figure size, limit the y-axes to [-50, 50] (the p-value column is
# signed, so the range spans both directions), and set the marker size to 5.
fig.update_layout(width=1200, height=800)
fig.update_yaxes(range=[-50, 50])
fig.update_traces(marker_size=5)

# Show with the "png" renderer at scale 2.
fig.show(renderer="png", scale=2)

In [None]:
# ROC-AUC score table.
# This cell previously referenced `deg_analysis` (never bound as a name by the
# imports in this notebook) and `df_roc_auc_scores` (never defined at notebook
# scope), so it failed on a fresh kernel. It now uses the imported
# make_score_table and the "roc_auc" entry of df_scores computed above.
# NOTE(review): assumes df_scores["roc_auc"] is what `df_roc_auc_scores` used
# to hold — confirm.
make_score_table(df_scores["roc_auc"])

In [None]:
import upath
from helpers.deg_analysis.computing_classifier_roc_precion_recall import (
    calculate_precision_and_recall,
    calculate_roc,
)
from helpers.deg_analysis.plotting_classifier_roc_precion_recall import make_score_table
from helpers.deg_analysis.postprocessing_gene_stats_fields import load_gene_stats


def compute_from_other():
    """Load gene stats from a second results directory and subset them.

    Returns:
        The loaded gene stats restricted to rows whose ``log2_fc`` index level
        is not the string ``"0.00"`` and whose ``origin`` index level is
        ``"malignant_cibersortx"``. The ``origin`` level is kept in the index.
    """
    logger.debug("loading gene stats")
    all_gene_stats = load_gene_stats(
        upath.UPath("gs://liulab/differential_composition_and_expression/20230505_21h41m44s")
    )

    logger.debug("subsetting to malignant_cibersortx, excluding log2_fc=0.00")
    # Boolean mask over the index: True where the log2_fc level differs from "0.00".
    has_fold_change = all_gene_stats.index.get_level_values("log2_fc") != "0.00"
    nonzero_fold_change = all_gene_stats.loc[has_fold_change]
    # Cross-section on the origin level, keeping that level in the result.
    return nonzero_fold_change.xs("malignant_cibersortx", level="origin", drop_level=False)


def compute_tables(df, score_column="-log10_pval_adjusted_bh_signed_directional"):
    """Build the ROC-AUC and precision score tables for a gene-stats frame.

    Args:
        df: Gene stats handed to ``calculate_roc`` and
            ``calculate_precision_and_recall``.
        score_column: Name of the score column forwarded to both calculations.
            This used to be read from a global ``score_column`` that is never
            defined in this notebook, which raised ``NameError``. The default
            mirrors the score column used by the plots earlier in this
            notebook — TODO confirm frames from ``load_gene_stats`` carry it.

    Returns:
        A ``(df_table_roc, df_table_precision)`` pair: ``make_score_table``
        applied to the ROC-AUC scores and to the precision scores.
    """
    # Both helpers return (curves, scores); only the scores feed the tables.
    _roc_curves, df_roc_auc_scores = calculate_roc(df, score_column)
    _pr_curves, df_precision = calculate_precision_and_recall(df, score_column)
    df_table_roc = make_score_table(df_roc_auc_scores)
    df_table_precision = make_score_table(df_precision)
    return df_table_roc, df_table_precision


logging.getLogger("helpers.deg_analysis").setLevel("DEBUG")

# compute_from_other() returns a single gene-stats frame; the two tables come
# from running compute_tables on it. The previous code unpacked that single
# frame straight into the two table names and never called compute_tables.
df_table_roc, df_table_precision = compute_tables(compute_from_other())

In [None]:
# Display df_table_roc as assigned in the preceding cell.
df_table_roc