# results

In [1]:
import logging
import helpers

# Configure logging with the format string defined in helpers.logging, and
# emit DEBUG records from this notebook's own logger.
logging.basicConfig(format=helpers.logging.format_string)
logger = logging.getLogger(__name__)
logger.setLevel("DEBUG")
# Also surface DEBUG records from the helpers.deg_analysis loggers.
logging.getLogger("helpers.deg_analysis").setLevel("DEBUG")

# Reload imported modules before each cell runs, so edits to helper modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# NOTE(review): the sql extension is not loaded explicitly here; the %sql
# magic below relies on it being loaded already — confirm this survives
# "Restart Kernel -> Run All" in a fresh environment.
# %load_ext sql  # is always already loaded somehow
%sql duckdb:///:default:
# Have SQL magics return their results as pandas DataFrames.
%config SqlMagic.autopandas = True

In [2]:
from helpers.running_cibersortx.loading_results import get_arrow_dataset_for_deg_analysis_results

# Open the DEG-analysis results stored under this GCS prefix.
# `gene_stats` is the name the SQL cell below reads FROM.
# (presumably an Arrow dataset, per the loader's name — TODO confirm)
gene_stats = get_arrow_dataset_for_deg_analysis_results(
    "gs://liulab/differential_composition_and_expression/copied/20230505_21h41m44s/deg_analysis/"
)

In [3]:
%%sql df <<
SELECT
    -- the first four columns are the grouping keys used by the downstream helpers
    origin,
    malignant_means,
    log2_fc,
    run_id,
    gene_symbol,
    perturbed,
    log2_fold_change,
    "pval",
    "-log10_pval",
    "pval_adjusted_bh",
    -- derived in this query as the negative log10 of pval_adjusted_bh
    -1.0 * log10("pval_adjusted_bh") as "-log10_pval_adjusted_bh",
    -- the trailing comma before FROM is accepted by DuckDB
    "significant_bh_fdr=0.10",
FROM gene_stats
WHERE 
    -- keep only the malignant_cibersortx origin
    origin = 'malignant_cibersortx'
    -- optional narrower filters, currently disabled
    --AND malignant_means = 'None,None'
    --AND log2_fc in (-1.50, -1.00)
    --AND run_id = 0
;

*  duckdb:///:default:
Done.


In [4]:
from helpers.deg_analysis.postprocessing_gene_stats_fields import add_more_pval_fields


# Add the derived p-value fields (listed in the debug log this cell emits).
# NOTE(review): this rebinds `df` to the helper's output, so re-running the
# cell feeds the already-extended frame back in — confirm the helper tolerates
# that, or re-run the SQL cell first.
df = add_more_pval_fields(df)

2023-06-02 05:11:06,592 27686/MainThread helpers.deg_analysis.postprocessing_gene_stats_fields DEBUG Grouping by ['origin', 'malignant_means', 'log2_fc', 'run_id']
2023-06-02 05:11:08,184 27686/MainThread helpers.deg_analysis.postprocessing_gene_stats_fields DEBUG Counts of group sizes: 16063    490
dtype: int64
2023-06-02 05:11:12,900 27686/MainThread helpers.deg_analysis.postprocessing_gene_stats_fields DEBUG adding fields ['pval_adjusted_bh_signed', 'pval_adjusted_bh_signed_directional', '-log10_pval_adjusted_bh_signed', '-log10_pval_adjusted_bh_signed_directional']


In [11]:
# Inspect the column dtypes of `df`, including the fields added by add_more_pval_fields.
df.dtypes

origin                                         object
malignant_means                                object
log2_fc                                       float64
run_id                                          int64
gene_symbol                                    object
perturbed                                        bool
log2_fold_change                              float64
pval                                          float64
-log10_pval                                   float64
pval_adjusted_bh                              float64
-log10_pval_adjusted_bh                       float64
significant_bh_fdr=0.10                          bool
-log10_pval_signed_directional                float64
pval_adjusted_bh_signed                       float64
pval_adjusted_bh_signed_directional           float64
-log10_pval_adjusted_bh_signed                float64
-log10_pval_adjusted_bh_signed_directional    float64
dtype: object

In [5]:
from helpers.deg_analysis.computing_classifier_roc_precion_recall import (
    compute_all_curves_and_metrics,
)
from helpers.deg_analysis.plotting_classifier_roc_precion_recall import make_table_scores
from helpers.deg_analysis.plotting_classifier_roc_precion_recall import plot_metric_by_threshold

In [12]:
# Compute the ROC and precision-recall curves together with their summary
# scores; the helper logs the grouping columns and score columns it uses.
df_roc_curves, df_roc_auc_scores, df_precision_recall_curves, df_precision = (
    compute_all_curves_and_metrics(df)
)

2023-06-02 05:24:47,881 27686/MainThread helpers.deg_analysis.computing_classifier_roc_precion_recall DEBUG grouping by ['origin', 'malignant_means', 'log2_fc', 'run_id']
2023-06-02 05:24:47,882 27686/MainThread helpers.deg_analysis.computing_classifier_roc_precion_recall DEBUG calculating ROC curves with -log10_pval_signed_directional
2023-06-02 05:24:52,277 27686/MainThread helpers.deg_analysis.computing_classifier_roc_precion_recall DEBUG calculating ROC AUC scores with -log10_pval_signed_directional
2023-06-02 05:24:55,433 27686/MainThread helpers.deg_analysis.computing_classifier_roc_precion_recall DEBUG grouping by ['origin', 'malignant_means', 'log2_fc', 'run_id']
2023-06-02 05:24:55,434 27686/MainThread helpers.deg_analysis.computing_classifier_roc_precion_recall DEBUG calculating precision-recall curves with -log10_pval_adjusted_bh_signed_directional
2023-06-02 05:24:59,471 27686/MainThread helpers.deg_analysis.computing_classifier_roc_precion_recall DEBUG calculating precisio

In [13]:
# ROC AUC scores as a table: one row per malignant_means, one column per log2_fc.
make_table_scores(df_roc_auc_scores)

log2_fc,-1.500000,-1.000000,-0.500000,0.000000,0.500000,1.000000,1.500000
malignant_means,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"0.55,0.85",0.957148,0.922353,0.777725,0.507364,0.734433,0.836452,0.89825
"0.57,0.83",0.96145,0.912163,0.792249,0.508593,0.757754,0.855973,0.91807
"0.6,0.8",0.947301,0.934754,0.818051,0.507293,0.764328,0.894775,0.922897
"0.63,0.77",0.964065,0.924293,0.8351,0.516799,0.785535,0.899015,0.932544
"0.65,0.75",0.948312,0.928817,0.807047,0.502537,0.788284,0.909549,0.950682
"0.7,0.72",0.958926,0.930727,0.807395,0.516886,0.7974,0.914971,0.960028
"0.71,0.71",0.936109,0.916941,0.798867,0.511128,0.804655,0.926889,0.964664
"0.72,0.7",0.94735,0.907223,0.809963,0.487212,0.834643,0.909634,0.961284
"0.75,0.65",0.919375,0.888642,0.773396,0.520193,0.801105,0.905582,0.952603
"0.77,0.63",0.901844,0.846111,0.764827,0.509492,0.780345,0.916771,0.950566


In [14]:
# Precision scores as a table: one row per malignant_means, one column per log2_fc.
make_table_scores(df_precision)

log2_fc,-1.500000,-1.000000,-0.500000,0.000000,0.500000,1.000000,1.500000
malignant_means,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"0.55,0.85",0.14366,0.056292,0.017865,0.004992,0.011698,0.014437,0.020625
"0.57,0.83",0.062297,0.176449,0.07491,0.005029,0.133393,0.038479,0.290473
"0.6,0.8",0.389048,0.289046,0.286243,0.005382,0.433553,0.262933,0.167904
"0.63,0.77",0.734812,0.453603,0.539024,0.0,0.435347,0.720284,0.603858
"0.65,0.75",0.812036,0.843393,0.383885,0.0,0.931429,0.905195,0.901569
"0.7,0.72",0.919928,0.984615,0.966667,0.0,0.511111,0.944383,0.94565
"0.71,0.71",0.926727,0.975942,0.6,0.0,0.8,0.951826,0.966784
"0.72,0.7",0.953087,0.930683,0.8,0.0,0.6,0.954235,0.967491
"0.75,0.65",0.683689,0.808333,0.6,0.0,0.44281,0.635542,0.898064
"0.77,0.63",0.520796,0.576438,0.34,0.001166,0.526923,0.867302,0.588713


In [15]:
from helpers.deg_analysis.plotting_classifier_roc_precion_recall import (
    make_table_scores_with_stddev,
)


# Same precision table, with each cell rendered as "score±stddev".
make_table_scores_with_stddev(df_precision)

log2_fc,-1.500000,-1.000000,-0.500000,0.000000,0.500000,1.000000,1.500000
malignant_means,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"0.55,0.85",0.14±0.20,0.06±0.05,0.02±0.00,0.00±0.00,0.01±0.00,0.01±0.01,0.02±0.01
"0.57,0.83",0.06±0.03,0.18±0.30,0.07±0.10,0.01±0.00,0.13±0.26,0.04±0.02,0.29±0.33
"0.6,0.8",0.39±0.35,0.29±0.32,0.29±0.43,0.01±0.00,0.43±0.40,0.26±0.17,0.17±0.18
"0.63,0.77",0.73±0.27,0.45±0.47,0.54±0.44,0.00±0.00,0.44±0.44,0.72±0.37,0.60±0.35
"0.65,0.75",0.81±0.16,0.84±0.20,0.38±0.46,0.00±0.00,0.93±0.10,0.91±0.19,0.90±0.13
"0.7,0.72",0.92±0.12,0.98±0.03,0.97±0.07,0.00±0.00,0.51±0.50,0.94±0.04,0.95±0.09
"0.71,0.71",0.93±0.12,0.98±0.04,0.60±0.55,0.00±0.00,0.80±0.45,0.95±0.04,0.97±0.03
"0.72,0.7",0.95±0.04,0.93±0.08,0.80±0.45,0.00±0.00,0.60±0.55,0.95±0.05,0.97±0.04
"0.75,0.65",0.68±0.21,0.81±0.24,0.60±0.55,0.00±0.00,0.44±0.44,0.64±0.43,0.90±0.15
"0.77,0.63",0.52±0.38,0.58±0.23,0.34±0.42,0.00±0.00,0.53±0.45,0.87±0.17,0.59±0.38


In [None]:
# Plot precision against the score threshold for the precision-recall curves.
# NOTE(review): the debug log above says the curves were calculated with
# "-log10_pval_adjusted_bh_signed_directional", while this cell passes
# "-log10_pval_adjusted_bh" as the score column — confirm which is intended.
fig = plot_metric_by_threshold(
    df_precision_recall_curves,
    score_column="-log10_pval_adjusted_bh",
    metric_column="precision",
)

# Render as a static PNG at 2x scale.
fig.show(renderer="png", scale=2)

In [None]:
from helpers.deg_analysis.plotting_volcanos_v1 import make_volcano_grid_scatter


# Volcano grid using the signed, directional -log10 adjusted p-value as the
# p-value column; the `perturbed` column marks perturbed genes.
fig = make_volcano_grid_scatter(
    df,
    groupby_cols=["origin", "malignant_means", "log2_fc", "gene_symbol", "perturbed"],
    pval_col="-log10_pval_adjusted_bh_signed_directional",
    perturbed_col="perturbed",
)
# Each update_* call modifies the figure in place and returns it, so the
# sizing, y-axis range and marker tweaks are chained.
(
    fig.update_layout(width=1200, height=800)
    .update_yaxes(range=[-50, 50])
    .update_traces(marker_size=5)
)

# Render as a static PNG at 2x scale.
fig.show(renderer="png", scale=2)

In [None]:
# ROC AUC scores as a table: one row per malignant_means, one column per log2_fc.
# Uses the directly imported make_table_scores: the bare name `deg_analysis`
# is never bound in this notebook, so the dotted form raised NameError.
make_table_scores(df_roc_auc_scores)

In [None]:
import upath
from helpers.deg_analysis.computing_classifier_roc_precion_recall import (
    calculate_precision_and_recall,
    calculate_roc,
)
from helpers.deg_analysis.plotting_classifier_roc_precion_recall import make_table_scores
from helpers.deg_analysis.postprocessing_gene_stats_fields import load_gene_stats


def compute_from_other():
    """Load gene stats from the other results location and subset them.

    Reads gene stats with ``load_gene_stats`` from the
    ``20230505_21h41m44s`` path root, then keeps only rows whose ``origin``
    index level is ``"malignant_cibersortx"`` and whose ``log2_fc`` index
    level is not equal to ``"0.00"``. The ``origin`` level is kept in the
    result.

    Returns:
        The subsetted gene-stats frame.
    """
    logger.debug("loading gene stats")
    path_root = upath.UPath(
        "gs://liulab/differential_composition_and_expression/20230505_21h41m44s"
    )
    df_gene_stats = load_gene_stats(path_root)
    logger.debug("subsetting to malignant_cibersortx, excluding log2_fc=0.00")
    # NOTE(review): this compares the log2_fc level against the *string*
    # "0.00". If that level holds floats, the comparison is True for every
    # row and nothing is excluded — confirm the level's dtype.
    is_nonzero_log2_fc = df_gene_stats.index.get_level_values("log2_fc") != "0.00"
    return df_gene_stats.loc[is_nonzero_log2_fc].xs(
        "malignant_cibersortx", level="origin", drop_level=False
    )


def compute_tables(df, score_column="-log10_pval_adjusted_bh_signed_directional"):
    """Build the ROC AUC and precision score tables for ``df``.

    Args:
        df: Gene-stats frame passed to ``calculate_roc`` and
            ``calculate_precision_and_recall``.
        score_column: Name of the column used as the classifier score for
            both calculations. The default is an assumption — TODO confirm
            that the frame returned by ``load_gene_stats`` has this column,
            or pass the intended column explicitly.

    Returns:
        A ``(df_table_roc, df_table_precision)`` tuple, each produced by
        ``make_table_scores``.
    """
    # Only the score frames feed the tables; the curve frames are not needed.
    _roc_curves, df_roc_auc_scores = calculate_roc(df, score_column)
    _precision_recall_curves, df_precision = calculate_precision_and_recall(df, score_column)
    df_table_roc = make_table_scores(df_roc_auc_scores)
    df_table_precision = make_table_scores(df_precision)
    return df_table_roc, df_table_precision


logging.getLogger("helpers.deg_analysis").setLevel("DEBUG")

# compute_from_other() returns a single frame; the two tables come from
# passing that frame through compute_tables().
df_table_roc, df_table_precision = compute_tables(compute_from_other())

In [None]:
# Display the ROC AUC table assigned in the previous cell.
df_table_roc