# Description

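This notebook post-processes the fgsea enrichment results of latent variables (LVs) on the differentially expressed gene sets from the CRISPR screen. For each LV/pathway pair it keeps the run with the largest (least significant) p-value among the repeated fgsea runs, adjusts those p-values for multiple testing with the Benjamini-Hochberg FDR procedure, and lists the pairs with an adjusted p-value below 0.05. Note that the last cell overwrites the input file with the adjusted results.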

# Modules loading

In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before each cell is executed (edits to project modules are
# picked up without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
from IPython.display import HTML
from statsmodels.stats.multitest import multipletests

from entity import Trait
from data.cache import read_data
import conf

# Settings

In [3]:
# Absolute path of the fgsea results file (all LVs) inside the CRISPR analyses
# results directory; this file is both read and, at the end, overwritten.
FGSEA_INPUT_FILEPATH = (
    Path(conf.RESULTS["CRISPR_ANALYSES"]["BASE_DIR"]) / "fgsea-all_lvs.tsv"
).resolve()

# Data loading

## LVs enrichment on DEG from CRISPR screen

In [4]:
# Load the tab-separated fgsea results, then discard the "padj" column stored
# in the file: adjusted p-values are recomputed further below.
deg_enrich = pd.read_csv(FGSEA_INPUT_FILEPATH, sep="\t")
deg_enrich = deg_enrich.drop(columns=["padj"])

In [5]:
# (rows, columns) of the table as loaded, before any filtering
deg_enrich.shape

(1973, 9)

In [6]:
# preview the first rows of the loaded fgsea results
deg_enrich.head()

Unnamed: 0,pathway,pval,log2err,ES,NES,size,leadingEdge,lv,rep_idx
0,gene_set_decrease,0.184815,0.095921,0.578418,1.126666,35,"PTBP1, KEAP1, PEX14, DLST, PCYT2, MAD2L2, GLRX...",LV1,1
1,gene_set_increase,0.235764,0.082205,0.539155,1.076511,63,"CHERP, RANGAP1, HNRNPL, RPS2, E4F1, TAF1C, GAT...",LV1,1
2,gene_set_decrease,0.879397,0.017313,0.464502,0.749023,13,"MBOAT1, ACVR1B, CSK, TCF7L2, TAZ, OGDH",LV2,1
3,gene_set_increase,,,-0.300857,,18,"CSTF3, NOP56, USP39, LUC7L3, RPS28, MED8, RPS1...",LV2,1
4,gene_set_decrease,0.806194,0.02237,0.518921,0.872645,28,"ACVR1B, PPP2R2B, FBXW7, PIK3R1, PEX14, NDUFA4,...",LV3,1


In [7]:
# discard every row that has a missing value in at least one column
# (e.g. runs for which no p-value is available)
deg_enrich = deg_enrich.dropna(axis="index", how="any")

In [8]:
# fgsea was run 10 times for each lv/pathway pair; for every pair, record the
# row label of the run with the largest (i.e. least significant) p-value
deg_enrich_max_idx = (
    deg_enrich
    .groupby(["lv", "pathway"])["pval"]
    .idxmax()
)

In [9]:
# keep only the selected run of each lv/pathway pair, renumber the rows from
# zero, and show the resulting dimensions and first rows
deg_enrich = deg_enrich.loc[deg_enrich_max_idx]
deg_enrich = deg_enrich.reset_index(drop=True)
display(deg_enrich.shape, deg_enrich.head())

(1966, 9)

Unnamed: 0,pathway,pval,log2err,ES,NES,size,leadingEdge,lv,rep_idx
0,gene_set_decrease,0.184815,0.095921,0.578418,1.126666,35,"PTBP1, KEAP1, PEX14, DLST, PCYT2, MAD2L2, GLRX...",LV1,1
1,gene_set_increase,0.235764,0.082205,0.539155,1.076511,63,"CHERP, RANGAP1, HNRNPL, RPS2, E4F1, TAF1C, GAT...",LV1,1
2,gene_set_decrease,0.945,0.011106,0.434799,0.68995,20,"VDR, ACVR1B, KEAP1, NDUFV2, PEX14, NDUFS3, WDR26",LV10,1
3,gene_set_decrease,0.027027,0.276501,0.831205,1.736761,27,"NDUFB7, PTBP1, RRAGC, PPP2R2B, SQLE",LV100,1
4,gene_set_increase,0.100899,0.13649,0.706986,1.467907,40,"PCBP1, IGF1R, UXT, SNRPD1, GTF2H1",LV100,1


## MultiPLIER summary

In [10]:
# multiplier_model_summary = read_data(conf.MULTIPLIER["MODEL_SUMMARY_FILE"])

In [11]:
# multiplier_model_summary.shape

In [12]:
# multiplier_model_summary.head()

# Adjust p-values

In [13]:
# Benjamini-Hochberg FDR correction over all retained p-values; the result is
# a tuple whose first element flags the rejected hypotheses at alpha and whose
# second element holds the adjusted p-values
adj_pvals = multipletests(
    pvals=deg_enrich["pval"],
    alpha=0.05,
    method="fdr_bh",
)

In [14]:
# inspect the raw tuple returned by multipletests
adj_pvals

(array([False, False, False, ..., False, False, False]),
 array([0.68522993, 0.70804322, 1.        , ..., 0.89093706, 0.54570796,
        0.86760812]),
 2.6089839912368795e-05,
 2.5432349949135303e-05)

In [15]:
# number of lv/pathway pairs flagged as significant after the correction
adj_pvals[0].sum()

4

In [16]:
# attach the adjusted p-values (second element of the multipletests result)
# as a new "padj" column
padj_values = adj_pvals[1]
deg_enrich = deg_enrich.assign(padj=padj_values)

In [17]:
# preview the table with the new "padj" column
deg_enrich.head()

Unnamed: 0,pathway,pval,log2err,ES,NES,size,leadingEdge,lv,rep_idx,padj
0,gene_set_decrease,0.184815,0.095921,0.578418,1.126666,35,"PTBP1, KEAP1, PEX14, DLST, PCYT2, MAD2L2, GLRX...",LV1,1,0.68523
1,gene_set_increase,0.235764,0.082205,0.539155,1.076511,63,"CHERP, RANGAP1, HNRNPL, RPS2, E4F1, TAF1C, GAT...",LV1,1,0.708043
2,gene_set_decrease,0.945,0.011106,0.434799,0.68995,20,"VDR, ACVR1B, KEAP1, NDUFV2, PEX14, NDUFS3, WDR26",LV10,1,1.0
3,gene_set_decrease,0.027027,0.276501,0.831205,1.736761,27,"NDUFB7, PTBP1, RRAGC, PPP2R2B, SQLE",LV100,1,0.638514
4,gene_set_increase,0.100899,0.13649,0.706986,1.467907,40,"PCBP1, IGF1R, UXT, SNRPD1, GTF2H1",LV100,1,0.646958


# Analysis

In [18]:
# lv/pathway pairs whose adjusted p-value is below 0.05, most significant first
is_significant = deg_enrich["padj"].lt(0.05)
df = deg_enrich.loc[is_significant].sort_values(by="padj")

In [19]:
# (rows, columns) of the significant subset
df.shape

(4, 10)

In [20]:
# show the significant pairs ordered by their unadjusted p-value (ascending)
df.sort_values(by="pval", ascending=True)

Unnamed: 0,pathway,pval,log2err,ES,NES,size,leadingEdge,lv,rep_idx,padj
1279,gene_set_decrease,8.966156e-08,0.704976,0.858542,1.642519,43,"NDUFA4, COX6A1, ATP5O, NDUFB10, COX5A, NDUFS3,...",LV678,1,0.000111
1344,gene_set_increase,1.127631e-07,0.704976,0.905734,1.567693,49,"RPS14, RPL31, RPS19, RPS11, RPS6, RPL37, RPSA,...",LV707,1,0.000111
1783,gene_set_increase,1.187932e-05,0.593325,0.732205,1.485856,60,"RPS6, RPL6, RPLP0, RPL19, RPL31, RPL7, RPS13, ...",LV905,1,0.007785
1805,gene_set_increase,7.163235e-05,0.538434,0.627667,1.459288,83,"SAFB, LUC7L3, HSP90B1, CHD4, SNRPD3, ISY1, DKC...",LV915,1,0.035207


# Save

In [22]:
# Overwrite the original file with the adjusted p-values.
#
# The row index is not written (index=False): it is a plain 0..n-1 range
# produced by the earlier reset_index(drop=True) and carries no information.
# Because this file is also the notebook's input, writing the index would make
# the next run read it back as a spurious "Unnamed: 0" column (and one more
# such column on every further run), so the notebook would not be re-runnable
# with the same result.
deg_enrich.to_csv(
    FGSEA_INPUT_FILEPATH,
    sep="\t",
    index=False,
)