# Description

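This notebook reads the fgsea enrichment results of latent variables (LVs) on the differentially expressed gene sets from the CRISPR screen, keeps for each LV/gene-set pair the fgsea run with the largest (least significant) p-value, adjusts those p-values for multiple testing (Benjamini-Hochberg FDR), and writes the resulting table back to the input file.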

# Modules loading

In [1]:
# Reload imported modules automatically before each cell is executed, so
# edits to project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
from IPython.display import HTML
from statsmodels.stats.multitest import multipletests

from entity import Trait
from data.cache import read_data
import conf

# Settings

In [3]:
# Absolute path of the fgsea results table, located in the CRISPR analyses
# results directory taken from the project configuration.
FGSEA_INPUT_FILEPATH = (
    Path(conf.RESULTS["CRISPR_ANALYSES"]["BASE_DIR"]) / "fgsea-hi_conf-all_lvs.tsv"
).resolve()

# Data loading

## LVs enrichment on DEG from CRISPR screen

In [4]:
# Load the tab-separated fgsea results. The first, unnamed column of the file
# is kept as a regular column (pandas labels it "Unnamed: 0").
deg_enrich = pd.read_csv(FGSEA_INPUT_FILEPATH, sep="\t")

In [5]:
# Size of the loaded table as (rows, columns), before any filtering.
deg_enrich.shape

(19740, 10)

In [6]:
# Peek at the first rows to check which columns were parsed.
deg_enrich.head()

Unnamed: 0,pathway,pval,padj,log2err,ES,NES,size,leadingEdge,lv,rep_idx
0,gene_set_decrease,0.095904,0.191808,0.140406,0.909509,1.257073,5,"PCYT2, UBE2J2, FBXW7",LV1,1
1,gene_set_increase,0.228771,0.228771,0.083836,0.840253,1.286063,3,"ACACA, MBTPS1",LV1,1
2,gene_set_decrease,0.101898,0.203796,0.135741,0.909509,1.25356,5,"PCYT2, UBE2J2, FBXW7",LV1,2
3,gene_set_increase,0.285714,0.285714,0.07218,0.840253,1.258282,3,"ACACA, MBTPS1",LV1,2
4,gene_set_decrease,0.097902,0.195804,0.138805,0.909509,1.260276,5,"PCYT2, UBE2J2, FBXW7",LV1,3


In [7]:
# Discard every row that has a missing value in at least one column.
deg_enrich = deg_enrich.dropna(axis="index", how="any")

In [8]:
# fgsea was run 10 times for every LV/pathway pair. To be conservative, pick
# for each pair the row label of the run with the largest (least significant)
# p-value.
pval_by_lv_pathway = deg_enrich.groupby(["lv", "pathway"])["pval"]
deg_enrich_max_idx = pval_by_lv_pathway.idxmax()

In [9]:
# Keep only the selected run of each LV/pathway pair and renumber the rows
# from zero, discarding the old row labels.
deg_enrich = deg_enrich.loc[deg_enrich_max_idx]
deg_enrich = deg_enrich.reset_index(drop=True)

# Show the size of the reduced table followed by its first rows.
display(deg_enrich.shape, deg_enrich.head())

(1974, 10)

Unnamed: 0,pathway,pval,padj,log2err,ES,NES,size,leadingEdge,lv,rep_idx
0,gene_set_decrease,0.116883,0.233766,0.12564,0.909509,1.237006,5,"PCYT2, UBE2J2, FBXW7",LV1,8
1,gene_set_increase,0.285714,0.285714,0.07218,0.840253,1.258282,3,"ACACA, MBTPS1",LV1,2
2,gene_set_decrease,0.741259,0.741259,0.026956,0.751964,1.005845,5,"TCF7L2, UBE2J2, PTEN",LV10,10
3,gene_set_increase,0.427572,0.72028,0.052805,0.81414,1.320263,3,"MBTPS1, DGAT2",LV10,4
4,gene_set_decrease,0.908092,0.908092,0.014514,0.5404,0.778719,5,"PTEN, TCF7L2",LV100,1


## MultiPLIER summary

In [10]:
# NOTE(review): loading of the MultiPLIER model summary is disabled; the cells
# visible in this notebook do not use `multiplier_model_summary`.
# multiplier_model_summary = read_data(conf.MULTIPLIER["MODEL_SUMMARY_FILE"])

In [11]:
# multiplier_model_summary.shape

In [12]:
# multiplier_model_summary.head()

# Adjust p-values

In [13]:
# Benjamini-Hochberg FDR correction over the p-values of all remaining
# LV/pathway rows at once. The result is kept as the tuple returned by
# `multipletests`, which the following cells index by position.
adj_pvals = multipletests(
    pvals=deg_enrich["pval"],
    alpha=0.05,
    method="fdr_bh",
)

In [14]:
# `multipletests` returns a tuple: (reject flags, corrected p-values,
# Sidak-corrected alpha, Bonferroni-corrected alpha).
adj_pvals

(array([False, False, False, ..., False, False, False]),
 array([0.96728523, 0.96728523, 0.96728523, ..., 0.96728523, 0.96728523,
        0.96728523]),
 2.5984107385745858e-05,
 2.5329280648429586e-05)

In [15]:
# Count how many tests are rejected (flagged True) after the correction.
adj_pvals[0].sum()

0

No LV/gene-set enrichment is significant (FDR < 0.05) after correcting for multiple testing.

In [16]:
# Attach the corrected p-values (second element of the `multipletests` result)
# to the table as the `fdr` column.
corrected_pvals = adj_pvals[1]
deg_enrich = deg_enrich.assign(fdr=corrected_pvals)

In [17]:
# Check that the new `fdr` column is present.
deg_enrich.head()

Unnamed: 0,pathway,pval,padj,log2err,ES,NES,size,leadingEdge,lv,rep_idx,fdr
0,gene_set_decrease,0.116883,0.233766,0.12564,0.909509,1.237006,5,"PCYT2, UBE2J2, FBXW7",LV1,8,0.967285
1,gene_set_increase,0.285714,0.285714,0.07218,0.840253,1.258282,3,"ACACA, MBTPS1",LV1,2,0.967285
2,gene_set_decrease,0.741259,0.741259,0.026956,0.751964,1.005845,5,"TCF7L2, UBE2J2, PTEN",LV10,10,0.967285
3,gene_set_increase,0.427572,0.72028,0.052805,0.81414,1.320263,3,"MBTPS1, DGAT2",LV10,4,0.967285
4,gene_set_decrease,0.908092,0.908092,0.014514,0.5404,0.778719,5,"PTEN, TCF7L2",LV100,1,0.97425


# Analysis

In [18]:
# Nothing passes the FDR threshold, so list the rows with an unadjusted
# p-value below 0.01, ordered from most to least significant.
nominal_hits = deg_enrich["pval"].lt(0.01)
df = deg_enrich.loc[nominal_hits].sort_values(by="pval")

In [19]:
# Number of LV/gene-set rows that pass the unadjusted p-value filter.
df.shape

(15, 11)

In [20]:
# Top hits, smallest p-value first.
df.head()

Unnamed: 0,pathway,pval,padj,log2err,ES,NES,size,leadingEdge,lv,rep_idx,fdr
936,gene_set_decrease,0.000554,0.001108,0.477271,0.999703,1.388986,5,"FBXW7, TCF7L2",LV520,6,0.967285
1560,gene_set_decrease,0.002201,0.004403,0.431708,0.997776,1.3596,5,"UBE2J2, TCF7L2",LV801,6,0.967285
918,gene_set_decrease,0.00246,0.004921,0.431708,0.997331,1.382906,5,"FBXW7, TCF7L2",LV512,2,0.967285
327,gene_set_increase,0.003533,0.007067,0.431708,0.998221,1.582398,3,"DGAT2, ACACA",LV246,6,0.967285
1140,gene_set_decrease,0.00357,0.007141,0.431708,0.999299,1.400503,5,"PTEN, FBXW7",LV612,3,0.967285


# Save

In [21]:
# Write the table back to the path it was loaded from (tab-separated, without
# the DataFrame index), so the file now carries the `fdr` column.
# NOTE(review): this replaces the input file; only the rows still present in
# `deg_enrich` remain in it afterwards.
deg_enrich.to_csv(FGSEA_INPUT_FILEPATH, sep="\t", index=False)