# Pseudo-bulk joint functional analysis with decoupler

In [None]:
import scanpy as sc
import decoupler as dc

# Only needed for processing
import numpy as np
import pandas as pd

# Needed for some plotting
import matplotlib.pyplot as plt

import os
import anndata as ad
import scanorama

from wrapper_functions import *

In [None]:
# Automatically re-load wrapper functions after an update
# Find details here: https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [None]:
# Experiment-wide settings. Organism, Protocol and Analyze are not imported
# explicitly in this notebook; presumably they come from the star import of
# wrapper_functions -- TODO confirm.
organism = Organism.mouse
analyze_params = Analyze(
    organism=organism,
    protocol=Protocol.FF,
)

In [None]:
# All paths are resolved relative to the directory the notebook is started from.
root_path = os.getcwd()

# Folder holding the per-sample zonation results that are loaded below.
results_folder = os.path.join(root_path, 'analyzed', 'zonation')

# Replace with the location of your samples
inpath = 'your_inpath_folder'

In [None]:
# Regular files found directly inside the results folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the sample order (and therefore the dataset indices used during
# integration and concatenation below) reproducible across machines and runs.
file_names = sorted(
    f
    for f in os.listdir(results_folder)
    if os.path.isfile(os.path.join(results_folder, f))
)

# One AnnData object per .h5ad file; any other file type is ignored.
# read_h5ad is the explicit reader (ad.read is only a deprecated alias for it).
adata_list = [
    ad.read_h5ad(os.path.join(results_folder, file_name))
    for file_name in file_names
    if file_name.endswith('.h5ad')
]





























In [None]:
# Batch-correct all samples jointly with Scanorama. return_dimred=True also
# requests the low-dimensional embedding (used further down as "X_scanorama").
adatas_cor = scanorama.correct_scanpy(
    adata_list,
    return_dimred=True,
)

[[0.00000000e+00 0.00000000e+00 1.27445500e-01 6.46829811e-01
  7.38552437e-04 3.55871886e-03 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 1.23839009e-03 0.00000000e+00
  7.61614623e-04 0.00000000e+00 0.00000000e+00 2.21565731e-03
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 2.85394933e-01 7.45156483e-04
  0.00000000e+00 8.56929955e-02 0.00000000e+00 2.83159463e-02
  0.00000000e+00 2.98062593e-03 9.17431193e-03 0.00000000e+00
  6.60953800e-01 1.19225037e-02]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 8.38457239e-02
  1.82100684e-01 1.11794298e-02 0.00000000e+00 0.00000000e+00
  5.21296885e-02 0.00000000e+00 2.25265511e-01 0.00000000e+00
  7.61614623e-03 0.00000000e+00 0.00000000e+00 1.22510561e-01
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  1.61290323e-02 2.76974416e-01 0.00000000e+00 0.00000000e+00
  7.78642937e-03 0.00000000e+00 0

Processing datasets (6, 11)


Processing datasets (1, 16)


Processing datasets (0, 3)


Processing datasets (7, 9)


Processing datasets (14, 17)


Processing datasets (10, 15)


Processing datasets (4, 10)


Processing datasets (11, 17)


Processing datasets (13, 17)


Processing datasets (8, 15)


Processing datasets (5, 12)


Processing datasets (4, 5)


Processing datasets (1, 6)


Processing datasets (3, 5)


Processing datasets (9, 16)


Processing datasets (2, 10)


Processing datasets (6, 17)


Processing datasets (2, 4)


Processing datasets (7, 13)


Processing datasets (0, 2)


Processing datasets (6, 16)


Processing datasets (7, 16)


Processing datasets (2, 15)


In [None]:
# Stack the batch-corrected samples into one AnnData object.
# Each sample is labelled (obs["library_id"]) with the key(s) found in its own
# uns["spatial"] dictionary, and observation names get "-<key>" appended.
adata_spatial = sc.concat(
    adatas_cor,
    keys=[
        library_key
        for corrected in adatas_cor
        for library_key in corrected.uns["spatial"]
    ],
    label="library_id",
    index_unique="-",
    uns_merge="unique",
)

In [None]:
# Build the neighbour graph on the Scanorama embedding (obsm["X_scanorama"])
# rather than on the expression matrix, so the graph reflects batch-corrected data.
sc.pp.neighbors(adata_spatial, use_rep="X_scanorama")
# UMAP layout for the visual batch/condition checks in the next cells.
sc.tl.umap(adata_spatial)
# Leiden clustering; labels are stored under the "clusters" key.
sc.tl.leiden(adata_spatial, key_added="clusters")

In [None]:
# Integration check: UMAP coloured by the "Sample_ID" annotation.
sc.pl.umap(
    adata_spatial,
    palette=sc.pl.palettes.default_20,
    color=["Sample_ID"],
)

In [None]:
# Integration check: UMAP coloured by the "Batch_ID" annotation.
sc.pl.umap(
    adata_spatial,
    palette=sc.pl.palettes.default_20,
    color=["Batch_ID"],
)

In [None]:
# Integration check: UMAP coloured by the "Slide_ID" annotation.
sc.pl.umap(
    adata_spatial,
    palette=sc.pl.palettes.default_20,
    color=["Slide_ID"],
)

In [None]:
# UMAP coloured by the "Gender" annotation.
sc.pl.umap(
    adata_spatial,
    palette=sc.pl.palettes.default_20,
    color=["Gender"],
)

In [None]:
# UMAP coloured by the "Condition" annotation.
sc.pl.umap(
    adata_spatial,
    palette=sc.pl.palettes.default_20,
    color=["Condition"],
)

In [None]:
# UMAP coloured by the "zonation" annotation.
sc.pl.umap(
    adata_spatial,
    palette=sc.pl.palettes.default_20,
    color=["zonation"],
)

In [None]:
# Stack the *uncorrected* samples (adata_list, not adatas_cor); this object is
# the input of the pseudo-bulk aggregation below.
# Each sample is labelled (obs["library_id"]) with the key(s) found in its own
# uns["spatial"] dictionary, and observation names get "-<key>" appended.
adata_concat = sc.concat(
    adata_list,
    keys=[
        library_key
        for sample in adata_list
        for library_key in sample.uns["spatial"]
    ],
    label="library_id",
    index_unique="-",
    uns_merge="unique",
)

In [None]:
# Pseudo-bulk profile without any filtering (min_cells=0, min_counts=0):
# the "counts" layer is summed per Sample_ID x zonation combination so that
# every profile can be inspected in the QC plot of the next cell.
pdata = dc.get_pseudobulk(
    adata_concat,
    sample_col="Sample_ID",
    groups_col="zonation",
    layer="counts",
    mode="sum",
    min_counts=0,
    min_cells=0,
)
pdata

In [None]:
# QC view of the unfiltered pseudo-bulk profiles, grouped by sample, zone and gender.
dc.plot_psbulk_samples(
    pdata,
    figsize=(12, 8),
    groupby=["Sample_ID", "zonation", "Gender"],
)

In [None]:
# Filtered pseudo-bulk profile: same aggregation as above, but with
# min_cells=10 and min_counts=1000 as quality thresholds.
# NOTE: this rebinds `pdata`, replacing the unfiltered object created earlier.
pdata = dc.get_pseudobulk(
    adata_concat,
    sample_col="Sample_ID",
    groups_col="zonation",
    layer="counts",
    mode="sum",
    min_counts=1000,
    min_cells=10,
)
pdata

In [None]:
# Keep a copy of the summed counts in a layer before X is transformed in place.
pdata.layers['counts'] = pdata.X.copy()

# Normalize, scale and compute pca
# (each step below overwrites pdata.X; only the PCA result is needed afterwards)
sc.pp.normalize_total(pdata, target_sum=1e6)
sc.pp.log1p(pdata)
sc.pp.scale(pdata, max_value=10)
sc.tl.pca(pdata)

# Return raw counts to X
# (the differential-expression steps further down work on pdata subsets and need counts in X)
dc.swap_layer(pdata, 'counts', X_layer_key=None, inplace=True)

In [None]:
print(pdata.X)

In [None]:
# One PCA panel per annotation (stacked in a single column), followed by the
# variance explained by each principal component.
sc.pl.pca(
    pdata,
    color=["Gender", "Condition", "zonation"],
    size=300,
    ncols=1,
    show=True,
)
sc.pl.pca_variance_ratio(pdata)

In [None]:
# Test which metadata columns are associated with which principal components.
dc.get_metadata_associations(
    pdata,
    # metadata columns to associate to PCs
    obs_keys=["Gender", "Condition", "zonation", "psbulk_n_cells", "psbulk_counts"],
    # where the PCs are stored
    obsm_key="X_pca",
    # where the results are stored
    uns_key="pca_anova",
    inplace=True,
)

In [None]:
# Visualise the PC/metadata associations computed in the previous cell.
plt.figure(figsize=(10,20))
dc.plot_associations(
    pdata,
    uns_key='pca_anova',  # summary statistics from the anova tests
    obsm_key='X_pca',  # where the PCs are stored
    stat_col='p_adj',  # which summary statistic to plot
    obs_annotation_cols = ['Gender','Condition', 'zonation'], # which sample annotations to plot
    # Title typo fixed: "Principal" (not "Principle") component scores.
    titles=['Adjusted p-values from ANOVA', 'Principal component scores']
)
plt.show()

In [None]:
sc.pl.pca(pdata, color=['Gender'], ncols=1, show=True, size=300, components =['1,3'] )

In [None]:
sc.pl.pca(pdata, color=['zonation'], ncols=1, show=True, size=300, components =['2,3'] )

In [None]:
sc.pl.pca(pdata, color=['Condition'], ncols=1, show=True, size=300, components =['4,10'] )

# Males AAV9 Versus Control in Pericentral

In [None]:
# Pseudo-bulk profiles for this contrast: male samples, pericentral zone,
# every condition except AAV2-CMV-GFP.
adata_males_AAV9_pericentral = pdata[
    (pdata.obs['Gender'] == 'Male')
    & (pdata.obs['Condition'] != 'AAV2-CMV-GFP')
    & (pdata.obs['zonation'] == 'Pericentral')
].copy()

In [None]:
# Preview the effect of the expression thresholds that are applied in the
# gene-filtering cell below (same min_count / min_total_count values).
dc.plot_filter_by_expr(
    adata_males_AAV9_pericentral,
    group="Condition",
    min_count=10,
    min_total_count=40,
)

In [None]:
adata_males_AAV9_pericentral.obs

In [None]:
# Genes passing the expression filter within the Condition groups.
genes = dc.filter_by_expr(
    adata_males_AAV9_pericentral,
    group="Condition",
    min_count=10,
    min_total_count=40,
)

# Restrict the subset to those genes.
# NOTE: this overwrites the subset in place, so the unfiltered version is no
# longer available after this cell has run.
adata_males_AAV9_pericentral = adata_males_AAV9_pericentral[:, genes].copy()
adata_males_AAV9_pericentral

## Contrast between conditions

In [None]:
# Import DESeq2
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats
from pydeseq2.default_inference import DefaultInference

In [None]:
# DESeq2 dataset for the pericentral male subset: single-factor design on
# "Condition" with "Untreated" as the reference level, fitted on one CPU.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata=adata_males_AAV9_pericentral,
    design_factors=["Condition"],
    ref_level=["Condition", "Untreated"],
    refit_cooks=True,
    n_cpus=1,
    inference=inference,
)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 68.66 seconds.

Fitting dispersion trend curve...


... done in 0.29 seconds.

Fitting MAP dispersions...


... done in 63.71 seconds.

Fitting LFCs...


... done in 4.25 seconds.

Replacing 0 outlier genes.



In [None]:
# Contrast of interest: AAV9-CMV-GFP (treated) versus Untreated (control).
stat_res = DeseqStats(
    dds,
    contrast=["Condition", "AAV9-CMV-GFP", "Untreated"],
    inference=inference,
)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV9-CMV-GFP_vs_Untreated')

In [None]:
# Per-gene DESeq2 results for the contrast, displayed ordered by the test statistic.
results_df = stat_res.results_df
results_df.sort_values(by="stat")

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
# One-row matrix (contrast x genes) of test statistics; this is the input of
# the activity-inference calls below.
mat = results_df[['stat']].rename(columns={'stat': 'Pericentral_Males_AAV9'}).T
mat

In [None]:
sc.pl.violin(adata_males_AAV9_pericentral, keys=['Dbp', 'Ppard', 'Acss2'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
collectri = dc.get_collectri(organism='mouse', split_complexes=False)
collectri

In [None]:
# Infer transcription factor activities with ulm, using the CollecTRI network
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Pericentral_Males_AAV9', top=25, vertical=True)

In [None]:
# One-row frames (contrast x genes) of log2 fold changes and adjusted p-values
logFCs = results_df[['log2FoldChange']].rename(columns={'log2FoldChange': 'Pericentral_Males_AAV9'}).T
pvals = results_df[['padj']].rename(columns={'padj': 'Pericentral_Males_AAV9'}).T

# Volcano plot restricted to the targets of the TF given by `name` in the network
dc.plot_volcano(
    logFCs,
    pvals,
    'Pericentral_Males_AAV9',
    name='Myc',
    net=collectri,
    top=10,
    sign_thr=0.05,
    lFCs_thr=0.5,
)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Pericentral_Males_AAV9', name='Ppara', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
progeny_mouse

In [None]:
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Pericentral_Males_AAV9', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='PI3K', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='Androgen', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

In [None]:
msigdb_mouse = pd.read_csv("msigdb_complete.csv")
msigdb_mouse

In [None]:
msigdb_mouse.collection.unique().tolist()

In [None]:
# MSigDB collections are analysed in four groups; each group gets its own
# enrichment run further down.
categories_1 = ['hallmark', 'positional']
categories_2 = ['immunesigdb', 'chemical_and_genetic_perturbations']
categories_3 = ['go_biological_process', 'go_cellular_component', 'go_molecular_function']
categories_4 = ['kegg_pathways', 'reactome_pathways']

# Rows of the MSigDB table whose 'collection' belongs to each group.
filtered_msigdb_1 = msigdb_mouse.loc[msigdb_mouse['collection'].isin(categories_1)]
filtered_msigdb_2 = msigdb_mouse.loc[msigdb_mouse['collection'].isin(categories_2)]
filtered_msigdb_3 = msigdb_mouse.loc[msigdb_mouse['collection'].isin(categories_3)]
filtered_msigdb_4 = msigdb_mouse.loc[msigdb_mouse['collection'].isin(categories_4)]

In [None]:
# Keep the first occurrence of every (gene symbol, gene set) pair so that a
# gene is listed at most once per gene set.
filtered_msigdb_1 = filtered_msigdb_1.drop_duplicates(subset=['genesymbol', 'geneset'])
filtered_msigdb_2 = filtered_msigdb_2.drop_duplicates(subset=['genesymbol', 'geneset'])
filtered_msigdb_3 = filtered_msigdb_3.drop_duplicates(subset=['genesymbol', 'geneset'])
filtered_msigdb_4 = filtered_msigdb_4.drop_duplicates(subset=['genesymbol', 'geneset'])

### Hallmarks and Chromosome location

In [None]:
# GSEA of the DESeq2 test statistic against the hallmark/positional gene sets.
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat="stat",
    net=filtered_msigdb_1,
    source="geneset",
    target="genesymbol",
    verbose=True,
)

# Display with the highest normalised enrichment scores first.
enr_pvals_filtered_msigdb_1.sort_values(by="NES", ascending=False)

In [None]:
# Keep only the terms with an FDR-adjusted p-value below 0.05.
enr_pvals_filtered = enr_pvals_filtered_msigdb_1.loc[
    enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05
]

In [None]:
# The 35 terms with the largest absolute NES, strongest first.
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Bar colour by direction of enrichment: blue for negative NES, red otherwise.
enr_pvals_filtered_color['color'] = np.where(
    enr_pvals_filtered_color['NES'] < 0,
    'blue',
    'red',
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
# Horizontal bar plot of NES per term, using the per-row colours assigned above.
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x="NES",
    y="Term",
    color=enr_pvals_filtered_color.color.tolist(),
    figsize=(7, 10),
)

### immunesigdb and chemical and genetic perturbations

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_2,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_2.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Gene Ontology Terms

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_3,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_3.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Pathways

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_4,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_4.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# Index the literature factors by their identifier (column 0; presumably the
# gene symbol, since it is matched against the results_df index below) and keep
# exactly one row per identifier.
# De-duplication has to happen on column 0 *before* it becomes the index:
# drop_duplicates() never looks at the index, so calling it after set_index(0)
# leaves repeated identifiers in place (and, if other columns exist, drops
# distinct identifiers that merely share the same annotation values).
# Repeated identifiers would then produce repeated rows in the merge below.
literature_list = literature_list.drop_duplicates(subset=0).set_index(0)

In [None]:
# Significant genes (padj < 0.05) that also appear in the literature list;
# both frames are matched on their index.
results_df.loc[results_df['padj'] < 0.05].merge(
    literature_list,
    left_index=True,
    right_index=True,
    how='inner',
)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# Create the output folder first: to_csv raises if the directory is missing,
# which breaks a run on a fresh checkout.
# The file name is kept verbatim (including the "AVV9" spelling) because other
# scripts are expected to read it.
deg_results_path = 'analyzed/DE/DE_Results_14c/DEG/male_AVV9_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(deg_results_path), exist_ok=True)
results_df.to_csv(deg_results_path, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Activity scores and p-values side by side, one row per entry of the
# transposed activity matrix.
pathways_all = pd.merge(
    pathway_acts.transpose(),
    pathway_pvals.transpose(),
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# Create the output folder first: to_csv raises if the directory is missing.
pathway_results_path = 'analyzed/DE/DE_Results_14c/Pathway_results/male_AVV9_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(pathway_results_path), exist_ok=True)
# NOTE(review): the index presumably holds pathway names rather than genes; the
# 'GeneName' label is kept unchanged so downstream scripts keep working.
pathways_all.to_csv(pathway_results_path, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Activity scores and p-values side by side, one row per entry of the
# transposed activity matrix.
tf_all = pd.merge(
    tf_acts.transpose(),
    tf_pvals.transpose(),
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# Create the output folder first: to_csv raises if the directory is missing.
tf_results_path = 'analyzed/DE/DE_Results_14c/TF_results/male_AVV9_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(tf_results_path), exist_ok=True)
# NOTE(review): the index presumably holds TF names rather than target genes; the
# 'GeneName' label is kept unchanged so downstream scripts keep working.
tf_all.to_csv(tf_results_path, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# Create the output folder first: to_csv raises if the directory is missing.
os.makedirs('analyzed/DE/DE_Results_14c/GSEA_results', exist_ok=True)
# NOTE(review): 'Term' also appears to be a regular column of these tables (it is
# used as y='Term' when plotting), so labelling the index 'Term' may write two
# 'Term' columns to the CSV -- confirm what the downstream readers expect
# before changing it.
enr_pvals_filtered_msigdb_1.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_pericentral_Results_msigdb_1.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_2.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_pericentral_Results_msigdb_2.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_3.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_pericentral_Results_msigdb_3.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_4.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_pericentral_Results_msigdb_4.csv', index=True, index_label='Term')

# Males AAV9 Versus Control in Other

In [None]:
adata_males_AAV9_other =  pdata[(pdata.obs['Gender'] == 'Male') & (pdata.obs['Condition'] != 'AAV2-CMV-GFP') & (pdata.obs['zonation'] == 'Other')].copy()

In [None]:
dc.plot_filter_by_expr(adata_males_AAV9_other, group = 'Condition', min_count=10, min_total_count=40)

In [None]:
adata_males_AAV9_other.obs

In [None]:
genes = dc.filter_by_expr(adata_males_AAV9_other, group='Condition', min_count=10, min_total_count=40)

# Filter by these genes
adata_males_AAV9_other = adata_males_AAV9_other[:, genes].copy()
adata_males_AAV9_other

## Contrast between conditions

In [None]:
# Build DESeq2 object
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_males_AAV9_other,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 69.02 seconds.

Fitting dispersion trend curve...


... done in 0.31 seconds.

Fitting MAP dispersions...


... done in 68.32 seconds.

Fitting LFCs...


... done in 4.31 seconds.

Replacing 0 outlier genes.



In [None]:
# Extract contrast between treated vs control
stat_res = DeseqStats(dds, contrast=["Condition", 'AAV9-CMV-GFP', "Untreated"], inference=inference)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV9-CMV-GFP_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
mat = results_df[['stat']].T.rename(index={'stat': 'Other_Males_AAV9'})
mat

In [None]:
sc.pl.violin(adata_males_AAV9_other, keys=['Gng11', 'Klf10', 'Socs3'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm, using the CollecTRI network
# (collectri is the object retrieved in the pericentral section above)
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Other_Males_AAV9', top=25, vertical=True)

In [None]:
# Extract logFCs and pvals
logFCs = results_df[['log2FoldChange']].T.rename(index={'log2FoldChange': 'Other_Males_AAV9'})
pvals = results_df[['padj']].T.rename(index={'padj': 'Other_Males_AAV9'})

# Plot
dc.plot_volcano(logFCs, pvals, 'Other_Males_AAV9', name='Pin1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Other_Males_AAV9', name='Nr6a1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Other_Males_AAV9', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='EGFR', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='WNT', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

In [None]:
# msigdb_mouse = pd.read_csv("msigdb_complete.csv")
# msigdb_mouse

In [None]:
# msigdb_mouse.collection.unique().tolist()

In [None]:
# categories_1 = ['hallmark', 'positional']
# categories_2 = ['immunesigdb', 'chemical_and_genetic_perturbations']
# categories_3 = ['go_biological_process', 'go_cellular_component', 'go_molecular_function']
# categories_4 = ['kegg_pathways', 'reactome_pathways']
                
# filtered_msigdb_1 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_1)]
# filtered_msigdb_2 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_2)]
# filtered_msigdb_3 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_3)]
# filtered_msigdb_4 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_4)]

In [None]:
# filtered_msigdb_1 = filtered_msigdb_1[~filtered_msigdb_1.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_2 = filtered_msigdb_2[~filtered_msigdb_2.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_3 = filtered_msigdb_3[~filtered_msigdb_3.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_4 = filtered_msigdb_4[~filtered_msigdb_4.duplicated(['genesymbol', 'geneset'])]

### Hallmarks and Chromosome location

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_1,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_1.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### immunesigdb and chemical and genetic perturbations

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_2,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_2.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Gene Ontology Terms

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_3,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_3.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Pathways

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_4,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_4.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
results_df[results_df['padj'] < 0.05].merge(literature_list, how='inner', left_index=True, right_index=True)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# Create the output folder first: to_csv raises if the directory is missing,
# which breaks a run on a fresh checkout.
# The file name is kept verbatim (including the "AVV9" spelling) because other
# scripts are expected to read it.
deg_results_path = 'analyzed/DE/DE_Results_14c/DEG/male_AVV9_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(deg_results_path), exist_ok=True)
results_df.to_csv(deg_results_path, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Activity scores and p-values side by side, one row per entry of the
# transposed activity matrix.
pathways_all = pd.merge(
    pathway_acts.transpose(),
    pathway_pvals.transpose(),
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# Create the output folder first: to_csv raises if the directory is missing.
pathway_results_path = 'analyzed/DE/DE_Results_14c/Pathway_results/male_AVV9_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(pathway_results_path), exist_ok=True)
# NOTE(review): the index presumably holds pathway names rather than genes; the
# 'GeneName' label is kept unchanged so downstream scripts keep working.
pathways_all.to_csv(pathway_results_path, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Activity scores and p-values side by side, one row per entry of the
# transposed activity matrix.
tf_all = pd.merge(
    tf_acts.transpose(),
    tf_pvals.transpose(),
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# Create the output folder first: to_csv raises if the directory is missing.
tf_results_path = 'analyzed/DE/DE_Results_14c/TF_results/male_AVV9_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(tf_results_path), exist_ok=True)
# NOTE(review): the index presumably holds TF names rather than target genes; the
# 'GeneName' label is kept unchanged so downstream scripts keep working.
tf_all.to_csv(tf_results_path, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# Create the output folder first: to_csv raises if the directory is missing.
os.makedirs('analyzed/DE/DE_Results_14c/GSEA_results', exist_ok=True)
# NOTE(review): 'Term' also appears to be a regular column of these tables (it is
# used as y='Term' when plotting), so labelling the index 'Term' may write two
# 'Term' columns to the CSV -- confirm what the downstream readers expect
# before changing it.
enr_pvals_filtered_msigdb_1.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_other_Results_msigdb_1.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_2.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_other_Results_msigdb_2.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_3.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_other_Results_msigdb_3.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_4.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_other_Results_msigdb_4.csv', index=True, index_label='Term')

# Males AAV9 Versus Control in Periportal

In [None]:
adata_males_AAV9_periportal =  pdata[(pdata.obs['Gender'] == 'Male') & (pdata.obs['Condition'] != 'AAV2-CMV-GFP') & (pdata.obs['zonation'] == 'Periportal')].copy()

In [None]:
dc.plot_filter_by_expr(adata_males_AAV9_periportal, group = 'Condition', min_count=10, min_total_count=40)

In [None]:
adata_males_AAV9_periportal.obs

In [None]:
genes = dc.filter_by_expr(adata_males_AAV9_periportal, group='Condition', min_count=10, min_total_count=40)

# Filter by these genes
adata_males_AAV9_periportal = adata_males_AAV9_periportal[:, genes].copy()
adata_males_AAV9_periportal

## Contrast between conditions

In [None]:
# Build DESeq2 object
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_males_AAV9_periportal,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 64.72 seconds.

Fitting dispersion trend curve...


... done in 0.31 seconds.

Fitting MAP dispersions...


... done in 68.13 seconds.

Fitting LFCs...


... done in 4.43 seconds.

Replacing 0 outlier genes.



In [None]:
# Extract contrast between treated vs control
stat_res = DeseqStats(dds, contrast=["Condition", 'AAV9-CMV-GFP', "Untreated"], inference=inference)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV9-CMV-GFP_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
mat = results_df[['stat']].T.rename(index={'stat': 'Periportal_Males_AAV9'})
mat

In [None]:
sc.pl.violin(adata_males_AAV9_periportal, keys=['Dbp', 'Ppard', 'Acss2'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm, using the CollecTRI network
# (collectri is the object retrieved in the pericentral section above)
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Periportal_Males_AAV9', top=25, vertical=True)

In [None]:
# Extract logFCs and pvals
logFCs = results_df[['log2FoldChange']].T.rename(index={'log2FoldChange': 'Periportal_Males_AAV9'})
pvals = results_df[['padj']].T.rename(index={'padj': 'Periportal_Males_AAV9'})

# Plot
dc.plot_volcano(logFCs, pvals, 'Periportal_Males_AAV9', name='Myc', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Periportal_Males_AAV9', name='Ppara', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Periportal_Males_AAV9', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='PI3K', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='Androgen', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

In [None]:
# msigdb_mouse = pd.read_csv("msigdb_complete.csv")
# msigdb_mouse

In [None]:
# msigdb_mouse.collection.unique().tolist()

In [None]:
# categories_1 = ['hallmark', 'positional']
# categories_2 = ['immunesigdb', 'chemical_and_genetic_perturbations']
# categories_3 = ['go_biological_process', 'go_cellular_component', 'go_molecular_function']
# categories_4 = ['kegg_pathways', 'reactome_pathways']
                
# filtered_msigdb_1 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_1)]
# filtered_msigdb_2 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_2)]
# filtered_msigdb_3 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_3)]
# filtered_msigdb_4 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_4)]

In [None]:
# filtered_msigdb_1 = filtered_msigdb_1[~filtered_msigdb_1.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_2 = filtered_msigdb_2[~filtered_msigdb_2.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_3 = filtered_msigdb_3[~filtered_msigdb_3.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_4 = filtered_msigdb_4[~filtered_msigdb_4.duplicated(['genesymbol', 'geneset'])]

### Hallmarks and Chromosome location

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_1,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_1.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### immunesigdb and chemical and genetic perturbations

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_2 gene sets
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_2,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_2.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Gene Ontology Terms

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_3 gene sets
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_3,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_3.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Pathways

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_4 gene sets
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_4,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_4.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
# Significant genes (padj < 0.05) that are also in the literature list, matched on the index
results_df[results_df['padj'].lt(0.05)].merge(
    literature_list, how='inner', left_index=True, right_index=True
)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the file name spells "AVV9"; kept byte-for-byte because other scripts may read this path.
deg_results_csv = 'analyzed/DE/DE_Results_14c/DEG/male_AVV9_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(deg_results_csv), exist_ok=True)
results_df.to_csv(deg_results_csv, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Join activity scores and p-values row-wise; the suffixes tell the two value columns apart.
pathways_all = pd.merge(pathway_acts.transpose(), pathway_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the index is labelled 'GeneName' although the rows look like pathways - kept for downstream readers; confirm.
pathway_results_csv = 'analyzed/DE/DE_Results_14c/Pathway_results/male_AVV9_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(pathway_results_csv), exist_ok=True)
pathways_all.to_csv(pathway_results_csv, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Join activity scores and p-values row-wise; the suffixes tell the two value columns apart.
tf_all = pd.merge(tf_acts.transpose(), tf_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the index is labelled 'GeneName' although the rows look like TFs - kept for downstream readers; confirm.
tf_results_csv = 'analyzed/DE/DE_Results_14c/TF_results/male_AVV9_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(tf_results_csv), exist_ok=True)
tf_all.to_csv(tf_results_csv, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): 'Term' is already used as a column in the bar plots, so index_label='Term'
# probably writes two 'Term' headers - kept unchanged for downstream readers; confirm.
gsea_results_prefix = 'analyzed/DE/DE_Results_14c/GSEA_results/male_AVV9_vs_Control_periportal_Results_msigdb_'
os.makedirs(os.path.dirname(gsea_results_prefix), exist_ok=True)
for collection_id, gsea_table in enumerate(
    (enr_pvals_filtered_msigdb_1, enr_pvals_filtered_msigdb_2, enr_pvals_filtered_msigdb_3, enr_pvals_filtered_msigdb_4),
    start=1,
):
    gsea_table.to_csv(f'{gsea_results_prefix}{collection_id}.csv', index=True, index_label='Term')

# Males AAV2 Versus Control in Pericentral

In [None]:
# Male samples in the Pericentral zone, dropping the AAV9-CMV-GFP condition
adata_males_AAV2_pericentral = pdata[
    (pdata.obs['Gender'] == 'Male')
    & (pdata.obs['Condition'] != 'AAV9-CMV-GFP')
    & (pdata.obs['zonation'] == 'Pericentral')
].copy()

In [None]:
# Diagnostic plot for the same thresholds the gene filter uses
dc.plot_filter_by_expr(
    adata_males_AAV2_pericentral,
    group='Condition',
    min_count=10,
    min_total_count=40,
)

In [None]:
adata_males_AAV2_pericentral.obs

In [None]:
# Genes that pass the expression filter, grouped by Condition
genes = dc.filter_by_expr(
    adata_males_AAV2_pericentral,
    group='Condition',
    min_count=10,
    min_total_count=40,
)

# Restrict the object to those genes and display it
adata_males_AAV2_pericentral = adata_males_AAV2_pericentral[:, genes].copy()
adata_males_AAV2_pericentral

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# NOTE(review): n_cpus is given both to DefaultInference and to DeseqDataSet - confirm the
# installed PyDESeq2 still accepts both.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_males_AAV2_pericentral,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
# Fit the model; the log below lists the dispersion, trend, MAP-dispersion and LFC fitting steps.
dds.deseq2()

Fitting dispersions...


... done in 61.32 seconds.

Fitting dispersion trend curve...


... done in 0.27 seconds.

Fitting MAP dispersions...


... done in 57.33 seconds.

Fitting LFCs...


... done in 3.96 seconds.

Replacing 0 outlier genes.



In [None]:
# Contrast of interest: AAV2-CMV-GFP versus Untreated on the Condition factor
stat_res = DeseqStats(
    dds,
    contrast=['Condition', 'AAV2-CMV-GFP', 'Untreated'],
    inference=inference,
)

In [None]:
# Compute Wald test
# NOTE(review): n_cpus is forwarded here too - confirm summary() honours it in the installed PyDESeq2.
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
# The coefficient name spells out factor, treated level and reference level of the contrast above.
stat_res.lfc_shrink(coeff='Condition_AAV2-CMV-GFP_vs_Untreated')

In [None]:
# Take the results table and preview it ordered by test statistic
results_df = stat_res.results_df
results_df.sort_values(by='stat')

In [None]:
# Volcano plot of log2 fold change against adjusted p-value (top=20)
dc.plot_volcano_df(
    results_df,
    x='log2FoldChange',
    y='padj',
    top=20,
)

In [None]:
# One-row frame of test statistics, labelled with the contrast name
mat = results_df[['stat']].rename(columns={'stat': 'Pericentral_Males_AAV2'}).transpose()
mat

In [None]:
# Per-condition expression of three selected genes
sc.pl.violin(
    adata_males_AAV2_pericentral,
    keys=['Dbp', 'Ppard', 'Acss2'],
    groupby='Condition',
    rotation=90,
)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm, using the collectri network
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
# Bar plot of TF activity scores for this contrast (top=25)
dc.plot_barplot(
    tf_acts,
    'Pericentral_Males_AAV2',
    top=25,
    vertical=True,
)

In [None]:
# One-row frames of log fold changes and adjusted p-values, labelled with the contrast name
logFCs = results_df[['log2FoldChange']].rename(columns={'log2FoldChange': 'Pericentral_Males_AAV2'}).transpose()
pvals = results_df[['padj']].rename(columns={'padj': 'Pericentral_Males_AAV2'}).transpose()

# Volcano of the Myc targets in the collectri network
dc.plot_volcano(
    logFCs, pvals, 'Pericentral_Males_AAV2',
    name='Myc', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5,
)

In [None]:
# Volcano of the Ppara targets in the collectri network
dc.plot_volcano(
    logFCs, pvals, 'Pericentral_Males_AAV2',
    name='Ppara', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5,
)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
# Infer pathway activities with mlm, using the progeny_mouse network
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
# Bar plot of pathway activity scores for this contrast (top=25)
dc.plot_barplot(
    pathway_acts,
    'Pericentral_Males_AAV2',
    top=25,
    vertical=False,
)

In [None]:
# Targets of the PI3K pathway ranked on the DE statistic
dc.plot_targets(
    results_df,
    stat='stat',
    source_name='PI3K',
    net=progeny_mouse,
    top=15,
)

In [None]:
# Targets of the Androgen pathway ranked on the DE statistic
dc.plot_targets(
    results_df,
    stat='stat',
    source_name='Androgen',
    net=progeny_mouse,
    top=15,
)

## Functional enrichment of biological terms

### Hallmarks and Chromosome location

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_1 gene sets
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_1,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_1.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### immunesigdb and chemical and genetic perturbations

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_2 gene sets
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_2,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_2.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Gene Ontology Terms

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_3 gene sets
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_3,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_3.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Pathways

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_4 gene sets
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_4,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_4.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
# Significant genes (padj < 0.05) that are also in the literature list, matched on the index
results_df[results_df['padj'].lt(0.05)].merge(
    literature_list, how='inner', left_index=True, right_index=True
)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the file name spells "AVV2"; kept byte-for-byte because other scripts may read this path.
deg_results_csv = 'analyzed/DE/DE_Results_14c/DEG/male_AVV2_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(deg_results_csv), exist_ok=True)
results_df.to_csv(deg_results_csv, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Join activity scores and p-values row-wise; the suffixes tell the two value columns apart.
pathways_all = pd.merge(pathway_acts.transpose(), pathway_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the index is labelled 'GeneName' although the rows look like pathways - kept for downstream readers; confirm.
pathway_results_csv = 'analyzed/DE/DE_Results_14c/Pathway_results/male_AVV2_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(pathway_results_csv), exist_ok=True)
pathways_all.to_csv(pathway_results_csv, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Join activity scores and p-values row-wise; the suffixes tell the two value columns apart.
tf_all = pd.merge(tf_acts.transpose(), tf_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the index is labelled 'GeneName' although the rows look like TFs - kept for downstream readers; confirm.
tf_results_csv = 'analyzed/DE/DE_Results_14c/TF_results/male_AVV2_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(tf_results_csv), exist_ok=True)
tf_all.to_csv(tf_results_csv, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): 'Term' is already used as a column in the bar plots, so index_label='Term'
# probably writes two 'Term' headers - kept unchanged for downstream readers; confirm.
gsea_results_prefix = 'analyzed/DE/DE_Results_14c/GSEA_results/male_AVV2_vs_Control_pericentral_Results_msigdb_'
os.makedirs(os.path.dirname(gsea_results_prefix), exist_ok=True)
for collection_id, gsea_table in enumerate(
    (enr_pvals_filtered_msigdb_1, enr_pvals_filtered_msigdb_2, enr_pvals_filtered_msigdb_3, enr_pvals_filtered_msigdb_4),
    start=1,
):
    gsea_table.to_csv(f'{gsea_results_prefix}{collection_id}.csv', index=True, index_label='Term')

# Males AAV2 Versus Control in Other

In [None]:
# Male samples in the Other zone, dropping the AAV9-CMV-GFP condition
adata_males_AAV2_other = pdata[
    (pdata.obs['Gender'] == 'Male')
    & (pdata.obs['Condition'] != 'AAV9-CMV-GFP')
    & (pdata.obs['zonation'] == 'Other')
].copy()

In [None]:
# Diagnostic plot for the same thresholds the gene filter uses
dc.plot_filter_by_expr(
    adata_males_AAV2_other,
    group='Condition',
    min_count=10,
    min_total_count=40,
)

In [None]:
adata_males_AAV2_other.obs

In [None]:
# Genes that pass the expression filter, grouped by Condition
genes = dc.filter_by_expr(
    adata_males_AAV2_other,
    group='Condition',
    min_count=10,
    min_total_count=40,
)

# Restrict the object to those genes and display it
adata_males_AAV2_other = adata_males_AAV2_other[:, genes].copy()
adata_males_AAV2_other

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# NOTE(review): n_cpus is given both to DefaultInference and to DeseqDataSet - confirm the
# installed PyDESeq2 still accepts both.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_males_AAV2_other,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
# Fit the model; the log below lists the dispersion, trend, MAP-dispersion and LFC fitting steps.
dds.deseq2()

Fitting dispersions...


... done in 77.10 seconds.

Fitting dispersion trend curve...


... done in 0.35 seconds.

Fitting MAP dispersions...


... done in 78.12 seconds.

Fitting LFCs...


... done in 4.77 seconds.

Replacing 0 outlier genes.



In [None]:
# Contrast of interest: AAV2-CMV-GFP versus Untreated on the Condition factor
stat_res = DeseqStats(
    dds,
    contrast=['Condition', 'AAV2-CMV-GFP', 'Untreated'],
    inference=inference,
)

In [None]:
# Compute Wald test
# NOTE(review): n_cpus is forwarded here too - confirm summary() honours it in the installed PyDESeq2.
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
# The coefficient name spells out factor, treated level and reference level of the contrast above.
stat_res.lfc_shrink(coeff='Condition_AAV2-CMV-GFP_vs_Untreated')

In [None]:
# Take the results table and preview it ordered by test statistic
results_df = stat_res.results_df
results_df.sort_values(by='stat')

In [None]:
# Volcano plot of log2 fold change against adjusted p-value (top=20)
dc.plot_volcano_df(
    results_df,
    x='log2FoldChange',
    y='padj',
    top=20,
)

In [None]:
# One-row frame of test statistics, labelled with the contrast name
mat = results_df[['stat']].rename(columns={'stat': 'Other_Males_AAV2'}).transpose()
mat

In [None]:
# Per-condition expression of three selected genes
sc.pl.violin(
    adata_males_AAV2_other,
    keys=['Dbp', 'Ppard', 'Acss2'],
    groupby='Condition',
    rotation=90,
)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm, using the collectri network
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
# Bar plot of TF activity scores for this contrast (top=25)
dc.plot_barplot(
    tf_acts,
    'Other_Males_AAV2',
    top=25,
    vertical=True,
)

In [None]:
# One-row frames of log fold changes and adjusted p-values, labelled with the contrast name
logFCs = results_df[['log2FoldChange']].rename(columns={'log2FoldChange': 'Other_Males_AAV2'}).transpose()
pvals = results_df[['padj']].rename(columns={'padj': 'Other_Males_AAV2'}).transpose()

# Volcano of the Myc targets in the collectri network
dc.plot_volcano(
    logFCs, pvals, 'Other_Males_AAV2',
    name='Myc', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5,
)

In [None]:
# Volcano of the Ppara targets in the collectri network
dc.plot_volcano(
    logFCs, pvals, 'Other_Males_AAV2',
    name='Ppara', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5,
)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
# Infer pathway activities with mlm, using the progeny_mouse network
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
# Bar plot of pathway activity scores for this contrast (top=25)
dc.plot_barplot(
    pathway_acts,
    'Other_Males_AAV2',
    top=25,
    vertical=False,
)

In [None]:
# Targets of the PI3K pathway ranked on the DE statistic
dc.plot_targets(
    results_df,
    stat='stat',
    source_name='PI3K',
    net=progeny_mouse,
    top=15,
)

In [None]:
# Targets of the Androgen pathway ranked on the DE statistic
dc.plot_targets(
    results_df,
    stat='stat',
    source_name='Androgen',
    net=progeny_mouse,
    top=15,
)

## Functional enrichment of biological terms

### Hallmarks and Chromosome location

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_1 gene sets
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_1,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_1.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### immunesigdb and chemical and genetic perturbations

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_2 gene sets
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_2,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_2.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Gene Ontology Terms

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_3 gene sets
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_3,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_3.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Pathways

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_4 gene sets
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_4,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_4.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
# Significant genes (padj < 0.05) that are also in the literature list, matched on the index
results_df[results_df['padj'].lt(0.05)].merge(
    literature_list, how='inner', left_index=True, right_index=True
)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the file name spells "AVV2"; kept byte-for-byte because other scripts may read this path.
deg_results_csv = 'analyzed/DE/DE_Results_14c/DEG/male_AVV2_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(deg_results_csv), exist_ok=True)
results_df.to_csv(deg_results_csv, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Join activity scores and p-values row-wise; the suffixes tell the two value columns apart.
pathways_all = pd.merge(pathway_acts.transpose(), pathway_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the index is labelled 'GeneName' although the rows look like pathways - kept for downstream readers; confirm.
pathway_results_csv = 'analyzed/DE/DE_Results_14c/Pathway_results/male_AVV2_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(pathway_results_csv), exist_ok=True)
pathways_all.to_csv(pathway_results_csv, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Join activity scores and p-values row-wise; the suffixes tell the two value columns apart.
tf_all = pd.merge(tf_acts.transpose(), tf_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the index is labelled 'GeneName' although the rows look like TFs - kept for downstream readers; confirm.
tf_results_csv = 'analyzed/DE/DE_Results_14c/TF_results/male_AVV2_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(tf_results_csv), exist_ok=True)
tf_all.to_csv(tf_results_csv, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): 'Term' is already used as a column in the bar plots, so index_label='Term'
# probably writes two 'Term' headers - kept unchanged for downstream readers; confirm.
gsea_results_prefix = 'analyzed/DE/DE_Results_14c/GSEA_results/male_AVV2_vs_Control_other_Results_msigdb_'
os.makedirs(os.path.dirname(gsea_results_prefix), exist_ok=True)
for collection_id, gsea_table in enumerate(
    (enr_pvals_filtered_msigdb_1, enr_pvals_filtered_msigdb_2, enr_pvals_filtered_msigdb_3, enr_pvals_filtered_msigdb_4),
    start=1,
):
    gsea_table.to_csv(f'{gsea_results_prefix}{collection_id}.csv', index=True, index_label='Term')

# Males AAV2 Versus Control in Periportal

In [None]:
# Male samples in the Periportal zone, dropping the AAV9-CMV-GFP condition
adata_males_AAV2_periportal = pdata[
    (pdata.obs['Gender'] == 'Male')
    & (pdata.obs['Condition'] != 'AAV9-CMV-GFP')
    & (pdata.obs['zonation'] == 'Periportal')
].copy()

In [None]:
# Diagnostic plot for the same thresholds the gene filter uses
dc.plot_filter_by_expr(
    adata_males_AAV2_periportal,
    group='Condition',
    min_count=10,
    min_total_count=40,
)

In [None]:
adata_males_AAV2_periportal.obs

In [None]:
# Genes that pass the expression filter, grouped by Condition
genes = dc.filter_by_expr(
    adata_males_AAV2_periportal,
    group='Condition',
    min_count=10,
    min_total_count=40,
)

# Restrict the object to those genes and display it
adata_males_AAV2_periportal = adata_males_AAV2_periportal[:, genes].copy()
adata_males_AAV2_periportal

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# NOTE(review): n_cpus is given both to DefaultInference and to DeseqDataSet - confirm the
# installed PyDESeq2 still accepts both.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_males_AAV2_periportal,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
# Fit the model; the log below lists the dispersion, trend, MAP-dispersion and LFC fitting steps.
dds.deseq2()

Fitting dispersions...


... done in 64.60 seconds.

Fitting dispersion trend curve...


... done in 0.33 seconds.

Fitting MAP dispersions...


... done in 69.40 seconds.

Fitting LFCs...


... done in 4.51 seconds.

Replacing 0 outlier genes.



In [None]:
# Contrast of interest: AAV2-CMV-GFP versus Untreated on the Condition factor
stat_res = DeseqStats(
    dds,
    contrast=['Condition', 'AAV2-CMV-GFP', 'Untreated'],
    inference=inference,
)

In [None]:
# Compute Wald test
# NOTE(review): n_cpus is forwarded here too - confirm summary() honours it in the installed PyDESeq2.
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
# The coefficient name spells out factor, treated level and reference level of the contrast above.
stat_res.lfc_shrink(coeff='Condition_AAV2-CMV-GFP_vs_Untreated')

In [None]:
# Take the results table and preview it ordered by test statistic
results_df = stat_res.results_df
results_df.sort_values(by='stat')

In [None]:
# Volcano plot of log2 fold change against adjusted p-value (top=20)
dc.plot_volcano_df(
    results_df,
    x='log2FoldChange',
    y='padj',
    top=20,
)

In [None]:
# One-row frame of test statistics, labelled with the contrast name
mat = results_df[['stat']].rename(columns={'stat': 'Periportal_Males_AAV2'}).transpose()
mat

In [None]:
# Per-condition expression of three selected genes
sc.pl.violin(
    adata_males_AAV2_periportal,
    keys=['Dbp', 'Ppard', 'Acss2'],
    groupby='Condition',
    rotation=90,
)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm, using the collectri network
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
# Bar plot of TF activity scores for this contrast (top=25)
dc.plot_barplot(
    tf_acts,
    'Periportal_Males_AAV2',
    top=25,
    vertical=True,
)

In [None]:
# One-row frames of log fold changes and adjusted p-values, labelled with the contrast name
logFCs = results_df[['log2FoldChange']].rename(columns={'log2FoldChange': 'Periportal_Males_AAV2'}).transpose()
pvals = results_df[['padj']].rename(columns={'padj': 'Periportal_Males_AAV2'}).transpose()

# Volcano of the Myc targets in the collectri network
dc.plot_volcano(
    logFCs, pvals, 'Periportal_Males_AAV2',
    name='Myc', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5,
)

In [None]:
# Volcano of the Ppara targets in the collectri network
dc.plot_volcano(
    logFCs, pvals, 'Periportal_Males_AAV2',
    name='Ppara', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5,
)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
# Infer pathway activities with mlm, using the progeny_mouse network
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
# Bar plot of pathway activity scores for this contrast (top=25)
dc.plot_barplot(
    pathway_acts,
    'Periportal_Males_AAV2',
    top=25,
    vertical=False,
)

In [None]:
# Targets of the PI3K pathway ranked on the DE statistic
dc.plot_targets(
    results_df,
    stat='stat',
    source_name='PI3K',
    net=progeny_mouse,
    top=15,
)

In [None]:
# Targets of the Androgen pathway ranked on the DE statistic
dc.plot_targets(
    results_df,
    stat='stat',
    source_name='Androgen',
    net=progeny_mouse,
    top=15,
)

## Functional enrichment of biological terms

### Hallmarks and Chromosome location

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_1 gene sets
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_1,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_1.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### immunesigdb and chemical and genetic perturbations

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_2 gene sets
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_2,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_2.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Gene Ontology Terms

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_3 gene sets
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_3,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_3.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Pathways

In [None]:
# GSEA of the DE 'stat' ranking against the filtered_msigdb_4 gene sets
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df, stat='stat', net=filtered_msigdb_4,
    source='geneset', target='genesymbol', verbose=True,
)

# Preview, highest NES first
enr_pvals_filtered_msigdb_4.sort_values(by='NES', ascending=False)

In [None]:
# Keep terms with FDR below 5%
enr_pvals_filtered = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'].lt(0.05)]

In [None]:
# 35 terms with the largest absolute NES
enr_pvals_filtered_color = (
    enr_pvals_filtered
    .sort_values(by='NES', key=abs, ascending=False)
    .head(35)
)

In [None]:
# Blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'].lt(0), 'blue', 'red')

In [None]:
# Order rows by signed NES for plotting
enr_pvals_filtered_color = enr_pvals_filtered_color.sort_values('NES')

In [None]:
enr_pvals_filtered_color

In [None]:
# NES bar plot, one bar per term, using the per-row colors
dc.plot_barplot_df(
    enr_pvals_filtered_color,
    x='NES',
    y='Term',
    figsize=(7, 10),
    color=enr_pvals_filtered_color['color'].tolist(),
)

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# Significant genes (padj < 0.05) that are also in the literature list, matched on the index
results_df[results_df['padj'].lt(0.05)].merge(
    literature_list, how='inner', left_index=True, right_index=True
)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the file name spells "AVV2"; kept byte-for-byte because other scripts may read this path.
deg_results_csv = 'analyzed/DE/DE_Results_14c/DEG/male_AVV2_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(deg_results_csv), exist_ok=True)
results_df.to_csv(deg_results_csv, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Join activity scores and p-values row-wise; the suffixes tell the two value columns apart.
pathways_all = pd.merge(pathway_acts.transpose(), pathway_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the index is labelled 'GeneName' although the rows look like pathways - kept for downstream readers; confirm.
pathway_results_csv = 'analyzed/DE/DE_Results_14c/Pathway_results/male_AVV2_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(pathway_results_csv), exist_ok=True)
pathways_all.to_csv(pathway_results_csv, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Join activity scores and p-values row-wise; the suffixes tell the two value columns apart.
tf_all = pd.merge(tf_acts.transpose(), tf_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): the index is labelled 'GeneName' although the rows look like TFs - kept for downstream readers; confirm.
tf_results_csv = 'analyzed/DE/DE_Results_14c/TF_results/male_AVV2_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(tf_results_csv), exist_ok=True)
tf_all.to_csv(tf_results_csv, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# to_csv does not create missing folders, so make sure the target directory exists first.
# NOTE(review): 'Term' is already used as a column in the bar plots, so index_label='Term'
# probably writes two 'Term' headers - kept unchanged for downstream readers; confirm.
gsea_results_prefix = 'analyzed/DE/DE_Results_14c/GSEA_results/male_AVV2_vs_Control_periportal_Results_msigdb_'
os.makedirs(os.path.dirname(gsea_results_prefix), exist_ok=True)
for collection_id, gsea_table in enumerate(
    (enr_pvals_filtered_msigdb_1, enr_pvals_filtered_msigdb_2, enr_pvals_filtered_msigdb_3, enr_pvals_filtered_msigdb_4),
    start=1,
):
    gsea_table.to_csv(f'{gsea_results_prefix}{collection_id}.csv', index=True, index_label='Term')

# Females AAV9 Versus Control in Pericentral

In [None]:
# Pseudo-bulk profiles for this contrast: female samples from the pericentral zone.
# The Condition filter is an exclusion: every level other than 'AAV2-CMV-GFP' is kept.
# NOTE(review): presumably that leaves only 'AAV9-CMV-GFP' and 'Untreated' - confirm.
# .copy() detaches the subset from pdata so later filtering does not touch it.
adata_females_AAV9_pericentral =  pdata[(pdata.obs['Gender'] == 'Female') & (pdata.obs['Condition'] != 'AAV2-CMV-GFP') & (pdata.obs['zonation'] == 'Pericentral')].copy()

In [None]:
dc.plot_filter_by_expr(adata_females_AAV9_pericentral, group = 'Condition', min_count=10, min_total_count=40)

In [None]:
adata_females_AAV9_pericentral.obs

In [None]:
# Select genes by expression, using the same thresholds as the diagnostic
# plot_filter_by_expr cell above (min_count=10, min_total_count=40).
# NOTE(review): `genes` is presumably the set of genes that pass the filter - confirm
# against the decoupler version in use.
genes = dc.filter_by_expr(adata_females_AAV9_pericentral, group='Condition', min_count=10, min_total_count=40)

# Filter by these genes
# The subset is rebound to the same name, so re-running this cell filters the
# already-filtered object.
adata_females_AAV9_pericentral = adata_females_AAV9_pericentral[:, genes].copy()
adata_females_AAV9_pericentral

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# `inference` is created once here and reused by the DeseqStats cell below.
# n_cpus=1 is given both to the inference object and to DeseqDataSet.
# NOTE(review): DefaultInference / DeseqDataSet are not imported in the visible
# import cell - presumably provided by an earlier cell or the wildcard import.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_females_AAV9_pericentral,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 61.10 seconds.

Fitting dispersion trend curve...


... done in 0.24 seconds.

Fitting MAP dispersions...


... done in 56.47 seconds.

Fitting LFCs...


... done in 3.98 seconds.

Replacing 0 outlier genes.



In [None]:
# Extract contrast between treated vs control
stat_res = DeseqStats(dds, contrast=["Condition", 'AAV9-CMV-GFP', "Untreated"], inference=inference)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV9-CMV-GFP_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
mat = results_df[['stat']].T.rename(index={'stat': 'Pericentral_Females_AAV9'})
mat

In [None]:
sc.pl.violin(adata_females_AAV9_pericentral, keys=['Pck1', 'Hacl1', 'Eepd1'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm
# `mat` is the one-row frame of Wald statistics built above; `collectri` must
# already be in memory because the retrieval cell above is commented out.
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Pericentral_Females_AAV9', top=25, vertical=True)

In [None]:
# One-row frames (row label = contrast name) holding the shrunken log2 fold
# changes and the adjusted p-values; both are reused by the next volcano cell.
logFCs = results_df[['log2FoldChange']].rename(columns={'log2FoldChange': 'Pericentral_Females_AAV9'}).T
pvals = results_df[['padj']].rename(columns={'padj': 'Pericentral_Females_AAV9'}).T

# Volcano restricted to the targets of Clock in the CollecTRI network
dc.plot_volcano(
    logFCs,
    pvals,
    'Pericentral_Females_AAV9',
    name='Clock',
    net=collectri,
    top=10,
    sign_thr=0.05,
    lFCs_thr=0.5,
)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Pericentral_Females_AAV9', name='Ppara', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Pericentral_Females_AAV9', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='TGFb', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='Hypoxia', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

### Hallmarks and Chromosome location

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_1,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_1.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### immunesigdb and chemical and genetic perturbations

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_2,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_2.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Gene Ontology Terms

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_3,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_3.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Pathways

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_4,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_4.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
results_df[results_df['padj'] < 0.05].merge(literature_list, how='inner', left_index=True, right_index=True)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before exporting (keeps Restart & Run All working
# on a fresh checkout).
# NOTE(review): the file name spells 'AVV9' rather than 'AAV9'; left unchanged
# because other scripts may load this exact path.
deg_csv = 'analyzed/DE/DE_Results_14c/DEG/female_AVV9_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(deg_csv), exist_ok=True)
results_df.to_csv(deg_csv, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Put activity scores and p-values side by side (one row per column of
# pathway_acts) and export them. The folder is created first because
# DataFrame.to_csv does not create missing parent directories.
# NOTE(review): the index presumably holds pathway names, yet it is written
# under the header 'GeneName'; label kept so downstream readers are unaffected.
pathway_csv = 'analyzed/DE/DE_Results_14c/Pathway_results/female_AVV9_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(pathway_csv), exist_ok=True)
pathways_all = pd.merge(pathway_acts.transpose(), pathway_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
pathways_all.to_csv(pathway_csv, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Put activity scores and p-values side by side (one row per column of
# tf_acts) and export them. The folder is created first because
# DataFrame.to_csv does not create missing parent directories.
# NOTE(review): the index presumably holds transcription factor names, yet it
# is written under the header 'GeneName'; label kept for downstream readers.
tf_csv = 'analyzed/DE/DE_Results_14c/TF_results/female_AVV9_vs_Control_pericentral_Results.csv'
os.makedirs(os.path.dirname(tf_csv), exist_ok=True)
tf_all = pd.merge(tf_acts.transpose(), tf_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
tf_all.to_csv(tf_csv, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# One CSV per MSigDB collection group (1-4). Despite the 'filtered' in their
# names, these frames are the complete dc.get_gsea_df outputs; the FDR < 0.05
# subsets are only used for the bar plots.
# The folder is created first because DataFrame.to_csv does not create missing
# parent directories.
os.makedirs('analyzed/DE/DE_Results_14c/GSEA_results', exist_ok=True)
enr_pvals_filtered_msigdb_1.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_pericentral_Results_msigdb_1.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_2.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_pericentral_Results_msigdb_2.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_3.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_pericentral_Results_msigdb_3.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_4.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_pericentral_Results_msigdb_4.csv', index=True, index_label='Term')

# Females AAV9 Versus Control in Other

In [None]:
# Pseudo-bulk profiles for this contrast: female samples whose zonation is 'Other'.
# The Condition filter is an exclusion: every level other than 'AAV2-CMV-GFP' is kept.
# NOTE(review): presumably that leaves only 'AAV9-CMV-GFP' and 'Untreated' - confirm.
# .copy() detaches the subset from pdata so later filtering does not touch it.
adata_females_AAV9_other =  pdata[(pdata.obs['Gender'] == 'Female') & (pdata.obs['Condition'] != 'AAV2-CMV-GFP') & (pdata.obs['zonation'] == 'Other')].copy()

In [None]:
dc.plot_filter_by_expr(adata_females_AAV9_other, group = 'Condition', min_count=10, min_total_count=40)

In [None]:
adata_females_AAV9_other.obs

In [None]:
# Select genes by expression, using the same thresholds as the diagnostic
# plot_filter_by_expr cell above (min_count=10, min_total_count=40).
# NOTE(review): `genes` is presumably the set of genes that pass the filter - confirm
# against the decoupler version in use.
genes = dc.filter_by_expr(adata_females_AAV9_other, group='Condition', min_count=10, min_total_count=40)

# Filter by these genes
# The subset is rebound to the same name, so re-running this cell filters the
# already-filtered object.
adata_females_AAV9_other = adata_females_AAV9_other[:, genes].copy()
adata_females_AAV9_other

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# `inference` is created once here and reused by the DeseqStats cell below.
# n_cpus=1 is given both to the inference object and to DeseqDataSet.
# NOTE(review): DefaultInference / DeseqDataSet are not imported in the visible
# import cell - presumably provided by an earlier cell or the wildcard import.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_females_AAV9_other,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 79.90 seconds.

Fitting dispersion trend curve...


... done in 0.33 seconds.

Fitting MAP dispersions...


... done in 78.53 seconds.

Fitting LFCs...


... done in 4.86 seconds.

Replacing 0 outlier genes.



In [None]:
# Extract contrast between treated vs control
stat_res = DeseqStats(dds, contrast=["Condition", 'AAV9-CMV-GFP', "Untreated"], inference=inference)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV9-CMV-GFP_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
mat = results_df[['stat']].T.rename(index={'stat': 'Other_Females_AAV9'})
mat

In [None]:
sc.pl.violin(adata_females_AAV9_other, keys=['Btg1', 'Tef', 'Nfil3'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm
# `mat` is the one-row frame of Wald statistics built above; `collectri` must
# already be in memory because the retrieval cell above is commented out.
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Other_Females_AAV9', top=25, vertical=True)

In [None]:
# One-row frames (row label = contrast name) holding the shrunken log2 fold
# changes and the adjusted p-values; both are reused by the next volcano cell.
logFCs = results_df[['log2FoldChange']].rename(columns={'log2FoldChange': 'Other_Females_AAV9'}).T
pvals = results_df[['padj']].rename(columns={'padj': 'Other_Females_AAV9'}).T

# Volcano restricted to the targets of Atf6 in the CollecTRI network
dc.plot_volcano(
    logFCs,
    pvals,
    'Other_Females_AAV9',
    name='Atf6',
    net=collectri,
    top=10,
    sign_thr=0.05,
    lFCs_thr=0.5,
)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Other_Females_AAV9', name='Nr1d1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Other_Females_AAV9', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='Hypoxia', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='MAPK', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

In [None]:
# msigdb_mouse = pd.read_csv("msigdb_complete.csv")
# msigdb_mouse

In [None]:
# msigdb_mouse.collection.unique().tolist()

In [None]:
# categories_1 = ['hallmark', 'positional']
# categories_2 = ['immunesigdb', 'chemical_and_genetic_perturbations']
# categories_3 = ['go_biological_process', 'go_cellular_component', 'go_molecular_function']
# categories_4 = ['kegg_pathways', 'reactome_pathways']
                
# filtered_msigdb_1 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_1)]
# filtered_msigdb_2 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_2)]
# filtered_msigdb_3 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_3)]
# filtered_msigdb_4 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_4)]

In [None]:
# filtered_msigdb_1 = filtered_msigdb_1[~filtered_msigdb_1.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_2 = filtered_msigdb_2[~filtered_msigdb_2.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_3 = filtered_msigdb_3[~filtered_msigdb_3.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_4 = filtered_msigdb_4[~filtered_msigdb_4.duplicated(['genesymbol', 'geneset'])]

### Hallmarks and Chromosome location

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_1,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_1.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### immunesigdb and chemical and genetic perturbations

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_2,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_2.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Gene Ontology Terms

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_3,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_3.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Pathways

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_4,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_4.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
results_df[results_df['padj'] < 0.05].merge(literature_list, how='inner', left_index=True, right_index=True)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before exporting (keeps Restart & Run All working
# on a fresh checkout).
# NOTE(review): the file name spells 'AVV9' rather than 'AAV9'; left unchanged
# because other scripts may load this exact path.
deg_csv = 'analyzed/DE/DE_Results_14c/DEG/female_AVV9_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(deg_csv), exist_ok=True)
results_df.to_csv(deg_csv, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Put activity scores and p-values side by side (one row per column of
# pathway_acts) and export them. The folder is created first because
# DataFrame.to_csv does not create missing parent directories.
# NOTE(review): the index presumably holds pathway names, yet it is written
# under the header 'GeneName'; label kept so downstream readers are unaffected.
pathway_csv = 'analyzed/DE/DE_Results_14c/Pathway_results/female_AVV9_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(pathway_csv), exist_ok=True)
pathways_all = pd.merge(pathway_acts.transpose(), pathway_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
pathways_all.to_csv(pathway_csv, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Put activity scores and p-values side by side (one row per column of
# tf_acts) and export them. The folder is created first because
# DataFrame.to_csv does not create missing parent directories.
# NOTE(review): the index presumably holds transcription factor names, yet it
# is written under the header 'GeneName'; label kept for downstream readers.
tf_csv = 'analyzed/DE/DE_Results_14c/TF_results/female_AVV9_vs_Control_other_Results.csv'
os.makedirs(os.path.dirname(tf_csv), exist_ok=True)
tf_all = pd.merge(tf_acts.transpose(), tf_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
tf_all.to_csv(tf_csv, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# One CSV per MSigDB collection group (1-4). Despite the 'filtered' in their
# names, these frames are the complete dc.get_gsea_df outputs; the FDR < 0.05
# subsets are only used for the bar plots.
# The folder is created first because DataFrame.to_csv does not create missing
# parent directories.
os.makedirs('analyzed/DE/DE_Results_14c/GSEA_results', exist_ok=True)
enr_pvals_filtered_msigdb_1.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_other_Results_msigdb_1.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_2.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_other_Results_msigdb_2.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_3.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_other_Results_msigdb_3.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_4.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_other_Results_msigdb_4.csv', index=True, index_label='Term')

# Females AAV9 Versus Control in Periportal

In [None]:
# Pseudo-bulk profiles for this contrast: female samples from the periportal zone.
# The Condition filter is an exclusion: every level other than 'AAV2-CMV-GFP' is kept.
# NOTE(review): presumably that leaves only 'AAV9-CMV-GFP' and 'Untreated' - confirm.
# .copy() detaches the subset from pdata so later filtering does not touch it.
adata_females_AAV9_periportal =  pdata[(pdata.obs['Gender'] == 'Female') & (pdata.obs['Condition'] != 'AAV2-CMV-GFP') & (pdata.obs['zonation'] == 'Periportal')].copy()

In [None]:
dc.plot_filter_by_expr(adata_females_AAV9_periportal, group = 'Condition', min_count=10, min_total_count=40)

In [None]:
adata_females_AAV9_periportal.obs

In [None]:
# Select genes by expression, using the same thresholds as the diagnostic
# plot_filter_by_expr cell above (min_count=10, min_total_count=40).
# NOTE(review): `genes` is presumably the set of genes that pass the filter - confirm
# against the decoupler version in use.
genes = dc.filter_by_expr(adata_females_AAV9_periportal, group='Condition', min_count=10, min_total_count=40)

# Filter by these genes
# The subset is rebound to the same name, so re-running this cell filters the
# already-filtered object.
adata_females_AAV9_periportal = adata_females_AAV9_periportal[:, genes].copy()
adata_females_AAV9_periportal

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# `inference` is created once here and reused by the DeseqStats cell below.
# n_cpus=1 is given both to the inference object and to DeseqDataSet.
# NOTE(review): DefaultInference / DeseqDataSet are not imported in the visible
# import cell - presumably provided by an earlier cell or the wildcard import.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_females_AAV9_periportal,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 75.36 seconds.

Fitting dispersion trend curve...


... done in 0.29 seconds.

Fitting MAP dispersions...


... done in 69.61 seconds.

Fitting LFCs...


... done in 4.62 seconds.

Replacing 0 outlier genes.



In [None]:
# Extract contrast between treated vs control
stat_res = DeseqStats(dds, contrast=["Condition", 'AAV9-CMV-GFP', "Untreated"], inference=inference)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV9-CMV-GFP_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
mat = results_df[['stat']].T.rename(index={'stat': 'Periportal_Females_AAV9'})
mat

In [None]:
sc.pl.violin(adata_females_AAV9_periportal, keys=['Dbp', 'Gpr146', 'Tubb2a'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm
# `mat` is the one-row frame of Wald statistics built above; `collectri` must
# already be in memory because the retrieval cell above is commented out.
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Periportal_Females_AAV9', top=25, vertical=True)

In [None]:
# One-row frames (row label = contrast name) holding the shrunken log2 fold
# changes and the adjusted p-values; both are reused by the next volcano cell.
logFCs = results_df[['log2FoldChange']].rename(columns={'log2FoldChange': 'Periportal_Females_AAV9'}).T
pvals = results_df[['padj']].rename(columns={'padj': 'Periportal_Females_AAV9'}).T

# Volcano restricted to the targets of one source in the CollecTRI network.
# NOTE(review): 'Atf' looks like a truncated TF symbol (other sections use
# full names such as 'Atf6') - confirm it exists as a source in `collectri`.
dc.plot_volcano(
    logFCs,
    pvals,
    'Periportal_Females_AAV9',
    name='Atf',
    net=collectri,
    top=10,
    sign_thr=0.05,
    lFCs_thr=0.5,
)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Periportal_Females_AAV9', name='Hlf', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Periportal_Females_AAV9', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='Androgen', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='WNT', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

In [None]:
# msigdb_mouse = pd.read_csv("msigdb_complete.csv")
# msigdb_mouse

In [None]:
# msigdb_mouse.collection.unique().tolist()

In [None]:
# categories_1 = ['hallmark', 'positional']
# categories_2 = ['immunesigdb', 'chemical_and_genetic_perturbations']
# categories_3 = ['go_biological_process', 'go_cellular_component', 'go_molecular_function']
# categories_4 = ['kegg_pathways', 'reactome_pathways']
                
# filtered_msigdb_1 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_1)]
# filtered_msigdb_2 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_2)]
# filtered_msigdb_3 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_3)]
# filtered_msigdb_4 = msigdb_mouse[msigdb_mouse['collection'].isin(categories_4)]

In [None]:
# filtered_msigdb_1 = filtered_msigdb_1[~filtered_msigdb_1.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_2 = filtered_msigdb_2[~filtered_msigdb_2.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_3 = filtered_msigdb_3[~filtered_msigdb_3.duplicated(['genesymbol', 'geneset'])]
# filtered_msigdb_4 = filtered_msigdb_4[~filtered_msigdb_4.duplicated(['genesymbol', 'geneset'])]

### Hallmarks and Chromosome location

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_1,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_1.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### immunesigdb and chemical and genetic perturbations

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_2,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_2.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Gene Ontology Terms

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_3,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_3.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Pathways

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_4,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_4.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Negative NES -> blue, otherwise red. assign() binds a new frame instead of
# writing into the head(35) slice created above, which avoids pandas'
# SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Matching with Genes extracted from the literature

Here we read the list of genes that are relevant in the context of AAV treatments according to the literature, and we match them against the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
results_df[results_df['padj'] < 0.05].merge(literature_list, how='inner', left_index=True, right_index=True)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before exporting (keeps Restart & Run All working
# on a fresh checkout).
# NOTE(review): the file name spells 'AVV9' rather than 'AAV9'; left unchanged
# because other scripts may load this exact path.
deg_csv = 'analyzed/DE/DE_Results_14c/DEG/female_AVV9_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(deg_csv), exist_ok=True)
results_df.to_csv(deg_csv, index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Put activity scores and p-values side by side (one row per column of
# pathway_acts) and export them. The folder is created first because
# DataFrame.to_csv does not create missing parent directories.
# NOTE(review): the index presumably holds pathway names, yet it is written
# under the header 'GeneName'; label kept so downstream readers are unaffected.
pathway_csv = 'analyzed/DE/DE_Results_14c/Pathway_results/female_AVV9_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(pathway_csv), exist_ok=True)
pathways_all = pd.merge(pathway_acts.transpose(), pathway_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
pathways_all.to_csv(pathway_csv, index=True, index_label='GeneName')

In [None]:
## TF activity results
# Put activity scores and p-values side by side (one row per column of
# tf_acts) and export them. The folder is created first because
# DataFrame.to_csv does not create missing parent directories.
# NOTE(review): the index presumably holds transcription factor names, yet it
# is written under the header 'GeneName'; label kept for downstream readers.
tf_csv = 'analyzed/DE/DE_Results_14c/TF_results/female_AVV9_vs_Control_periportal_Results.csv'
os.makedirs(os.path.dirname(tf_csv), exist_ok=True)
tf_all = pd.merge(tf_acts.transpose(), tf_pvals.transpose(), left_index=True, right_index=True, suffixes=('_ActivityScore', '_pvalue'))
tf_all.to_csv(tf_csv, index=True, index_label='GeneName')

In [None]:
## Enrichment Results
# One CSV per MSigDB collection group (1-4). Despite the 'filtered' in their
# names, these frames are the complete dc.get_gsea_df outputs; the FDR < 0.05
# subsets are only used for the bar plots.
# The folder is created first because DataFrame.to_csv does not create missing
# parent directories.
os.makedirs('analyzed/DE/DE_Results_14c/GSEA_results', exist_ok=True)
enr_pvals_filtered_msigdb_1.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_periportal_Results_msigdb_1.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_2.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_periportal_Results_msigdb_2.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_3.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_periportal_Results_msigdb_3.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_4.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV9_vs_Control_periportal_Results_msigdb_4.csv', index=True, index_label='Term')

# Females AAV2 Versus Control in Pericentral

In [None]:
# Pseudo-bulk profiles for this contrast: female samples from the pericentral zone.
# The Condition filter is an exclusion: every level other than 'AAV9-CMV-GFP' is kept.
# NOTE(review): presumably that leaves only 'AAV2-CMV-GFP' and 'Untreated' - confirm.
# .copy() detaches the subset from pdata so later filtering does not touch it.
adata_females_AAV2_pericentral =  pdata[(pdata.obs['Gender'] == 'Female') & (pdata.obs['Condition'] != 'AAV9-CMV-GFP') & (pdata.obs['zonation'] == 'Pericentral')].copy()

In [None]:
dc.plot_filter_by_expr(adata_females_AAV2_pericentral, group = 'Condition', min_count=10, min_total_count=40)

In [None]:
adata_females_AAV2_pericentral.obs

In [None]:
# Select genes by expression, using the same thresholds as the diagnostic
# plot_filter_by_expr cell above (min_count=10, min_total_count=40).
# NOTE(review): `genes` is presumably the set of genes that pass the filter - confirm
# against the decoupler version in use.
genes = dc.filter_by_expr(adata_females_AAV2_pericentral, group='Condition', min_count=10, min_total_count=40)

# Filter by these genes
# The subset is rebound to the same name, so re-running this cell filters the
# already-filtered object.
adata_females_AAV2_pericentral = adata_females_AAV2_pericentral[:, genes].copy()
adata_females_AAV2_pericentral

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# `inference` is created once here and reused by the DeseqStats cell below.
# n_cpus=1 is given both to the inference object and to DeseqDataSet.
# NOTE(review): DefaultInference / DeseqDataSet are not imported in the visible
# import cell - presumably provided by an earlier cell or the wildcard import.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_females_AAV2_pericentral,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 63.51 seconds.

Fitting dispersion trend curve...


... done in 0.25 seconds.

Fitting MAP dispersions...


... done in 54.01 seconds.

Fitting LFCs...


... done in 3.96 seconds.

Replacing 0 outlier genes.



In [None]:
# Extract contrast between treated vs control
stat_res = DeseqStats(dds, contrast=["Condition", 'AAV2-CMV-GFP', "Untreated"], inference=inference)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV2-CMV-GFP_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
mat = results_df[['stat']].T.rename(index={'stat': 'Pericentral_Females_AAV2'})
mat

In [None]:
sc.pl.violin(adata_females_AAV2_pericentral, keys=['Ppp1r10', 'Gdf15', 'Junb'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm
# `mat` is the one-row frame of Wald statistics built above; `collectri` must
# already be in memory because the retrieval cell above is commented out.
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Pericentral_Females_AAV2', top=25, vertical=True)

In [None]:
# Extract logFCs and pvals
# Both are reshaped to one-row frames labelled with the contrast name, matching `mat`.
# log2FoldChange is read after stat_res.lfc_shrink() was run, and `pvals` holds the
# adjusted p-values (padj), so sign_thr below is applied to padj.
logFCs = results_df[['log2FoldChange']].T.rename(index={'log2FoldChange': 'Pericentral_Females_AAV2'})
pvals = results_df[['padj']].T.rename(index={'padj': 'Pericentral_Females_AAV2'})

# Plot (restricted via `name` / `net` to the entries of `collectri` for Nr3c1)
dc.plot_volcano(logFCs, pvals, 'Pericentral_Females_AAV2', name='Nr3c1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Pericentral_Females_AAV2', name='Tp63', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
# Infer pathway activities with mlm
# `progeny_mouse` is not loaded in this section (the read_csv cell above is
# commented out), so it must already be defined earlier in the notebook.
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Pericentral_Females_AAV2', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='EGFR', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='JAK-STAT', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

### Hallmarks and Chromosome location

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_1,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_1.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### immunesigdb and chemical and genetic perturbations

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_2,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_2.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Gene Ontology Terms

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_3,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_3.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Pathways

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_4,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_4.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Matching with Genes extracted from the literature

Here we take the list of genes that are relevant in the context of AAV treatments according to the literature and match them with the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
# Genes significant at padj < 0.05 that also appear in `literature_list`
# (inner join on the index of both frames).
results_df.loc[results_df['padj'].lt(0.05)].merge(
    literature_list,
    left_index=True,
    right_index=True,
    how='inner',
)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# The frame's index is written as the first column under the header 'GeneName'.
# NOTE(review): the file name spells 'AVV2' rather than 'AAV2'; left unchanged because
# these files are meant to be read by other plotting scripts -- rename on both sides together.
results_df.to_csv('analyzed/DE/DE_Results_14c/DEG/female_AVV2_vs_Control_pericentral_Results.csv', index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Scores and p-values share the same column label after transposing, so the
# suffixes tell them apart once the two tables are joined on their index.
pathways_all = pathway_acts.T.merge(
    pathway_pvals.T,
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# NOTE(review): the index holds the sources of `progeny_mouse` (pathways) but is
# labelled 'GeneName'; the label is kept so existing readers of this file still work.
pathways_all.to_csv(
    'analyzed/DE/DE_Results_14c/Pathway_results/female_AVV2_vs_Control_pericentral_Results.csv',
    index=True,
    index_label='GeneName',
)

In [None]:
## TF activity results
# Scores and p-values share the same column label after transposing, so the
# suffixes tell them apart once the two tables are joined on their index.
tf_all = tf_acts.T.merge(
    tf_pvals.T,
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# NOTE(review): the index holds the sources of `collectri` (TFs) but is labelled
# 'GeneName'; the label is kept so existing readers of this file still work.
tf_all.to_csv(
    'analyzed/DE/DE_Results_14c/TF_results/female_AVV2_vs_Control_pericentral_Results.csv',
    index=True,
    index_label='GeneName',
)

In [None]:
## Enrichment Results
# Saves the complete GSEA tables (not the FDR-filtered top-35 subsets used for plotting)
# for the four gene-set collections.
# NOTE(review): these frames are plotted with y='Term' above, so 'Term' appears to be a
# regular column; writing the index under index_label='Term' as well would give the CSV
# two 'Term' headers -- confirm what downstream readers expect before changing.
enr_pvals_filtered_msigdb_1.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_pericentral_Results_msigdb_1.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_2.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_pericentral_Results_msigdb_2.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_3.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_pericentral_Results_msigdb_3.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_4.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_pericentral_Results_msigdb_4.csv', index=True, index_label='Term')

# Females AAV2 Versus Control in Other

In [None]:
# Female pseudo-bulk profiles from the 'Other' zone, excluding AAV9-treated ones.
# NOTE(review): this keeps every Condition except 'AAV9-CMV-GFP'; it relies on
# 'AAV2-CMV-GFP' and 'Untreated' being the only remaining levels -- confirm.
adata_females_AAV2_other =  pdata[(pdata.obs['Gender'] == 'Female') & (pdata.obs['Condition'] != 'AAV9-CMV-GFP') & (pdata.obs['zonation'] == 'Other')].copy()

In [None]:
dc.plot_filter_by_expr(adata_females_AAV2_other, group = 'Condition', min_count=10, min_total_count=40)

In [None]:
adata_females_AAV2_other.obs

In [None]:
# Select the genes that pass the expression filter within the 'Condition' groups.
# The thresholds (min_count=10, min_total_count=40) mirror the ones previewed
# with dc.plot_filter_by_expr two cells above.
genes = dc.filter_by_expr(adata_females_AAV2_other, group='Condition', min_count=10, min_total_count=40)

# Filter by these genes
# NOTE: this overwrites the unfiltered subset under the same name, so the cell is
# not idempotent -- re-running it filters the already-filtered object.
adata_females_AAV2_other = adata_females_AAV2_other[:, genes].copy()
adata_females_AAV2_other

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# The same `inference` object is reused by DeseqStats further down.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_females_AAV2_other,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    # NOTE(review): n_cpus is already set on `inference`; confirm this extra
    # argument is still needed by the installed pydeseq2 version.
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 71.21 seconds.

Fitting dispersion trend curve...


... done in 0.28 seconds.

Fitting MAP dispersions...


... done in 69.64 seconds.

Fitting LFCs...


... done in 4.36 seconds.

Replacing 0 outlier genes.



In [None]:
# Extract contrast between treated vs control
stat_res = DeseqStats(dds, contrast=["Condition", 'AAV2-CMV-GFP', "Untreated"], inference=inference)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV2-CMV-GFP_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
# One-row matrix of the 'stat' column (genes as columns); the row label names this
# contrast and is carried through to the activity tables and plots below.
mat = results_df[['stat']].T.rename(index={'stat': 'Other_Females_AAV2'})
mat

In [None]:
sc.pl.violin(adata_females_AAV2_other, keys=['Fkbp11', 'Gdf15', 'Junb'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm
# `collectri` is not loaded in this section (the retrieval cell above is
# commented out), so it must already be defined earlier in the notebook.
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Other_Females_AAV2', top=25, vertical=True)

In [None]:
# Extract logFCs and pvals
# Both are reshaped to one-row frames labelled with the contrast name, matching `mat`.
# log2FoldChange is read after stat_res.lfc_shrink() was run, and `pvals` holds the
# adjusted p-values (padj), so sign_thr below is applied to padj.
logFCs = results_df[['log2FoldChange']].T.rename(index={'log2FoldChange': 'Other_Females_AAV2'})
pvals = results_df[['padj']].T.rename(index={'padj': 'Other_Females_AAV2'})

# Plot (restricted via `name` / `net` to the entries of `collectri` for Atf6)
dc.plot_volcano(logFCs, pvals, 'Other_Females_AAV2', name='Atf6', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Other_Females_AAV2', name='Brca1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
# Infer pathway activities with mlm
# `progeny_mouse` is not loaded in this section (the read_csv cell above is
# commented out), so it must already be defined earlier in the notebook.
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Other_Females_AAV2', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='EGFR', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='Hypoxia', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

### Hallmarks and Chromosome location

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_1,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_1.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### immunesigdb and chemical and genetic perturbations

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_2,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_2.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Gene Ontology Terms

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_3,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_3.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Pathways

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_4,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_4.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Matching with Genes extracted from the literature

Here we take the list of genes that are relevant in the context of AAV treatments according to the literature and match them with the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
# Genes significant at padj < 0.05 that also appear in `literature_list`
# (inner join on the index of both frames).
results_df.loc[results_df['padj'].lt(0.05)].merge(
    literature_list,
    left_index=True,
    right_index=True,
    how='inner',
)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# The frame's index is written as the first column under the header 'GeneName'.
# NOTE(review): the file name spells 'AVV2' rather than 'AAV2'; left unchanged because
# these files are meant to be read by other plotting scripts -- rename on both sides together.
results_df.to_csv('analyzed/DE/DE_Results_14c/DEG/female_AVV2_vs_Control_other_Results.csv', index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Scores and p-values share the same column label after transposing, so the
# suffixes tell them apart once the two tables are joined on their index.
pathways_all = pathway_acts.T.merge(
    pathway_pvals.T,
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# NOTE(review): the index holds the sources of `progeny_mouse` (pathways) but is
# labelled 'GeneName'; the label is kept so existing readers of this file still work.
pathways_all.to_csv(
    'analyzed/DE/DE_Results_14c/Pathway_results/female_AVV2_vs_Control_other_Results.csv',
    index=True,
    index_label='GeneName',
)

In [None]:
## TF activity results
# Scores and p-values share the same column label after transposing, so the
# suffixes tell them apart once the two tables are joined on their index.
tf_all = tf_acts.T.merge(
    tf_pvals.T,
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# NOTE(review): the index holds the sources of `collectri` (TFs) but is labelled
# 'GeneName'; the label is kept so existing readers of this file still work.
tf_all.to_csv(
    'analyzed/DE/DE_Results_14c/TF_results/female_AVV2_vs_Control_other_Results.csv',
    index=True,
    index_label='GeneName',
)

In [None]:
## Enrichment Results
# Saves the complete GSEA tables (not the FDR-filtered top-35 subsets used for plotting)
# for the four gene-set collections.
# NOTE(review): these frames are plotted with y='Term' above, so 'Term' appears to be a
# regular column; writing the index under index_label='Term' as well would give the CSV
# two 'Term' headers -- confirm what downstream readers expect before changing.
enr_pvals_filtered_msigdb_1.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_other_Results_msigdb_1.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_2.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_other_Results_msigdb_2.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_3.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_other_Results_msigdb_3.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_4.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_other_Results_msigdb_4.csv', index=True, index_label='Term')

# Females AAV2 Versus Control in Periportal

In [None]:
# Female pseudo-bulk profiles from the 'Periportal' zone, excluding AAV9-treated ones.
# NOTE(review): this keeps every Condition except 'AAV9-CMV-GFP'; it relies on
# 'AAV2-CMV-GFP' and 'Untreated' being the only remaining levels -- confirm.
adata_females_AAV2_periportal =  pdata[(pdata.obs['Gender'] == 'Female') & (pdata.obs['Condition'] != 'AAV9-CMV-GFP') & (pdata.obs['zonation'] == 'Periportal')].copy()

In [None]:
dc.plot_filter_by_expr(adata_females_AAV2_periportal, group = 'Condition', min_count=10, min_total_count=40)

In [None]:
adata_females_AAV2_periportal.obs

In [None]:
# Select the genes that pass the expression filter within the 'Condition' groups.
# The thresholds (min_count=10, min_total_count=40) mirror the ones previewed
# with dc.plot_filter_by_expr two cells above.
genes = dc.filter_by_expr(adata_females_AAV2_periportal, group='Condition', min_count=10, min_total_count=40)

# Filter by these genes
# NOTE: this overwrites the unfiltered subset under the same name, so the cell is
# not idempotent -- re-running it filters the already-filtered object.
adata_females_AAV2_periportal = adata_females_AAV2_periportal[:, genes].copy()
adata_females_AAV2_periportal

## Contrast between conditions

In [None]:
# Build DESeq2 object
# Single-factor design on 'Condition' with 'Untreated' as the reference level.
# The same `inference` object is reused by DeseqStats further down.
inference = DefaultInference(n_cpus=1)
dds = DeseqDataSet(
    adata= adata_females_AAV2_periportal,
    design_factors=['Condition'],
    ref_level=['Condition', 'Untreated'],
    refit_cooks=True,
    inference=inference,
    # NOTE(review): n_cpus is already set on `inference`; confirm this extra
    # argument is still needed by the installed pydeseq2 version.
    n_cpus = 1)

In [None]:
dds.obsm['design_matrix']

In [None]:
dds.deseq2()

Fitting dispersions...


... done in 69.30 seconds.

Fitting dispersion trend curve...


... done in 0.26 seconds.

Fitting MAP dispersions...


... done in 63.10 seconds.

Fitting LFCs...


... done in 4.20 seconds.

Replacing 0 outlier genes.



In [None]:
# Extract contrast between treated vs control
stat_res = DeseqStats(dds, contrast=["Condition", 'AAV2-CMV-GFP', "Untreated"], inference=inference)

In [None]:
# Compute Wald test
stat_res.summary(n_cpus = 1)

In [None]:
# Shrink LFCs
stat_res.lfc_shrink(coeff='Condition_AAV2-CMV-GFP_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20)

In [None]:
# One-row matrix of the 'stat' column (genes as columns); the row label names this
# contrast and is carried through to the activity tables and plots below.
# NOTE(review): 'females' is lower-case here but capitalised in the pericentral and
# 'Other' sections; the label becomes part of the column names in the saved
# TF/pathway CSVs, so align it only together with whatever reads those files.
mat = results_df[['stat']].T.rename(index={'stat': 'Periportal_females_AAV2'})
mat

In [None]:
sc.pl.violin(adata_females_AAV2_periportal, keys=['Hsd3b5', 'Gdf15', 'Fos'], groupby='Condition', rotation=90)

## Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
# collectri = dc.get_collectri(organism='mouse', split_complexes=False)
# collectri

In [None]:
# Infer transcription factor activities with ulm
# `collectri` is not loaded in this section (the retrieval cell above is
# commented out), so it must already be defined earlier in the notebook.
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
dc.plot_barplot(tf_acts, 'Periportal_females_AAV2', top=25, vertical=True)

In [None]:
# Extract logFCs and pvals
# Both are reshaped to one-row frames labelled with the contrast name, matching `mat`.
# log2FoldChange is read after stat_res.lfc_shrink() was run, and `pvals` holds the
# adjusted p-values (padj), so sign_thr below is applied to padj.
logFCs = results_df[['log2FoldChange']].T.rename(index={'log2FoldChange': 'Periportal_females_AAV2'})
pvals = results_df[['padj']].T.rename(index={'padj': 'Periportal_females_AAV2'})

# Plot (restricted via `name` / `net` to the entries of `collectri` for Nr3c1)
dc.plot_volcano(logFCs, pvals, 'Periportal_females_AAV2', name='Nr3c1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'Periportal_females_AAV2', name='Smad4', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

## Pathway activity inference

In [None]:
# progeny_mouse = pd.read_csv("model_progeny500_mouse_decoupleR.csv")
# progeny_mouse

In [None]:
# Infer pathway activities with mlm
# `progeny_mouse` is not loaded in this section (the read_csv cell above is
# commented out), so it must already be defined earlier in the notebook.
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny_mouse)

In [None]:
pathway_acts

In [None]:
dc.plot_barplot(pathway_acts, 'Periportal_females_AAV2', top=25, vertical=False)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='Hypoxia', net=progeny_mouse, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='TGFb', net=progeny_mouse, top=15)

## Functional enrichment of biological terms

### Hallmarks and Chromosome location

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_1 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_1,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_1.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_1[enr_pvals_filtered_msigdb_1['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### immunesigdb and chemical and genetic perturbations

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_2 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_2,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_2.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_2[enr_pvals_filtered_msigdb_2['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Gene Ontology Terms

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_3 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_3,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_3.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_3[enr_pvals_filtered_msigdb_3['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Pathways

In [None]:
# Run gsea
enr_pvals_filtered_msigdb_4 = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=filtered_msigdb_4,
    source='geneset',
    target='genesymbol',
    verbose= True
)

enr_pvals_filtered_msigdb_4.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered  = enr_pvals_filtered_msigdb_4[enr_pvals_filtered_msigdb_4['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(35)

In [None]:
# Colour bars by enrichment direction: negative NES -> blue, otherwise red.
# `.assign` builds a new frame instead of writing a column into the `.head(35)`
# slice created in the previous cell, which avoids pandas' SettingWithCopyWarning.
enr_pvals_filtered_color = enr_pvals_filtered_color.assign(
    color=np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')
)

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(7, 10), color=enr_pvals_filtered_color.color.tolist())

### Matching with Genes extracted from the literature

Here we take the list of genes that are relevant in the context of AAV treatments according to the literature and match them with the significant results of our DGE analysis (adjusted p-value < 0.05).

In [None]:
# Genes significant at padj < 0.05 that also appear in `literature_list`
# (inner join on the index of both frames).
results_df.loc[results_df['padj'].lt(0.05)].merge(
    literature_list,
    left_index=True,
    right_index=True,
    how='inner',
)

## Saving All results for joint plotting in other scripts

In [None]:
## DEG results
# The frame's index is written as the first column under the header 'GeneName'.
# NOTE(review): the file name spells 'AVV2' rather than 'AAV2'; left unchanged because
# these files are meant to be read by other plotting scripts -- rename on both sides together.
results_df.to_csv('analyzed/DE/DE_Results_14c/DEG/female_AVV2_vs_Control_periportal_Results.csv', index=True, index_label='GeneName')

In [None]:
## Pathway Results
# Scores and p-values share the same column label after transposing, so the
# suffixes tell them apart once the two tables are joined on their index.
pathways_all = pathway_acts.T.merge(
    pathway_pvals.T,
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# NOTE(review): the index holds the sources of `progeny_mouse` (pathways) but is
# labelled 'GeneName'; the label is kept so existing readers of this file still work.
pathways_all.to_csv(
    'analyzed/DE/DE_Results_14c/Pathway_results/female_AVV2_vs_Control_periportal_Results.csv',
    index=True,
    index_label='GeneName',
)

In [None]:
## TF activity results
# Scores and p-values share the same column label after transposing, so the
# suffixes tell them apart once the two tables are joined on their index.
tf_all = tf_acts.T.merge(
    tf_pvals.T,
    left_index=True,
    right_index=True,
    suffixes=('_ActivityScore', '_pvalue'),
)
# NOTE(review): the index holds the sources of `collectri` (TFs) but is labelled
# 'GeneName'; the label is kept so existing readers of this file still work.
tf_all.to_csv(
    'analyzed/DE/DE_Results_14c/TF_results/female_AVV2_vs_Control_periportal_Results.csv',
    index=True,
    index_label='GeneName',
)

In [None]:
## Enrichment Results
# Saves the complete GSEA tables (not the FDR-filtered top-35 subsets used for plotting)
# for the four gene-set collections.
# NOTE(review): these frames are plotted with y='Term' above, so 'Term' appears to be a
# regular column; writing the index under index_label='Term' as well would give the CSV
# two 'Term' headers -- confirm what downstream readers expect before changing.
enr_pvals_filtered_msigdb_1.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_periportal_Results_msigdb_1.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_2.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_periportal_Results_msigdb_2.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_3.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_periportal_Results_msigdb_3.csv', index=True, index_label='Term')
enr_pvals_filtered_msigdb_4.to_csv('analyzed/DE/DE_Results_14c/GSEA_results/female_AVV2_vs_Control_periportal_Results_msigdb_4.csv', index=True, index_label='Term')

In [None]:
! jupyter nbconvert --to html 14c_PseudoBulk_Joint_FunctionalAnalysis.ipynb

[NbConvertApp] Writing 1265607 bytes to 14c_PseudoBulk_Joint_FunctionalAnalysis.html
