In [1]:
import os 
import pickle
import numpy as np 
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import anndata as ad
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from glob import glob

# Scanpy verbosity levels: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 1
# Default figure parameters used for the plots in this notebook.
sc.settings.set_figure_params(
    dpi=80,
    frameon=False,
    figsize=(3, 3),
    facecolor='white',
)

# Print the versions of scanpy and its main dependencies.
sc.logging.print_header()

scanpy==1.7.2 anndata==0.7.6 umap==0.5.1 numpy==1.20.3 scipy==1.7.1 pandas==1.3.3 scikit-learn==1.0 statsmodels==0.13.0 python-igraph==0.9.8


In [2]:
# Silence matplotlib's "too many open figures" warning by setting the limit to 0; see
# https://stackoverflow.com/questions/21884271/warning-about-too-many-open-figures
import matplotlib.pyplot as plt
plt.rcParams['figure.max_open_warning'] = 0

# Close every figure that is still open.
plt.close('all')

In [3]:
from matplotlib.pyplot import rc_context

In [4]:
# Read the four `*-denoised-labeled` AnnData files from the totalVI/ folder,
# in the same order as the names on the left-hand side.
NSG, Balb, Rag, Nuj = map(
    sc.read_h5ad,
    [
        'totalVI/NSG-denoised-labeled.h5ad.gz',
        'totalVI/Balb-denoised-labeled.h5ad.gz',
        'totalVI/Rag-denoised-labeled.h5ad.gz',
        'totalVI/Nuj-denoised-labeled.h5ad.gz',
    ],
)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [5]:
def make_ep_df(adata, layer='denoised_rna'):
    """Return one layer of ``adata`` as a DataFrame with variables as rows.

    Parameters
    ----------
    adata
        AnnData-like object exposing ``layers``, ``var.index`` and ``obs.index``.
    layer
        Key in ``adata.layers`` to export. Defaults to ``'denoised_rna'``,
        the layer this notebook uses.

    Returns
    -------
    pandas.DataFrame
        Rows are labelled by ``adata.var.index`` and columns by
        ``adata.obs.index`` (i.e. the layer transposed).

    Raises
    ------
    KeyError
        If ``layer`` is not present in ``adata.layers``.
    """
    values = adata.layers[layer]
    if hasattr(values, 'toarray'):
        # Sparse matrices cannot be handed to the DataFrame constructor
        # together with index/columns labels; densify them first.
        values = values.toarray()
    obs_by_var = pd.DataFrame(
        data=values,
        columns=adata.var.index,
        index=adata.obs.index,
    )
    return obs_by_var.T

## NSG

In [6]:
# Keep only the NSG cells whose leiden label contains the substring 'ep'.
NSG_ep = NSG[
    NSG.obs.leiden.isin(
        [label for label in NSG.obs.leiden.unique() if 'ep' in label]
    )
]

In [7]:
# Variables-by-cells table for the NSG 'ep' subset.
NSG_ep_df = make_ep_df(adata=NSG_ep)

## Balb

In [8]:
# Keep only the Balb cells whose leiden label contains the substring 'ep'.
Balb_ep = Balb[
    Balb.obs.leiden.isin(
        [label for label in Balb.obs.leiden.unique() if 'ep' in label]
    )
]

In [9]:
# Variables-by-cells table for the Balb 'ep' subset.
Balb_ep_df = make_ep_df(adata=Balb_ep)

## Rag

In [10]:
# Keep only the Rag cells whose leiden label contains the substring 'ep'.
Rag_ep = Rag[
    Rag.obs.leiden.isin(
        [label for label in Rag.obs.leiden.unique() if 'ep' in label]
    )
]

In [11]:
# Variables-by-cells table for the Rag 'ep' subset.
Rag_ep_df = make_ep_df(adata=Rag_ep)

## Nuj

In [12]:
# Keep only the Nuj cells whose leiden label contains the substring 'ep'.
Nuj_ep = Nuj[
    Nuj.obs.leiden.isin(
        [label for label in Nuj.obs.leiden.unique() if 'ep' in label]
    )
]

In [13]:
# Variables-by-cells table for the Nuj 'ep' subset.
Nuj_ep_df = make_ep_df(adata=Nuj_ep)

## Run differential expression analysis 

In [14]:
# Place the four per-dataset tables side by side (cells are columns);
# the order NSG, Balb, Rag, Nuj matches the order used to build colData.
counts = pd.concat(
    [NSG_ep_df, Balb_ep_df, Rag_ep_df, Nuj_ep_df],
    axis='columns',
)

In [15]:
# Per-cell metadata: stack the 'leiden' and 'batch' columns of every subset
# (same dataset order as `counts`), then derive two columns from the batch
# label: everything but its last character ('cond') and its last character ('rep').
colData = pd.concat(
    [subset.obs[['leiden', 'batch']] for subset in (NSG_ep, Balb_ep, Rag_ep, Nuj_ep)]
).assign(
    cond=lambda meta: meta['batch'].str[:-1],
    rep=lambda meta: meta['batch'].str[-1],
)

In [16]:
# Sanity check: (number of variables, number of cells) in the merged table.
counts.shape

(32285, 10151)

In [17]:
# Sanity check: the row count is expected to equal the number of columns in `counts`.
colData.shape

(10151, 4)

In [18]:
%%time 
name = 'ep_clusters'

# Create the output folder if needed so the export also works on a fresh checkout.
os.makedirs('deseq', exist_ok=True)

# Tab-separated inputs for differential-analysis.R: the counts table and the
# matching per-cell metadata.
counts.to_csv(f'deseq/{name}_counts.txt', sep='\t')
colData.to_csv(f'deseq/{name}_colData.txt', sep='\t')

In [19]:
# Show the first lines of the R script to recall its positional arguments.
!head differential-analysis.R 

args <- commandArgs(trailingOnly = TRUE)

countsDIR <- args[1]
colDataDIR <- args[2]
name <- args[3]
# PDIR <- args[1]
# refCOND <- [4]
# JOBS <- args[5]
# setwd(PDIR)
dir.create(paste0('deseq/',name))


___
Activate the `deseq2` conda environment before running the R script in the next cell.

In [6]:
%%bash
# Positional arguments read by differential-analysis.R via commandArgs().
deseq_args=(
    deseq/ep_clusters_counts.txt    # args[1]: counts table
    deseq/ep_clusters_colData.txt   # args[2]: per-cell metadata table
    ep_clusters                     # args[3]: run name; results go to deseq/<name>
)
Rscript differential-analysis.R "${deseq_args[@]}"

[1] "counts loaded!"
[1] 32245 10151
[1] "colData loaded!"
[1] 10151     4


In dir.create(paste0("deseq/", name)) : 'deseq/ep_clusters' already exists
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Part 1. 
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Part 2. 
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
object created!
DGEList
Model:
-11000-10100-10010010-10001-10
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Part 3. 
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Saving 7 x 7 in image
Saving 7 x 7 in image
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@ DONE! :-)@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Thu Feb  3 14:37:57 2022


In [3]:
%%bash 
# PAGEDIR is exported for the iPAGE run (presumably read by ipage_loop.sh — confirm).
# An already-exported PAGEDIR wins; otherwise fall back to the original location.
export PAGEDIR="${PAGEDIR:-/data_gilbert/home/aarab/iPAGE}"

# Stop here if the results folder is missing, rather than running iPAGE
# from the wrong working directory.
cd deseq/ep_clusters/ || exit 1

# Contrasts to analyse; add further delta_exp_*.txt files to this list as needed.
for f in delta_exp_Nuj_vs_Rag.txt delta_exp_Rag_vs_Balb.txt; do
    # stdout and stderr of each run are captured in <file>_ipage.out
    bash ../../ipage_loop.sh "$f" &> "${f}_ipage.out"
done

cd ../../

In [9]:
# Recursively archive the deseq/ep_clusters/ results folder into ep_clusters.zip.
!zip -r ep_clusters.zip deseq/ep_clusters/

Scanning files .....^C


In [8]:
ls 

cellranger.sh                network-analysis-GRN.ipynb
citeseq-antibodies.txt       network-analysis-metabolic.ipynb
[0m[38;5;27mcounts[0m/                      network-analysis-PPI.ipynb
[38;5;27mdeseq[0m/                       [38;5;27mpreprocessing[0m/
differential-analysis.ipynb  preprocessing.ipynb
differential-analysis.R      Rplots.pdf
enrichment-analysis.ipynb    run-differential-analysis.ipynb
[38;5;27mfastq[0m/                       run.out
feature_ref.csv              run.sh
[38;5;27mfigures[0m/                     [38;5;27mtotalVI[0m/
ipage_loop.sh                totalvi-BZ.ipynb
library.csv


In [7]:
# Print the current date and time.
!date

^C
