Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fd90be5
commit 6c22d8e
Showing
10 changed files
with
7,259 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,346 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import seaborn as sns\n", | ||
"sns.set(style=\"whitegrid\")\n", | ||
"import numpy as np\n", | ||
"import scanpy.api as sc\n", | ||
"from anndata import read_h5ad\n", | ||
"from anndata import AnnData\n", | ||
"import scipy as sp\n", | ||
"import scipy.stats\n", | ||
"from gprofiler import GProfiler\n", | ||
"import pickle\n", | ||
"# Other specific functions \n", | ||
"from itertools import product\n", | ||
"from statsmodels.stats.multitest import multipletests\n", | ||
"import util\n", | ||
"# R related packages \n", | ||
"import rpy2.rinterface_lib.callbacks\n", | ||
"import logging\n", | ||
"from rpy2.robjects import pandas2ri\n", | ||
"import anndata2ri" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"The rpy2.ipython extension is already loaded. To reload it, use:\n", | ||
" %reload_ext rpy2.ipython\n", | ||
"The autoreload extension is already loaded. To reload it, use:\n", | ||
" %reload_ext autoreload\n", | ||
"scanpy==1.4.3 anndata==0.6.20 umap==0.3.8 numpy==1.16.4 scipy==1.2.1 pandas==0.25.0 scikit-learn==0.21.1 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 \n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Ignore R warning messages\n", | ||
"#Note: this can be commented out to get more verbose R output\n", | ||
"rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)\n", | ||
"# Automatically convert rpy2 outputs to pandas dataframes\n", | ||
"pandas2ri.activate()\n", | ||
"anndata2ri.activate()\n", | ||
"%load_ext rpy2.ipython\n", | ||
"# autoreload\n", | ||
"%load_ext autoreload\n", | ||
"%autoreload 2\n", | ||
"# logging\n", | ||
"sc.logging.print_versions()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%R\n", | ||
"library(MAST)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Load data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Data path\n", | ||
"data_path = '/data3/martin/tms_gene_data'\n", | ||
"output_folder = data_path + '/DE_result'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load the data \n", | ||
"adata_combine = util.load_normalized_data(data_path)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"temp_facs = adata_combine[adata_combine.obs['b_method']=='facs',]\n", | ||
"temp_droplet = adata_combine[adata_combine.obs['b_method']=='droplet',]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Generate a list of tissues for DE testing" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Tongue, n_young=1418, n_old=2317\n", | ||
"Thymus, n_young=1359, n_old=2716\n", | ||
"Pancreas, n_young=1588, n_old=1796\n", | ||
"Brain_Non-Myeloid, n_young=3650, n_old=4879\n", | ||
"Trachea, n_young=1354, n_old=1806\n", | ||
"Kidney, n_young=561, n_old=1620\n", | ||
"Bladder, n_young=1383, n_old=1790\n", | ||
"Heart, n_young=4464, n_old=5405\n", | ||
"Spleen, n_young=1702, n_old=2132\n", | ||
"BAT, n_young=713, n_old=1510\n", | ||
"Diaphragm, n_young=903, n_old=955\n", | ||
"Aorta, n_young=557, n_old=781\n", | ||
"Skin, n_young=2346, n_old=2514\n", | ||
"Brain_Myeloid, n_young=4532, n_old=9044\n", | ||
"Liver, n_young=731, n_old=2128\n", | ||
"SCAT, n_young=1721, n_old=2034\n", | ||
"Lung, n_young=1743, n_old=4332\n", | ||
"GAT, n_young=1464, n_old=1942\n", | ||
"Mammary_Gland, n_young=2414, n_old=642\n", | ||
"Limb_Muscle, n_young=1102, n_old=2753\n", | ||
"Marrow, n_young=5069, n_old=9449\n", | ||
"MAT, n_young=1187, n_old=1827\n", | ||
"Large_Intestine, n_young=3987, n_old=4324\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"tissue_list = list(set(temp_facs.obs['tissue']))\n", | ||
"min_cell_number = 1\n", | ||
"analysis_list = []\n", | ||
"analysis_info = {}\n", | ||
"# for cell_type in cell_type_list:\n", | ||
"for tissue in tissue_list:\n", | ||
" analyte = tissue\n", | ||
" ind_select = (temp_facs.obs['tissue'] == tissue)\n", | ||
" n_young = (temp_facs.obs['age'][ind_select].isin(['1m', '3m'])).sum()\n", | ||
" n_old = (temp_facs.obs['age'][ind_select].isin(['18m', '21m',\n", | ||
" '24m', '30m'])).sum()\n", | ||
" analysis_info[analyte] = {}\n", | ||
" analysis_info[analyte]['n_young'] = n_young\n", | ||
" analysis_info[analyte]['n_old'] = n_old\n", | ||
" if (n_young>min_cell_number) & (n_old>min_cell_number):\n", | ||
" print('%s, n_young=%d, n_old=%d'%(analyte, n_young, n_old))\n", | ||
" analysis_list.append(analyte)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### DE using R package MAST " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 15, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Tongue 0/23\n", | ||
"Thymus 1/23\n", | ||
"Pancreas 2/23\n", | ||
"Brain_Non-Myeloid 3/23\n", | ||
"Trachea 4/23\n", | ||
"Kidney 5/23\n", | ||
"Bladder 6/23\n", | ||
"Heart 7/23\n", | ||
"Spleen 8/23\n", | ||
"BAT 9/23\n", | ||
"Diaphragm 10/23\n", | ||
"Aorta 11/23\n", | ||
"Skin 12/23\n", | ||
"Brain_Myeloid 13/23\n", | ||
"Liver 14/23\n", | ||
"SCAT 15/23\n", | ||
"Lung 16/23\n", | ||
"GAT 17/23\n", | ||
"Mammary_Gland 18/23\n", | ||
"Limb_Muscle 19/23\n", | ||
"Marrow 20/23\n", | ||
"MAT 21/23\n", | ||
"Large_Intestine 22/23\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"## DE testing\n", | ||
"gene_name_list = np.array(temp_facs.var_names)\n", | ||
"DE_result_MAST = {}\n", | ||
"for i_analyte,analyte in enumerate(analysis_list):\n", | ||
" print(analyte, '%d/%d'%(i_analyte, len(analysis_list)))\n", | ||
" tissue = analyte\n", | ||
"# tissue,cell_type = analyte.split('.')\n", | ||
" ind_select = (temp_facs.obs['tissue'] == tissue)\n", | ||
" adata_temp = temp_facs[ind_select,]\n", | ||
" # reformatting\n", | ||
" adata_temp.X = np.array(adata_temp.X.todense())\n", | ||
" adata_temp.obs['condition'] = [int(x[:-1]) for x in adata_temp.obs['age']] \n", | ||
" adata_temp.obs = adata_temp.obs[['condition', 'sex']]\n", | ||
" if len(set(adata_temp.obs['sex'])) <2:\n", | ||
" covariate = ''\n", | ||
" else:\n", | ||
" covariate = '+sex'\n", | ||
" # toy example\n", | ||
"# covariate = ''\n", | ||
"# np.random.seed(0)\n", | ||
"# ind_select = np.random.permutation(adata_temp.shape[0])[0:100]\n", | ||
"# ind_select = np.sort(ind_select)\n", | ||
"# adata_temp = adata_temp[:, 0:200]\n", | ||
"# adata_temp.X[:,0] = (adata_temp.obs['sex'] == 'male')*3\n", | ||
"# adata_temp.X[:,1] = (adata_temp.obs['condition'])*3\n", | ||
" # DE using MAST \n", | ||
" R_cmd = util.call_MAST_age()\n", | ||
" get_ipython().run_cell_magic(u'R', u'-i adata_temp -i covariate -o de_res', R_cmd)\n", | ||
" de_res.columns = ['gene', 'raw-p', 'coef', 'bh-p']\n", | ||
" de_res.index = de_res['gene']\n", | ||
" DE_result_MAST[analyte] = pd.DataFrame(index = gene_name_list)\n", | ||
" DE_result_MAST[analyte] = DE_result_MAST[analyte].join(de_res)\n", | ||
" # fc between yound and old\n", | ||
" X = adata_temp.X\n", | ||
" y = (adata_temp.obs['condition']>10)\n", | ||
" DE_result_MAST[analyte]['fc'] = X[y,:].mean(axis=0) - X[~y,:].mean(axis=0)\n", | ||
"# break" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Save DE results" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 16, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"with open(output_folder+'/DE_tissue_FACS.pickle', 'wb') as handle:\n", | ||
" pickle.dump(DE_result_MAST, handle)\n", | ||
" pickle.dump(analysis_list, handle)\n", | ||
" pickle.dump(analysis_info, handle)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Validation" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Fat 12428\n", | ||
"Liver 8147\n", | ||
"Limb_Muscle 8066\n", | ||
"Trachea 9606\n", | ||
"Pancreas 10145\n", | ||
"Marrow 10720\n", | ||
"Tongue 8795\n", | ||
"Lung 5588\n", | ||
"Spleen 2500\n", | ||
"Heart 8132\n", | ||
"Skin 8647\n", | ||
"Bladder 11658\n", | ||
"Kidney 1001\n", | ||
"Thymus 4762\n", | ||
"Brain_Myeloid 9601\n", | ||
"Mammary_Gland 5029\n", | ||
"Brain_Non-Myeloid 12855\n", | ||
"Large_Intestine 10159\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for analyte in DE_result_MAST.keys():\n", | ||
" print(analyte, np.sum(DE_result_MAST[analyte]['bh-p']<0.00001))\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.