Skip to content

Commit

Permalink
TMS gene paper
Browse files Browse the repository at this point in the history
  • Loading branch information
martinjzhang committed Dec 23, 2019
1 parent fd90be5 commit 6c22d8e
Show file tree
Hide file tree
Showing 10 changed files with 7,259 additions and 0 deletions.
346 changes: 346 additions & 0 deletions 2_aging_signature/DE_tissue_FACS.ipynb
@@ -0,0 +1,346 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"sns.set(style=\"whitegrid\")\n",
"import numpy as np\n",
"import scanpy.api as sc\n",
"from anndata import read_h5ad\n",
"from anndata import AnnData\n",
"import scipy as sp\n",
"import scipy.stats\n",
"from gprofiler import GProfiler\n",
"import pickle\n",
"# Other specific functions \n",
"from itertools import product\n",
"from statsmodels.stats.multitest import multipletests\n",
"import util\n",
"# R related packages \n",
"import rpy2.rinterface_lib.callbacks\n",
"import logging\n",
"from rpy2.robjects import pandas2ri\n",
"import anndata2ri"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The rpy2.ipython extension is already loaded. To reload it, use:\n",
" %reload_ext rpy2.ipython\n",
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"scanpy==1.4.3 anndata==0.6.20 umap==0.3.8 numpy==1.16.4 scipy==1.2.1 pandas==0.25.0 scikit-learn==0.21.1 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 \n"
]
}
],
"source": [
"# Ignore R warning messages\n",
"#Note: this can be commented out to get more verbose R output\n",
"rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)\n",
"# Automatically convert rpy2 outputs to pandas dataframes\n",
"pandas2ri.activate()\n",
"anndata2ri.activate()\n",
"%load_ext rpy2.ipython\n",
"# autoreload\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"# logging\n",
"sc.logging.print_versions()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"%%R\n",
"library(MAST)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Data path\n",
"data_path = '/data3/martin/tms_gene_data'\n",
"output_folder = data_path + '/DE_result'"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Load the data \n",
"adata_combine = util.load_normalized_data(data_path)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"temp_facs = adata_combine[adata_combine.obs['b_method']=='facs',]\n",
"temp_droplet = adata_combine[adata_combine.obs['b_method']=='droplet',]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate a list of tissues for DE testing"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tongue, n_young=1418, n_old=2317\n",
"Thymus, n_young=1359, n_old=2716\n",
"Pancreas, n_young=1588, n_old=1796\n",
"Brain_Non-Myeloid, n_young=3650, n_old=4879\n",
"Trachea, n_young=1354, n_old=1806\n",
"Kidney, n_young=561, n_old=1620\n",
"Bladder, n_young=1383, n_old=1790\n",
"Heart, n_young=4464, n_old=5405\n",
"Spleen, n_young=1702, n_old=2132\n",
"BAT, n_young=713, n_old=1510\n",
"Diaphragm, n_young=903, n_old=955\n",
"Aorta, n_young=557, n_old=781\n",
"Skin, n_young=2346, n_old=2514\n",
"Brain_Myeloid, n_young=4532, n_old=9044\n",
"Liver, n_young=731, n_old=2128\n",
"SCAT, n_young=1721, n_old=2034\n",
"Lung, n_young=1743, n_old=4332\n",
"GAT, n_young=1464, n_old=1942\n",
"Mammary_Gland, n_young=2414, n_old=642\n",
"Limb_Muscle, n_young=1102, n_old=2753\n",
"Marrow, n_young=5069, n_old=9449\n",
"MAT, n_young=1187, n_old=1827\n",
"Large_Intestine, n_young=3987, n_old=4324\n"
]
}
],
"source": [
"tissue_list = list(set(temp_facs.obs['tissue']))\n",
"min_cell_number = 1\n",
"analysis_list = []\n",
"analysis_info = {}\n",
"# for cell_type in cell_type_list:\n",
"for tissue in tissue_list:\n",
" analyte = tissue\n",
" ind_select = (temp_facs.obs['tissue'] == tissue)\n",
" n_young = (temp_facs.obs['age'][ind_select].isin(['1m', '3m'])).sum()\n",
" n_old = (temp_facs.obs['age'][ind_select].isin(['18m', '21m',\n",
" '24m', '30m'])).sum()\n",
" analysis_info[analyte] = {}\n",
" analysis_info[analyte]['n_young'] = n_young\n",
" analysis_info[analyte]['n_old'] = n_old\n",
" if (n_young>min_cell_number) & (n_old>min_cell_number):\n",
" print('%s, n_young=%d, n_old=%d'%(analyte, n_young, n_old))\n",
" analysis_list.append(analyte)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### DE using R package MAST "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tongue 0/23\n",
"Thymus 1/23\n",
"Pancreas 2/23\n",
"Brain_Non-Myeloid 3/23\n",
"Trachea 4/23\n",
"Kidney 5/23\n",
"Bladder 6/23\n",
"Heart 7/23\n",
"Spleen 8/23\n",
"BAT 9/23\n",
"Diaphragm 10/23\n",
"Aorta 11/23\n",
"Skin 12/23\n",
"Brain_Myeloid 13/23\n",
"Liver 14/23\n",
"SCAT 15/23\n",
"Lung 16/23\n",
"GAT 17/23\n",
"Mammary_Gland 18/23\n",
"Limb_Muscle 19/23\n",
"Marrow 20/23\n",
"MAT 21/23\n",
"Large_Intestine 22/23\n"
]
}
],
"source": [
"## DE testing\n",
"gene_name_list = np.array(temp_facs.var_names)\n",
"DE_result_MAST = {}\n",
"for i_analyte,analyte in enumerate(analysis_list):\n",
" print(analyte, '%d/%d'%(i_analyte, len(analysis_list)))\n",
" tissue = analyte\n",
"# tissue,cell_type = analyte.split('.')\n",
" ind_select = (temp_facs.obs['tissue'] == tissue)\n",
" adata_temp = temp_facs[ind_select,]\n",
" # reformatting\n",
" adata_temp.X = np.array(adata_temp.X.todense())\n",
" adata_temp.obs['condition'] = [int(x[:-1]) for x in adata_temp.obs['age']] \n",
" adata_temp.obs = adata_temp.obs[['condition', 'sex']]\n",
" if len(set(adata_temp.obs['sex'])) <2:\n",
" covariate = ''\n",
" else:\n",
" covariate = '+sex'\n",
" # toy example\n",
"# covariate = ''\n",
"# np.random.seed(0)\n",
"# ind_select = np.random.permutation(adata_temp.shape[0])[0:100]\n",
"# ind_select = np.sort(ind_select)\n",
"# adata_temp = adata_temp[:, 0:200]\n",
"# adata_temp.X[:,0] = (adata_temp.obs['sex'] == 'male')*3\n",
"# adata_temp.X[:,1] = (adata_temp.obs['condition'])*3\n",
" # DE using MAST \n",
" R_cmd = util.call_MAST_age()\n",
" get_ipython().run_cell_magic(u'R', u'-i adata_temp -i covariate -o de_res', R_cmd)\n",
" de_res.columns = ['gene', 'raw-p', 'coef', 'bh-p']\n",
" de_res.index = de_res['gene']\n",
" DE_result_MAST[analyte] = pd.DataFrame(index = gene_name_list)\n",
" DE_result_MAST[analyte] = DE_result_MAST[analyte].join(de_res)\n",
" # fc between yound and old\n",
" X = adata_temp.X\n",
" y = (adata_temp.obs['condition']>10)\n",
" DE_result_MAST[analyte]['fc'] = X[y,:].mean(axis=0) - X[~y,:].mean(axis=0)\n",
"# break"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save DE results"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"with open(output_folder+'/DE_tissue_FACS.pickle', 'wb') as handle:\n",
" pickle.dump(DE_result_MAST, handle)\n",
" pickle.dump(analysis_list, handle)\n",
" pickle.dump(analysis_info, handle)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Validation"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fat 12428\n",
"Liver 8147\n",
"Limb_Muscle 8066\n",
"Trachea 9606\n",
"Pancreas 10145\n",
"Marrow 10720\n",
"Tongue 8795\n",
"Lung 5588\n",
"Spleen 2500\n",
"Heart 8132\n",
"Skin 8647\n",
"Bladder 11658\n",
"Kidney 1001\n",
"Thymus 4762\n",
"Brain_Myeloid 9601\n",
"Mammary_Gland 5029\n",
"Brain_Non-Myeloid 12855\n",
"Large_Intestine 10159\n"
]
}
],
"source": [
"for analyte in DE_result_MAST.keys():\n",
" print(analyte, np.sum(DE_result_MAST[analyte]['bh-p']<0.00001))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 6c22d8e

Please sign in to comment.