In [1]:
# Standard library
import logging
import os
import pickle
from itertools import product

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy.api as sc
import scipy as sp
import scipy.stats
import seaborn as sns
from anndata import AnnData
from anndata import read_h5ad
from gprofiler import GProfiler
# Other specific functions
from statsmodels.stats.multitest import multipletests

# R related packages
import rpy2.rinterface_lib.callbacks
from rpy2.robjects import pandas2ri
import anndata2ri

# Local helpers
import util

sns.set(style="whitegrid")



In [2]:
# Only let R messages of level ERROR or higher through (hides R warnings).
# Note: comment this line out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
# Activate the automatic rpy2 <-> pandas and rpy2 <-> AnnData conversions
# (presumably what the `-i adata_temp` / `-o de_res` transfers in the DE cell rely on)
pandas2ri.activate()
anndata2ri.activate()
# Load the rpy2 IPython extension that provides the R magics (e.g. %%R)
%load_ext rpy2.ipython
# autoreload: mode 2 reloads imported modules before each cell is executed
%load_ext autoreload
%autoreload 2
# Print the versions of scanpy and its main dependencies
sc.logging.print_versions()

scanpy==1.4.3 anndata==0.6.20 umap==0.3.8 numpy==1.16.4 scipy==1.2.1 pandas==0.25.0 scikit-learn==0.21.1 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


In [3]:
%%R
# Attach the MAST package in the R session driven by the %%R magic
library(MAST)

## Load data

In [4]:
# Data path
# Defaults to the original shared location; set the TMS_DATA_PATH environment
# variable to run the notebook against a copy of the data stored elsewhere.
data_path = os.environ.get('TMS_DATA_PATH', '/data3/martin/tms_gene_data')
# DE results are written to a sub-folder of the data folder
output_folder = data_path + '/DE_result'

In [5]:
# Load the data
# Project helper from util.py, called with the data folder; presumably returns an
# AnnData object (later cells use `.obs`, `.var_names` and `.copy()`) — TODO confirm
adata_bulk = util.load_normalized_data_bulk(data_path)

Trying to set attribute `.obs` of view, making a copy.


In [6]:
# Working copy used by all later cells (presumably so `adata_bulk` stays as loaded)
temp_bulk = adata_bulk.copy()

## Differential expression (DE) with age, per tissue, bulk data

In [7]:
# Select the tissues that have enough young and old samples for DE testing.
# Tissues are taken in order of first appearance in `obs` rather than by iterating
# a `set`, whose order for strings can differ between kernel sessions; this keeps
# `tissue_list` / `analysis_list` reproducible across runs.
tissue_list = list(temp_bulk.obs['tissue'].unique())
min_cell_number = 1  # each age group needs strictly more samples than this
analysis_list = []   # tissues that pass the sample-count filter
analysis_info = {}   # sample counts per tissue, recorded for every tissue
for tissue in tissue_list:
    analyte = tissue
    ind_select = (temp_bulk.obs['tissue'] == tissue)
    age_num = temp_bulk.obs['age_num'][ind_select]
    # young: age_num < 10, old: age_num > 10; a sample at exactly 10 is counted in neither
    n_young = (age_num < 10).sum()
    n_old = (age_num > 10).sum()
    analysis_info[analyte] = {}
    analysis_info[analyte]['n_young'] = n_young
    analysis_info[analyte]['n_old'] = n_old
    # Both operands are scalar booleans, so use logical `and` instead of bitwise `&`
    if n_young > min_cell_number and n_old > min_cell_number:
        print('%s, n_young=%d, n_old=%d'%(analyte, n_young, n_old))
        analysis_list.append(analyte)

Gonadal_Fat, n_young=24, n_old=31
Brown_Fat, n_young=23, n_old=31
Kidney, n_young=23, n_old=32
Brain, n_young=24, n_old=32
Heart, n_young=21, n_old=31
Subcutaneous_Fat, n_young=23, n_old=32
Marrow, n_young=23, n_old=30
Lung, n_young=23, n_old=32
White_Blood_Cells, n_young=24, n_old=31
Liver, n_young=24, n_old=30
Spleen, n_young=24, n_old=32
Small_Intestine, n_young=24, n_old=31
Pancreas, n_young=24, n_old=32
Limb_Muscle, n_young=23, n_old=31
Mesenteric_Fat, n_young=24, n_old=32
Bone, n_young=23, n_old=32
Skin, n_young=23, n_old=28


### DE using R package MAST 

In [9]:
## DE testing
# For every tissue in `analysis_list`: subset the bulk data, run the MAST command
# through the R cell magic with age as `condition` (plus sex as a covariate when both
# sexes are present), and store the result aligned to the full gene list together
# with the old-minus-young difference in mean expression.
gene_name_list = np.array(temp_bulk.var_names)
DE_result_MAST = {}
# The helper takes no arguments, so the R command is the same for every tissue:
# build it once instead of once per iteration.
R_cmd = util.call_MAST_age()
for i_analyte,analyte in enumerate(analysis_list):
    print(analyte, '%d/%d'%(i_analyte, len(analysis_list)))
    tissue = analyte
    ind_select = (temp_bulk.obs['tissue'] == tissue)
    # Explicit copy: slicing an AnnData returns a view, and assigning `.X` / `.obs`
    # on a view forces an implicit copy with a
    # "Trying to set attribute `.obs` of view, making a copy." warning.
    adata_temp = temp_bulk[ind_select,].copy()
    # reformatting
    adata_temp.X = np.array(adata_temp.X)
    # `age` values carry a one-character suffix that is stripped here
    # (presumably a unit such as "3m" — TODO confirm); the integer part is the condition
    adata_temp.obs['condition'] = [int(x[:-1]) for x in adata_temp.obs['age']]
    adata_temp.obs = adata_temp.obs[['condition', 'sex']]
    # Only add sex as a covariate when the tissue has samples of more than one sex
    if len(set(adata_temp.obs['sex'])) < 2:
        covariate = ''
    else:
        covariate = '+sex'
    # DE using MAST: `adata_temp` and `covariate` are pushed to R (-i) and the
    # result is pulled back into the notebook namespace as `de_res` (-o)
    get_ipython().run_cell_magic(u'R', u'-i adata_temp -i covariate -o de_res', R_cmd)
    de_res.columns = ['gene', 'raw-p', 'coef', 'bh-p']
    de_res.index = de_res['gene']
    # Left-join onto the full gene list so that genes missing from `de_res` are kept as NaN rows
    DE_result_MAST[analyte] = pd.DataFrame(index = gene_name_list)
    DE_result_MAST[analyte] = DE_result_MAST[analyte].join(de_res)
    # fc between young and old: difference of the group means (old minus young);
    # samples with condition <= 10 form the young group here
    X = adata_temp.X
    is_old = (adata_temp.obs['condition'] > 10).values  # plain boolean array for numpy indexing
    DE_result_MAST[analyte]['fc'] = X[is_old,:].mean(axis=0) - X[~is_old,:].mean(axis=0)

Trying to set attribute `.obs` of view, making a copy.


Gonadal_Fat 0/17


Trying to set attribute `.obs` of view, making a copy.


Brown_Fat 1/17


Trying to set attribute `.obs` of view, making a copy.


Kidney 2/17


Trying to set attribute `.obs` of view, making a copy.


Brain 3/17


Trying to set attribute `.obs` of view, making a copy.


Heart 4/17


Trying to set attribute `.obs` of view, making a copy.


Subcutaneous_Fat 5/17


Trying to set attribute `.obs` of view, making a copy.


Marrow 6/17


Trying to set attribute `.obs` of view, making a copy.


Lung 7/17


Trying to set attribute `.obs` of view, making a copy.


White_Blood_Cells 8/17


Trying to set attribute `.obs` of view, making a copy.


Liver 9/17


Trying to set attribute `.obs` of view, making a copy.


Spleen 10/17


Trying to set attribute `.obs` of view, making a copy.


Small_Intestine 11/17


Trying to set attribute `.obs` of view, making a copy.


Pancreas 12/17


Trying to set attribute `.obs` of view, making a copy.


Limb_Muscle 13/17


Trying to set attribute `.obs` of view, making a copy.


Mesenteric_Fat 14/17


Trying to set attribute `.obs` of view, making a copy.


Bone 15/17


Trying to set attribute `.obs` of view, making a copy.


Skin 16/17


### Save DE results

In [10]:
# Persist the DE results. The three objects are written back-to-back into a single
# pickle file, so a reader has to call pickle.load three times on the same handle,
# in this order: DE_result_MAST, analysis_list, analysis_info.
# Create the output folder first: open() raises FileNotFoundError if it is missing,
# which would discard the results of the DE run above.
os.makedirs(output_folder, exist_ok=True)
with open(output_folder+'/DE_bulk.pickle', 'wb') as handle:
    pickle.dump(DE_result_MAST, handle)
    pickle.dump(analysis_list, handle)
    pickle.dump(analysis_info, handle)

### Validation

In [12]:
# Sanity check: for each analyte, print how many genes have a 'bh-p' value
# (presumably the BH-adjusted p-value) below the cutoff. NaN entries compare as
# False and are therefore not counted.
bh_cutoff = 0.00001
for analyte, de_table in DE_result_MAST.items():
    n_below_cutoff = (de_table['bh-p'] < bh_cutoff).sum()
    print(analyte, n_below_cutoff)


Fat 12428
Liver 8147
Limb_Muscle 8066
Trachea 9606
Pancreas 10145
Marrow 10720
Tongue 8795
Lung 5588
Spleen 2500
Heart 8132
Skin 8647
Bladder 11658
Kidney 1001
Thymus 4762
Brain_Myeloid 9601
Mammary_Gland 5029
Brain_Non-Myeloid 12855
Large_Intestine 10159
