In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import anndata as ad
from scipy.stats import pearsonr, spearmanr, ttest_ind
import pickle
import os
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
from matplotlib.collections import PatchCollection
from matplotlib.colors import ListedColormap
import seaborn as sns
sns.set_style("ticks")
from sklearn.neighbors import BallTree

from scipy.stats import mannwhitneyu, ttest_ind
from statsmodels.stats.multitest import multipletests
from decimal import Decimal
import copy

import random

# Work from the project root so the relative "results/..." path used when
# loading the AnnData file resolves correctly.
PROJECT_ROOT = "/labs/abrunet1/Eric/MERFISH"
os.chdir(PROJECT_ROOT)

In [2]:
# Path is relative to the current working directory.
H5AD_PATH = "results/clocks/anndata/lasso_loocv_predicted_age_correlation_n30_spatialsmoothonsmooth_alpha08_nneigh20.h5ad"
adata = sc.read_h5ad(H5AD_PATH)

In [3]:
# Build one mean expression vector per cell type.
# `celltypes` holds the sorted unique labels from adata.obs.celltype, and
# mean_exps[k] is the column-wise mean of X over the cells labelled celltypes[k].
# NOTE(review): presumably adata.X is a dense array, so each mean is a 1-D
# vector -- confirm if the matrix could be sparse.
celltypes = np.unique(adata.obs.celltype)
mean_exps = [
    adata[adata.obs.celltype == label].X.mean(axis=0)
    for label in celltypes
]

In [15]:
# For every cell type, draw 100 cells (np.random.choice default: with
# replacement) and correlate each drawn cell's expression vector with that
# cell type's mean vector; keep the average Pearson r per cell type.

mean_r_list = []

for ct_label, ct_mean in zip(celltypes, mean_exps):
    sub_adata = adata[adata.obs.celltype == ct_label]
    # The seed is reset for each cell type, so every type starts from the
    # same RNG state.
    np.random.seed(444)
    sampled_rows = np.random.choice(np.arange(sub_adata.shape[0]), 100)
    # pearsonr returns (r, p-value); only r is kept.
    r_list = [pearsonr(sub_adata.X[row, :], ct_mean)[0] for row in sampled_rows]
    mean_r_list.append(np.mean(r_list))

In [16]:
# Print the per-cell-type mean correlations; entries follow the order of `celltypes`.
print(mean_r_list)

[0.8952883320987987, 0.7654279592855375, 0.8411438277708148, 0.8181607147546582, 0.7580462249748732, 0.8059127339633071, 0.7198049273570861, 0.7090879897298276, 0.9014922025636106, 0.8228608539421102, 0.8722745937080181, 0.7247541914578778, 0.7569671842658268, 0.7853247812017093, 0.8355094315543315, 0.6706430159998601, 0.7041235239956879, 0.7153886615947675]


In [17]:
# Print the unweighted average of the per-cell-type mean correlations.
print(np.mean(mean_r_list))

0.7834561750121503


In [8]:
# Pool cell-level correlations across all cell types: draw 20 cells per type
# (np.random.choice default: with replacement) and correlate each with its
# own cell type's mean expression vector.

r_list = []

for ct_label, ct_mean in zip(celltypes, mean_exps):
    sub_adata = adata[adata.obs.celltype == ct_label]
    # The seed is reset for each cell type, so every type starts from the
    # same RNG state.
    np.random.seed(444)
    sampled_rows = np.random.choice(np.arange(sub_adata.shape[0]), 20)
    # pearsonr returns (r, p-value); only r is kept.
    r_list.extend(pearsonr(sub_adata.X[row, :], ct_mean)[0] for row in sampled_rows)

In [9]:
# Median Pearson r over the current contents of `r_list`.
np.median(r_list)

0.8356804587204106

In [10]:
# Mean Pearson r over the current contents of `r_list`.
np.mean(r_list)

0.7919258765379878

In [11]:
# Display the cell type labels to see which position each label occupies.
celltypes

array(['Astrocyte', 'B cell', 'Endothelial', 'Ependymal', 'Macrophage',
       'Microglia', 'NSC', 'Neuroblast', 'Neuron-Excitatory',
       'Neuron-Inhibitory', 'Neuron-MSN', 'Neutrophil', 'OPC',
       'Oligodendrocyte', 'Pericyte', 'T cell', 'VLMC', 'VSMC'],
      dtype=object)

In [16]:
# Correlate sampled cells with their cell-type mean for two selected cell
# types only. The types are looked up by label rather than by hard-coded
# position, so a change in the set of labels (and hence in np.unique's
# ordering) cannot silently select different cell types.
target_celltypes = ["NSC", "T cell"]

r_list = []

for ct_label in target_celltypes:
    matches = np.flatnonzero(celltypes == ct_label)
    if matches.size == 0:
        raise ValueError(f"cell type {ct_label!r} not found in celltypes")
    i = int(matches[0])  # position shared by `celltypes` and `mean_exps`

    sub_adata = adata[adata.obs.celltype == celltypes[i]]
    # The seed is reset for each cell type, so every type starts from the
    # same RNG state.
    np.random.seed(444)
    # 100 draws per type; np.random.choice samples with replacement by default.
    for j in np.random.choice(np.arange(sub_adata.shape[0]), 100):
        # pearsonr returns (r, p-value); only r is kept.
        r, _ = pearsonr(sub_adata.X[j, :], mean_exps[i])
        r_list.append(r)

In [17]:
# Median Pearson r over the current contents of `r_list`.
np.median(r_list)

0.7032760794724351

In [18]:
# Mean Pearson r over the current contents of `r_list`.
np.mean(r_list)

0.6952239716784732