In [1]:
'''
author: Yike Xie
data: 3/10/2023
content: correlate image features and transcriptome
'''

'\nauthor: Yike Xie\ndata: 3/10/2023\ncontent: correlate image features and transcriptome\n'

In [2]:
import os
import sys
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns

import scanpy as sc
import pickle

In [3]:
def sig_dataset(feas_fn, data_fn):
    print('load features')
    with open(feas_fn, 'rb') as f:
        features = pd.read_pickle(f)
    
    print('load dataset')
    adata = sc.read_h5ad(data_fn)
    sc.pp.normalize_total(adata, target_sum=1e6)

    print('filter genes')
    sc.pp.filter_genes(adata, min_cells=10)

    print('Add features to adata')
    for col in features.columns:
        adata.obs[col] = features.loc[adata.obs_names][col]

    return adata

def get_correlation(adata_s, feas):
    #corr = dsim.correlation.correlate_features_phenotypes(feas, fillna=0)##
    exp = adata_s.X.T
    phe = adata_s.obs[feas].fillna(0)

    x = exp #(39466, 10)
    y = phe.values.T #(2, 10)

    from scipy.stats import rankdata

    xw = np.zeros_like(x, float)
    for ii, xi in enumerate(x):
        xw[ii] = rankdata(xi, method='average')
    yw = np.zeros_like(y, float)
    for ii, yi in enumerate(y):
        yw[ii] = rankdata(yi, method='average')

    xw = ((xw.T - xw.mean(axis=1)) / xw.std(axis=1)).T
    yw = ((yw.T - yw.mean(axis=1)) / yw.std(axis=1)).T
    n = xw.shape[1]
    r = np.dot(xw, yw.T) / n

    corr = pd.DataFrame(
                    data=r,
                    index=adata_s.var_names,
                    columns=phe.columns,
                    dtype=float)

    return corr

In [4]:
feas_fn = '/home/yike/phd/cancer_cells_img_seq/figures/combine_features.pkl'
data_fn = '/home/yike/phd/cancer_cells_img_seq/data/combine_gene.h5ad'
adata = sig_dataset(feas_fn, data_fn)

save_figures = '/home/yike/phd/cancer_cells_img_seq/figures/correlate_features/gene/'

load features
load dataset
filter genes
Add features to adata


## focus following analyses on cells with low percentage of mitochondrial reads

In [5]:
## divide live and dead cells according to the fraction o mitochondrial reads
live_cells = adata.obs[adata.obs['pct_counts_mt'] < 25].index
dead_cells = adata.obs[adata.obs['pct_counts_mt'] > 25].index

# create dataset only having cells with low percentage of mitochondrial reads
adata_live = adata[live_cells, :]

In [13]:
# Average spectra
adata_live.obs['spectra_norm'] = np.array(adata_live.obs['spectra'].tolist()).mean(axis=1) / adata_live.obs['area']
print('Correlate with some simple features')
feas = ['area', 'eccentricity', 'spectra_norm']

corr = get_correlation(adata_live, feas)
corr = corr.fillna(0)

corr['Frac'] = 100 * (adata_live[:, corr.index].X > 0).sum(axis=0) / adata_live.obs.shape[0]
corr['Number'] = (adata_live[:, corr.index].X > 0).sum(axis=0)
corr.to_csv(save_figures + 'correlation_live_cells.tsv', sep='\t')

Trying to set attribute `.obs` of view, copying.


Correlate with some simple features


  xw = ((xw.T - xw.mean(axis=1)) / xw.std(axis=1)).T
