In [None]:
import os
import pandas as pd
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.metrics import r2_score
from scipy.stats import pearsonr

# Scanpy logging level — errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# Emit scanpy's header; called only for its printed output.
sc.logging.print_header()
# Notebook-wide figure defaults: 80 dpi on a white background.
sc.settings.set_figure_params(facecolor="white", dpi=80)

In [None]:
# Directory that holds one sub-directory per assay-conversion run.
ROOT_DIR = '/work/hdd/bbjr/mallina1/data/human_cellariumgpt_v2/assay_conversion'

# Candidate run directories: every immediate sub-directory of ROOT_DIR.
run_dirs = [d for d in os.listdir(ROOT_DIR) if os.path.isdir(os.path.join(ROOT_DIR, d))]
if not run_dirs:
    # Fail with an explicit message instead of max()'s "arg is an empty sequence".
    raise FileNotFoundError(f'No sub-directories found under {ROOT_DIR!r}')

# Pick the sub-directory with the largest ctime.
# NOTE(review): on Unix, os.path.getctime reports the last metadata change, not
# the creation time — confirm this is the intended notion of "latest run".
INP_DIR = max(run_dirs, key=lambda d: os.path.getctime(os.path.join(ROOT_DIR, d)))
print(INP_DIR)

In [None]:
# All three AnnData files are read from the run directory selected above.
run_dir = os.path.join(ROOT_DIR, INP_DIR)


def _read_run_file(filename):
    """Read ``filename`` from ``run_dir`` with ``sc.read_h5ad``."""
    return sc.read_h5ad(os.path.join(run_dir, filename))


original_adata = _read_run_file('original.h5ad')
converted_adata = _read_run_file('converted.h5ad')
converted_means_adata = _read_run_file('converted_means.h5ad')

In [None]:
# Print the value counts of three obs columns of the original data,
# separated by a blank line (no trailing blank line after the last one).
for position, column in enumerate(('CellType', 'Method', 'Experiment')):
    if position > 0:
        print()
    print(original_adata.obs[column].value_counts())

In [None]:
# Assay and cell type examined in the cells below.
assay1 = '10x Chromium (v3)'
cell_type = 'CD4+ T cell'


def _only_cell_type(adata):
    """Return the rows of ``adata`` whose ``obs.CellType`` equals ``cell_type``."""
    keep = adata.obs.CellType == cell_type
    return adata[keep]


ct_original = _only_cell_type(original_adata)
ct_converted = _only_cell_type(converted_adata)
ct_converted_means = _only_cell_type(converted_means_adata)

In [None]:
def _scaled_totals(adata):
    """Sum ``adata.X`` over axis 0 for the ``assay1`` rows, rescaled to total 1,000,000.

    The result is a 1-D array after ``squeeze()``; it is CPM-like if ``X``
    holds raw counts (assumption — not checked here).
    """
    is_assay = adata.obs.Method == assay1
    totals = np.array(adata[is_assay].X.sum(0)).squeeze()
    return totals / np.sum(totals) * 1000000


# a1: original data, a2: converted data, a3: converted means.
a1_X_g = _scaled_totals(ct_original)
a2_X_g = _scaled_totals(ct_converted)
a3_X_g = _scaled_totals(ct_converted_means)

In [None]:
# Axis labels double as the column keys of the plotting table.
x_label = f'Original {assay1}'
y_label = 'Converted 10x v3'

# log1p-transformed totals for the original (x) and converted (y) data,
# plus variable names and gene symbols taken from the original subset.
df = {}
df[x_label] = np.log1p(a1_X_g)
df[y_label] = np.log1p(a2_X_g)
df['var_names'] = ct_original.var_names
df['gene_symbols'] = ct_original.var.gene_symbols

# Small square scatter; gene symbols are shown on hover.
fig = px.scatter(
    df,
    x=x_label,
    y=y_label,
    hover_data=['gene_symbols'],
    width=250,
    height=250,
    title=f'{cell_type}',
)
fig

In [None]:
# Agreement between original (a1) and converted (a2) totals, in log1p space
# and on the raw scale. For r2_score the original values are passed first.
log_original = np.log1p(a1_X_g)
log_converted = np.log1p(a2_X_g)

comparisons = (
    ('sklearn.metrics.r2_score in log space', r2_score, log_original, log_converted),
    ('sklearn.metrics.r2_score', r2_score, a1_X_g, a2_X_g),
    ('scipy.stats.pearsonr in log space', pearsonr, log_original, log_converted),
    ('scipy.stats.pearsonr', pearsonr, a1_X_g, a2_X_g),
)

for label, metric, first, second in comparisons:
    # NOTE(review): `r2` is also reused for pearsonr's return value, which is
    # printed as returned (not an R^2 score).
    r2 = metric(first, second)
    print(f'{label}:\t{r2}')

In [None]:
# Axis labels double as the column keys of the plotting table.
x_label = f'Original {assay1}'
# This figure plots a3_X_g (derived from converted_means.h5ad), so the y label
# says so; the previous label was identical to the a2_X_g scatter, which made
# the two figures indistinguishable.
y_label = 'Converted (means) 10x v3'

# log1p-transformed totals for the original (x) and converted-means (y) data,
# plus variable names and gene symbols taken from the original subset.
df = {
    x_label: np.log1p(a1_X_g),
    y_label: np.log1p(a3_X_g),
    'var_names': ct_original.var_names,
    'gene_symbols': ct_original.var.gene_symbols
}

# Small square scatter; gene symbols are shown on hover.
px.scatter(df, x=x_label, y=y_label,
           hover_data=['gene_symbols'],
           width=250, height=250,
           title=f'{cell_type}')

In [None]:
# Agreement between original (a1) and converted-means (a3) totals, in log1p
# space and on the raw scale. For r2_score the original values are passed first.
log_original = np.log1p(a1_X_g)
log_converted_means = np.log1p(a3_X_g)

comparisons = (
    ('sklearn.metrics.r2_score in log space', r2_score, log_original, log_converted_means),
    ('sklearn.metrics.r2_score', r2_score, a1_X_g, a3_X_g),
    ('scipy.stats.pearsonr in log space', pearsonr, log_original, log_converted_means),
    ('scipy.stats.pearsonr', pearsonr, a1_X_g, a3_X_g),
)

for label, metric, first, second in comparisons:
    # NOTE(review): `r2` is also reused for pearsonr's return value, which is
    # printed as returned (not an R^2 score).
    r2 = metric(first, second)
    print(f'{label}:\t{r2}')