In [None]:
# Point the project's TenX helper at the raw GSE161529 study directory and its
# features file, then load the data.
from signals_in_the_noise.utilities.tenx_genomics import TenX, DirectoryType
# NOTE(review): the study directory is given relative to "../" while the features
# file is given relative to "../../" — confirm that TenX resolves
# `features_filename` against a different base directory; otherwise one of the two
# paths is wrong.
t = TenX("../data/GSE161529_RAW", DirectoryType.MULTIPLE, features_filename="../../data/GSE161529_features.tsv.gz")
t.load_data()

In [None]:
import scanpy as sc

# Directory of the single sample inspected in this notebook, located under the
# study directory exposed by the TenX helper.
e = f"{t.study_directory}/GSM4909253_N-PM0092-Total"

# Read that sample's 10x matrix directory with scanpy.
adata = sc.read_10x_mtx(e)

In [None]:
# Dimensions of the loaded matrix; AnnData reports shape as (n_obs, n_vars).
adata.shape

In [None]:
# Sharing same features means there will be 0 count genes
import numpy as np

# For every gene (column), count the cells in which it has a non-zero value.
# The count is computed once and reused for both masks. `np.asarray(...).ravel()`
# flattens the result whether `.sum(axis=0)` returns a 1 x n `numpy.matrix`
# (SciPy sparse matrix input) or a plain 1-D ndarray (dense or sparse-array input);
# `.A1` exists only on `numpy.matrix`.
cells_per_gene = np.asarray((adata.X > 0).sum(axis=0)).ravel()
nonzero_gene_mask = cells_per_gene > 0
# Counts cannot be negative, so "not > 0" is exactly "== 0".
zero_gene_mask = ~nonzero_gene_mask

# Slice AnnData into the genes detected in at least one cell and the genes
# never detected in this sample.
adata_nz = adata[:, nonzero_gene_mask]
adata_z = adata[:, zero_gene_mask]

In [None]:
# Optional: rebind `adata = adata_nz` before this step to restrict QC to the
# genes detected in this sample (left disabled).

# Flag genes whose name starts with "MT-" (case-insensitive) so their share of
# counts is reported alongside the standard QC metrics.
is_mito_gene = adata.var_names.str.upper().str.startswith('MT-')
adata.var['mt'] = is_mito_gene

# Write the QC metrics onto `adata` in place, without the log1p variants.
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], log1p=False, inplace=True)

In [None]:
# Show the AnnData repr, which lists the obs/var columns currently present.
adata

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Distribution of total counts per cell, with the 500-count cut-off marked.
ax = sns.histplot(adata.obs['total_counts'], bins=100, kde=False)
ax.axvline(500, color='red', linestyle='--', label='500 UMI threshold')
ax.set(
    xlabel='Total counts per cell',
    ylabel='Cell count',
    title='Total RNA counts per cell',
)
ax.legend()
plt.show()

In [None]:
# Distribution of detected genes per cell, with the 500-gene cut-off marked.
ax = sns.histplot(adata.obs['n_genes_by_counts'], bins=100, kde=False)
ax.axvline(500, color='red', linestyle='--', label='500 gene threshold')
ax.set(
    xlabel='Genes detected per cell',
    ylabel='Cell count',
    title='Gene count per cell',
)
ax.legend()
plt.show()

In [None]:
# Total counts against detected genes, one point per row of `adata.obs`.
ax = sns.scatterplot(
    x='total_counts',
    y='n_genes_by_counts',
    data=adata.obs,
    s=5, alpha=0.3
)
ax.set(
    xlabel='Total counts',
    ylabel='Number of genes',
    title='Total counts vs. number of genes per cell',
)
# Label both cut-off lines so the figure can be read on its own: the vertical
# line sits on the total-counts axis, the horizontal one on the gene-count axis.
ax.axvline(500, color='red', linestyle='--', label='500 UMI threshold')
ax.axhline(500, color='blue', linestyle='--', label='500 gene threshold')
ax.legend()
plt.show()

In [None]:
# Total counts against the mitochondrial percentage, one point per row of
# `adata.obs`.
ax = sns.scatterplot(
    x='total_counts',
    y='pct_counts_mt',
    data=adata.obs,
    s=5, alpha=0.3
)
ax.set(
    xlabel='Total counts',
    ylabel='% mitochondrial counts',
    title='Total counts vs. mitochondrial content',
)
# Label the cut-off line so the figure explains the red line itself.
ax.axhline(20, color='red', linestyle='--', label='20% mitochondrial threshold')
ax.legend()
plt.show()

In [None]:
import sys
from pathlib import Path


def find_src_dir(start: Path, max_levels: int = 3) -> Path:
    """Locate the project's ``src`` directory starting from ``start``.

    Checks ``start`` itself and then up to ``max_levels`` parent directories,
    returning the first ``<dir>/src`` that exists. This lets the notebook run
    from the project root or from a subdirectory of it.

    Args:
        start: Directory to begin the search from (resolved to an absolute path).
        max_levels: How many parent directories above ``start`` to inspect. The
            search is bounded so an unrelated ``src`` directory far up the
            filesystem is not picked up.

    Returns:
        The first existing ``src`` directory found, or ``start / "src"`` when
        none exists (the path used before the search was added).
    """
    start = start.resolve()
    search_roots = [start, *start.parents][: max_levels + 1]
    for root in search_roots:
        candidate = root / "src"
        if candidate.is_dir():
            return candidate
    return start / "src"


# Add src/ to the Python path if not already present
src_path = find_src_dir(Path())
if str(src_path) not in sys.path:
    sys.path.append(str(src_path))

In [None]:
# Check that the project package imports, and print what `get_data_path`
# returns for the raw study directory name.
from signals_in_the_noise.utilities.storage import get_data_path
print(get_data_path("GSE161529_RAW"))