In [2]:
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import numpy as np
import scanpy as sc
import pandas as pd
import anndata as ad
import seaborn as sns

In [4]:
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=80, facecolor='white')

In [5]:
mouse_colors = plt.cm.colors.ListedColormap(['red', 'darkred', 'blue','darkblue', 'orange', 'darkorange', 'violet', 'darkviolet',])

samples = [
    "Hyp_4wk_1_matrix.h5",
    "Hyp_4wk_2_matrix.h5",
    "Hyp_4wk_3_matrix.h5",
    "Hyp_4wk_4_matrix.h5",
    "Hyp_90wk_1_matrix.h5",
    "Hyp_90wk_2_matrix.h5",
    "Hyp_90wk_3_matrix.h5",
    "Hyp_90wk_4_matrix.h5",
    "PFC_4wk_1_matrix.h5",
    "PFC_4wk_2_matrix.h5",
    "PFC_4wk_3_matrix.h5",
    "PFC_4wk_4_matrix.h5",
    "PFC_90wk_1_matrix.h5",
    "PFC_90wk_2_matrix.h5",
    "PFC_90wk_3_matrix.h5",
    "PFC_90wk_4_matrix.h5"
]

mouse_id = {
    0 : 1,
    1 : 1,
    2 : 2,
    3 : 2,
    4 : 3,
    5 : 3,
    6 : 4,
    7 : 4,
    8 : 5,
    9 : 5,
    10 : 6,
    11 : 6,
    12 : 7,
    13 : 7,
    14 : 8,
    15 : 8
}

In [17]:
adata= sc.read_h5ad("/Users/cmdb/qb25project/mouse-brain-RNAseq/RNAseqdata.h5ad")
print(adata)

AnnData object with n_obs × n_vars = 79667 × 20984
    obs: 'age', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'n_genes', 'n_counts', 'clust_annot', 'sex_ontology_term_id', 'suspension_type', 'cell_type_ontology_term_id', 'assay_ontology_term_id', 'tissue_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'development_stage_ontology_term_id', 'donor_id', 'is_primary_data', 'cell_type_annot', 'tissue_type', 'cell_type', 'assay', 'disease', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid'
    var: 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'feature_is_filtered', 'mean', 'std', 'feature_name', 'feature_reference', 'feature_biotype', 'feature_length', 'feature_type'
    uns: 'age_colors', 'batch_condition', 'citation', 'leiden', 'neighbors', 'organism', 'organism_ontology_term_id', 'pca', 'schema_reference', 'schema_version', 'title', 'umap'
    obsm: 'X_pca', 'X_pca_harm

In [18]:
adata.write("RNAseq-converted-10x.h5")

In [None]:
all_adata = []
i = 0
for s in samples:
    area, age, idx, _ = s.split("_")
    print(area, age, idx)
    curr_adata = sc.read_10x_h5(f"/Users/cmdb/qb25project/mouse-brain-RNAseq/")
    curr_adata.var_names_make_unique()
    curr_adata.obs['area'] = area
    curr_adata.obs['age'] = age
    curr_adata.obs['idx'] = i
    i += 1
    curr_adata.var['mt'] = curr_adata.var_names.str.startswith('mt-')  # annotate the group of mitochondrial genes as 'mt'
    sc.pp.calculate_qc_metrics(curr_adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

    all_adata.append(curr_adata)

Hyp 4wk 1
reading /Users/cmdb/qb25project/mouse-brain-RNAseq


IsADirectoryError: [Errno 21] Unable to synchronously open file (file read failed: time = Fri Oct  3 12:20:42 2025
, filename = '/Users/cmdb/qb25project/mouse-brain-RNAseq', file descriptor = 74, errno = 21, error message = 'Is a directory', buf = 0x16d0b86f0, total read size = 8, bytes this sub-read = 8, offset = 0)

In [None]:
total_cells = np.sum([a.n_obs for a in all_adata])
print('total cells:', total_cells)

In [None]:
adata = ad.concat(all_adata)

In [None]:
adata[adata.obs.area=='PFC']

In [None]:
adata.obs_names_make_unique()

In [None]:
sc.pp.filter_cells(adata, min_genes=1000)
sc.pp.filter_cells(adata, max_counts=100000)
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.filter_cells(adata, min_counts=2500)

In [None]:
adata

In [None]:
adata.obs['mouse_id'] = [mouse_id[i] for i in adata.obs.idx]

In [None]:
# run scrublet on adata to identify doublets
import scrublet as scr
scrub = scr.Scrublet(adata.X, expected_doublet_rate=0.09)
doublet_scores, predicted_doublets = scrub.scrub_doublets(min_gene_variability_pctl=85, 
                                                          n_prin_comps=30)