In [None]:
# layer: totalRNA, rbRNA, innuclei, TE
import os
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from anndata import AnnData
from tqdm.notebook import tqdm
import scanpy.external as sce
import harmonypy
import scipy

import warnings 
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries imported above — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

`adata_C1` is loaded from a preprocessed `.h5ad` file.

This dataset represents the C1 batch of wild-type mouse brain tissue.
It has been processed upstream and is ready for downstream analysis.

In [None]:
# Load the preprocessed C1 AnnData object from disk.
# `ad.read` is only a deprecated alias; `ad.read_h5ad` is the supported reader
# for `.h5ad` files.
adata_C1 = ad.read_h5ad('/path/to/C1_WT_mousebrain1_0224v1.h5ad')

In [None]:
# Split the combined matrix into its two omic components.
# Feature names in `adata_C1.var_names` carry a `_ntRNA` or `_rbRNA` suffix;
# columns are assigned to an omic by that suffix.
ntRNA_mask = adata_C1.var_names.str.endswith('_ntRNA')
rbRNA_mask = adata_C1.var_names.str.endswith('_rbRNA')
adata_C1_ntRNA = adata_C1[:, ntRNA_mask]
adata_C1_rbRNA = adata_C1[:, rbRNA_mask]

# The two subsets are combined column-by-column further down, so they must
# describe the same genes in the same order. Fail fast here instead of silently
# adding counts that belong to different genes.
ntRNA_genes = adata_C1.var_names[ntRNA_mask].str.replace(r'_ntRNA$', '', regex=True)
rbRNA_genes = adata_C1.var_names[rbRNA_mask].str.replace(r'_rbRNA$', '', regex=True)
if not ntRNA_genes.equals(rbRNA_genes):
    raise ValueError(
        'ntRNA and rbRNA features are not aligned: the suffix-free gene names '
        'differ in content or order, so the matrices cannot be combined positionally.'
    )


In [None]:
# Recover the shared feature names by removing the omic suffix, keeping the
# first occurrence of each name. The pattern is anchored with `$` so that only
# a trailing `_ntRNA` / `_rbRNA` is stripped (matching the suffix-based split of
# the matrix); an identical substring inside a name is left alone.
new_feature_names = adata_C1.var_names.str.replace(r'_(?:ntRNA|rbRNA)$', '', regex=True).drop_duplicates()

In [None]:
# Combined expression matrix: element-wise sum of the ntRNA and rbRNA matrices.
# The addition is positional, so both subsets must share the same column order.
new_X = (
    adata_C1_ntRNA.X
    + adata_C1_rbRNA.X
)

In [None]:
# Assemble the combined object: X holds the summed matrix, `var` is indexed by
# the suffix-free feature names, and `obs` is a copy of the original cell table.
feature_table = pd.DataFrame(index=new_feature_names)
new_data = AnnData(
    X=new_X,
    obs=adata_C1.obs.copy(),
    var=feature_table,
)

# Carry the unstructured annotations over as a copy of the original mapping.
# NOTE(review): only obs and uns are carried over here; if the source object
# also has obsm/obsp entries that are needed downstream, they are not copied.
new_data.uns = adata_C1.uns.copy()

# Keep each omic's own matrix alongside the sum as a named layer.
for layer_key, omic_subset in (('ntRNA', adata_C1_ntRNA), ('rbRNA', adata_C1_rbRNA)):
    new_data.layers[layer_key] = omic_subset.X

print(new_data)

`outer_counts` is loaded from `C1_remain_readsouter_extracted.csv`.

This file contains the extracted counts of outer (non-nuclear) reads from the C1 batch of wild-type mouse brain tissue.

In [None]:
# Read the outer-read table; the first CSV column is used as the row index.
outer_counts = pd.read_csv(
    '/path/to/C1_remain_readsouter_extracted.csv',
    index_col=0,
)

In [None]:
# Take the read-level table from the freshly loaded object rather than from
# `new_data`: a later cell overwrites `new_data.uns['remain_reads_info_new']`
# with the merged table, so reading it back from `new_data` would append
# `outer_counts` a second time whenever these cells are re-run. On a fresh run
# both objects hold the same table, so the result is unchanged.
existing_df = adata_C1.uns['remain_reads_info_new'].reset_index(drop=True)
print(existing_df.columns)

In [None]:
# Discard the CSV-derived index, then stack the outer reads underneath the
# existing read table with a fresh 0..n-1 index.
outer_counts = outer_counts.reset_index(drop=True)
merged_df = pd.concat(
    (existing_df, outer_counts),
    axis=0,
    ignore_index=True,
)

In [None]:
# Rows whose raw_cell_barcode is -1 get both `cell_barcode` and `nuclei`
# set to -1 as well.
no_raw_barcode = merged_df['raw_cell_barcode'].eq(-1)
merged_df.loc[no_raw_barcode, ['cell_barcode', 'nuclei']] = -1

# Drop the columns that are not kept, then store the merged table
# back on the combined object.
columns_to_drop = ['gene', 'gridc_gridr_tilenum', 'raw_cell_barcode']
merged_df = merged_df.drop(columns=columns_to_drop)
new_data.uns['remain_reads_info_new'] = merged_df

In [None]:
# Inputs for the TE ratio.
# Denominator: the combined matrix in X, kept in its stored format for now.
transcription = new_data.X  # Transcription
# Numerator: the rbRNA layer, converted to a dense array.
rbRNA = new_data.layers['rbRNA'].toarray()  # Translation


In [None]:
# Densify the denominator so it can be masked element-wise.
transcription_dense = transcription.toarray()

# Replace zero entries with NaN so the later division yields NaN there
# instead of dividing by zero.
zero_entries = transcription_dense == 0
transcription_safe = np.where(zero_entries, np.nan, transcription_dense)

In [None]:
# TE: element-wise ratio of rbRNA to the zero-masked denominator
# (NaN wherever the denominator was zero), stored as its own layer.
te = np.divide(rbRNA, transcription_safe)
new_data.layers['TE'] = te

In [None]:
# Before saving, turn any layer stored as a SciPy COO matrix into a dense
# array; layers of any other type are left as they are.
for layer_name in list(new_data.layers.keys()):
    layer_data = new_data.layers[layer_name]
    if not isinstance(layer_data, scipy.sparse.coo_matrix):
        continue
    new_data.layers[layer_name] = layer_data.toarray()

# Persist the combined object (X, layers, obs, var, uns) to disk.
new_data.write('/path/to/C1_WT_mousebrain.h5ad')