# Introduction
This notebook evaluates how denoising impacts cell type annotations. We use a dataset containing a number of immune cell types that has been manually annotated beyond what the pre-trained CellTypist cell type classification model annotates. These manual annotations are our reference for benchmarking the annotations obtained under the noisy and denoised conditions.

In [3]:
import os
import scanpy as sc
import numpy as np
import pandas as pd
# import rpy2.robjects as ro
# from rpy2.robjects import pandas2ri
# from rpy2.robjects import conversion
# from rpy2.robjects import default_converter
# from rpy2.robjects.conversion import localconverter
# from rpy2.robjects.packages import importr
import scipy.sparse as sp
import anndata as ad

import celltypist
from celltypist import models


import pandas as pd
from anndata import read_h5ad

# import dca
# help(dca)

# from dca.api import dca
import tensorflow as tf


# Loading other datasets 

In [4]:
# Directory that holds the input .h5ad files and the CellTypist model pickle.
# Set the STAT231B_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original local path is used.
data_file_path = os.environ.get(
    'STAT231B_DATA_DIR',
    '/Users/josephschirle/Documents/Courses/StatsM231B/Stat231B_Project/data',
)

# Alternative datasets tried earlier; kept disabled for reference.
# pbmc68k_reduced_adata = sc.datasets.pbmc68k_reduced() # the 10k or 68k data may be preferred, but this is fast and I don't have to save locally
# # Blood cell dataset
# paul15_adata = sc.datasets.paul15()
# Load the h5 file for the purified 10x data
# pure_11_adata = sc.read_h5ad(os.path.join(data_file_path,"pure_11.h5ad")) # These are PBMC

# PBMC_10x = sc.read_10x_h5(os.path.join(data_file_path, "pbmc_10k_v3_filtered_feature_bc_matrix.h5"))
# PBMC_10x = sc.read_10x_h5(os.path.join(data_file_path, "10k_PBMC_3p_nextgem_Chromium_X_raw_feature_bc_matrix.h5"))

# tm_facs_data = read_h5ad(os.path.join(data_file_path,'tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad'))

# lopez_2022 = read_h5ad(os.path.join(data_file_path,'Lopez_2022_sln_208.h5ad'))
# print(lopez_2022)

# Dataset used for the benchmark. Fail early with an actionable message if the
# file is not where we expect it, instead of a low-level HDF5 error.
gu_2024_path = os.path.join(data_file_path, 'Gu_2024.h5ad')
if not os.path.isfile(gu_2024_path):
    raise FileNotFoundError(
        f"Expected dataset at {gu_2024_path!r}; "
        "set STAT231B_DATA_DIR to the directory containing Gu_2024.h5ad."
    )
Gu_2024 = read_h5ad(gu_2024_path)

# Annotating cell type

In [5]:
# Normalize every cell to 10,000 total counts and log1p-transform, in place.
#
# Both calls rewrite Gu_2024.X, so running this cell twice on the same object
# would normalize and log-transform already-transformed values. scanpy's log1p
# records itself under `.uns["log1p"]`; that entry is used here as the
# "already transformed" marker so the cell is safe to re-run.
# NOTE(review): this assumes Gu_2024.h5ad stores raw counts in X and carries no
# pre-existing `uns["log1p"]` entry - confirm against the file.
if "log1p" not in Gu_2024.uns:
    sc.pp.normalize_total(Gu_2024, target_sum=1e4)
    sc.pp.log1p(Gu_2024)
else:
    print("Gu_2024 is already log1p-transformed; skipping normalization.")


In [None]:
# Annotate the noisy (non-denoised) data with the pretrained CellTypist
# model stored as 'Adult_Mouse_Gut.pkl' in the data directory.
model = models.Model.load(os.path.join(data_file_path, 'Adult_Mouse_Gut.pkl'))
predictions = celltypist.annotate(Gu_2024, model=model)

# `predictions.predicted_labels` is a DataFrame indexed by cell. Pick the
# per-cell prediction column explicitly: assigning the whole frame to a single
# obs column only works while the frame has exactly one column, and would break
# as soon as majority voting (which adds columns) is switched on.
Gu_2024.obs["celltypist_labels"] = predictions.predicted_labels["predicted_labels"]

In [None]:
# List the distinct labels in each annotation column so the two naming schemes
# can be compared side by side: first the CellTypist predictions, then the
# dataset's own 'Cell_Type' column (presumably the manual annotation - confirm).
for label_column in ('celltypist_labels', 'Cell_Type'):
    distinct_labels = Gu_2024.obs[label_column].unique()
    print(list(distinct_labels))

In [8]:
# Harmonized labeling scheme: maps a label as it appears in the annotations
# (key) to the simplified name used for benchmarking (value). Several source
# labels intentionally collapse onto one name (e.g. the GC variants -> "GC").
# Entries that map a label to itself are kept so the dictionary lists every
# label that has been considered.
# NOTE(review): keys must match the annotation strings exactly; confirm the
# spelling of "DC (CD103+ CD11-b)" / "DC (CD103+ CD11+b)" against the labels
# actually present in the data.
label_map = {
    "EarlyGC_2": "GC",
    "Plasma cell": "PC",
    "CD4": "CD4+ T",
    "Naive B cells": "Naive B",
    "EarlyGC_1": "GC",
    "DC": "DC",
    "DC (CD103+ CD11-b)": "cDC1",
    "Activated CD4+ T cell": "Activated CD4+ T",
    "CD8+ T cell": "CD8+ T",
    "Resting B cell": "B Cell",
    "B cell": "B Cell",
    "Resting CD4+ T cells": "CD4+ T",
    "pDC": "pDC",
    "eMBC": "eMBC",
    "LTi cell": "LTi cell",
    "PreGC": "pre-GC.BCs",
    "GC B cell (LZ)": "GC",
    "Stromal cell (DN)": "Stromal cell",
    "Cd11c Mac": "Cd11c Mac",
    "Cd206 Mac": "Cd206 Mac",
    "DC (CD103+ CD11+b)": "cDC2",
    "TA.G2": "TA.G2",
    "GC B cell (DZ)": "GC",
    "epithelial cell of large intestine": "epithelial cell of large intestine",
    "Enterocyte.Progenitor.Early": "Enterocyte.Progenitor.Early",
    "DC (CD103- C2)": "DC (CD103- C2)",
    "ILC3": "ILC3",
    "lymphatic": "lymphatic",
    "Goblet": "Goblet",
    "Stem": "Stem",
    "intestinal crypt stem cell": "intestinal crypt stem cell",
    "TA": "TA",
    "Paneth": "Paneth",
    "artery": "BEC",
    "ILC1": "ILC1",
    "ILC2": "ILC2",
    "NKT cell": "NKT",
    "Enterocyte.Progenitor": "Enterocyte.Progenitor",
}

# Translate the CellTypist predictions into the simplified scheme. Labels that
# have no entry in `label_map` pass through `replace` unchanged.
noisy_labels = Gu_2024.obs["celltypist_labels"].replace(label_map)
Gu_2024.obs["Noisy_annotation"] = noisy_labels


# Denoising the data

In [None]:
# Build the denoised, normalized, log-transformed copy of the dataset.
#
# DCA is the expensive step of this notebook, so its result is cached on disk:
# if the output file already exists it is loaded instead of being recomputed.
# Delete the file to force a fresh denoising run.
# `data_file_path`, `tf`, `sc` and `read_h5ad` come from the import and data
# loading cells at the top of the notebook.
denoised_path = os.path.join(data_file_path, 'Denoised_normalized_log_Gu_2024.h5ad')

if os.path.exists(denoised_path):
    Gu_2024_denoise = read_h5ad(denoised_path)
else:
    # Switch TensorFlow to graph mode before DCA runs (presumably required by
    # DCA's Keras code under TF2 - confirm). This is process-wide and is not
    # undone later in the notebook.
    tf.compat.v1.disable_eager_execution()

    # Reload the data from disk so that denoising starts from the stored
    # counts rather than from the normalized `Gu_2024` object.
    Gu_2024_denoise = read_h5ad(os.path.join(data_file_path, 'Gu_2024.h5ad'))
    # Remove genes with zero expression across all cells
    sc.pp.filter_genes(Gu_2024_denoise, min_counts=1)

    # Apply DCA denoising (the return value is unused; the object is updated in place)
    sc.external.pp.dca(Gu_2024_denoise)

    # Normalize total counts and log-transform, matching the noisy pipeline
    sc.pp.normalize_total(Gu_2024_denoise, target_sum=1e4)
    sc.pp.log1p(Gu_2024_denoise)

    Gu_2024_denoise.write(denoised_path)




In [None]:
# Annotate the denoised data with the same pretrained CellTypist model
# ('Adult_Mouse_Gut.pkl') that was used for the noisy data, so the two sets of
# predictions are directly comparable.
model = models.Model.load(os.path.join(data_file_path, 'Adult_Mouse_Gut.pkl'))
predictions = celltypist.annotate(Gu_2024_denoise, model=model)

# `predictions.predicted_labels` is a DataFrame indexed by cell. Pick the
# per-cell prediction column explicitly: assigning the whole frame to a single
# obs column only works while the frame has exactly one column, and would break
# as soon as majority voting (which adds columns) is switched on.
Gu_2024_denoise.obs["celltypist_labels"] = predictions.predicted_labels["predicted_labels"]

In [None]:
# Diagnostic cell: report TensorFlow's execution mode and sanity-check a
# trivial tensor operation. `tf` is imported in the first code cell.
#
# NOTE: run_functions_eagerly(True) only makes `tf.function`-decorated code run
# eagerly. It does NOT re-enable eager execution once
# tf.compat.v1.disable_eager_execution() has been called (as the denoising cell
# does), so the status printed below may legitimately be False.
tf.config.run_functions_eagerly(True)

eager_enabled = tf.executing_eagerly()
print("Eager execution status:", eager_enabled)

# A simple tensor operation to test execution
x = tf.constant([1, 2, 3])
y = tf.constant([4, 5, 6])
z = x + y

if not eager_enabled:
    # In graph mode `z` is only a symbolic tensor; evaluate it in a session so
    # the cell prints the actual values instead of a graph node description.
    with tf.compat.v1.Session() as sess:
        z = sess.run(z)

print("Result of x + y:", z)
