# Since a good portion of the genes in the panel can help us annotate the cerebellum, we need to go
# through and clean up the annotation.

In [None]:
# Read the segmented Baysor object and make its observation names unique in place.
ad_genes = sc.read_h5ad('baysor_segmented_6-5_micron.h5ad')
ad_genes.obs_names_make_unique()

# Read the region-labelled Baysor object.
ad_viz = sc.read_h5ad('baysor_regions_labeled.h5ad')

In [None]:
# Collapse the per-region flag columns in `obs` into a single 'Brain_Region' label column.
# NOTE(review): `new_adata` is not created in the visible cells -- presumably it is built
# from ad_viz / ad_genes elsewhere; confirm before running this cell on its own.
regions = [
    'Cortex',
    'Hippocampus',
    'Thalamus',
    'Hypothalamus',
    'Basal_Ganglia',
    'MPM',
    'Cerebellum',
]

# Every cell starts as 'Unlabeled'; cells flagged in none of the region columns keep that value.
new_adata.obs['Brain_Region'] = 'Unlabeled'

# Regions are applied in list order, so a cell flagged for several regions ends up
# with the last matching region in the list.
for region in regions:
    new_adata.obs.loc[new_adata.obs[region].eq(True), 'Brain_Region'] = region

# Quick look at the first rows of the new column.
print(new_adata.obs[['Brain_Region']].head(5))

# Store the aligned x/y coordinates under obsm['spatial'].
new_adata.obsm['spatial'] = new_adata.obs.loc[:, ['aligned_x', 'aligned_y']].values

new_adata.write_h5ad('COMPLETE_Baysor.h5ad')

In [None]:
# Now we want to clean up the CB annotation.
# Subsetting an AnnData returns a view of the parent. The scanpy calls below write
# results into the object, so take an explicit copy up front instead of relying on
# the implicit view-to-copy conversion (which also emits a warning).
cerebellum_cells = new_adata[new_adata.obs['Brain_Region'] == 'Cerebellum'].copy()

# Neighbour graph -> Leiden clustering (resolution 1.5) -> UMAP embedding.
sc.pp.neighbors(cerebellum_cells)
sc.tl.leiden(cerebellum_cells, resolution=1.5)
sc.tl.umap(cerebellum_cells)

# Inspect the clusters on the embedding.
sc.pl.umap(cerebellum_cells, color=['leiden'])

In [None]:
# Marker genes taken from: https://www.nature.com/articles/s41586-021-03220-z
genes = [
    'Ppp1r17', 'Rora', 'Gabra6', 'Cbln1', 'Eomes', 'Lypd6',
    'Prkcd', 'Klhl1', 'Lgi2', 'Gdf10', 'Aqp4', 'Mobp',
    'Ppfibp1', 'Dcn', 'Kcnj8', 'Mrc1', 'Flt1', 'Foxj1',
    'Tmem119', 'P2ry12', 'Pvalb', 'Sst',
]

# Dot plot of the marker genes, one row per Leiden cluster.
sc.pl.dotplot(cerebellum_cells, var_names=genes, groupby='leiden')

In [None]:
# Map the Leiden clusters to the CB annotations based on marker gene expression.
# Keys are Leiden cluster ids as strings; values are the assigned cell-type labels.
cluster_annotations = {
    '0': 'Molecular layer interneurons',
    '1': 'Granule cells',
    '2': 'Granule cells',
    '3': 'Granule cells',
    '4': 'Granule cells',
    '5': 'Granule cells',
    '6': 'Granule cells',
    '7': 'Granule cells',
    '8': 'Granule cells',
    '9': 'Astrocytes',
    '10': 'Purkinje cells',
    '11': 'Oligodendrocytes',
    '12': 'Endothelial cells',
    '13': 'Pvalb+ interneurons',
    '14': 'Granule cells',
    '15': 'Granule cells',
    '16': 'Golgi cells',
    '17': 'Granule cells',
    '18': 'Granule cells',
    '19': 'Pvalb+ interneurons',
    '20': 'Granule cells',
    '21': 'Granule cells',
    '22': 'Pvalb+ interneurons',
    '23': 'Pvalb+ interneurons',
    '24': 'Pvalb+ interneurons',
    '25': 'Pvalb+ interneurons',
    '26': 'Granule cells',
    '27': 'Microglia',
    '28': 'Pvalb+ interneurons',
    '29': 'Oligodendrocyte precursor cells',
    '30': 'Microglia',
    '31': 'Fibroblasts',
    '32': 'Ependymal cells',
    '33': 'Pvalb+ interneurons',
    '34': 'Granule cells',
    '35': 'Unknown',
    '36': 'Unknown',
    '37': 'Unknown',
}

# A cluster id that is missing from the table above would map to NaN and later be
# stringified into the bogus label 'nan'. Fall back to the table's existing
# 'Unknown' label instead, so a re-clustering that yields extra clusters stays
# visible and sensible in the final annotation.
cerebellum_cells.obs['updated_celltype'] = (
    cerebellum_cells.obs['leiden']
    .astype(str)
    .map(cluster_annotations)
    .fillna('Unknown')
)

# Carry the refined cerebellum labels back onto the main data object.

# Cast both observation indices to str so the label-based assignment below
# aligns on identically-typed keys.
new_adata.obs.index = new_adata.obs.index.astype(str)
cerebellum_cells.obs.index = cerebellum_cells.obs.index.astype(str)

# Start from the existing cell-type labels (as plain strings) ...
new_adata.obs['updated_celltype'] = new_adata.obs['celltype'].astype(str)

# ... then overwrite the rows belonging to cerebellum cells with their new labels.
cb_celltypes = cerebellum_cells.obs['updated_celltype'].astype(str)
new_adata.obs.loc[cb_celltypes.index, 'updated_celltype'] = cb_celltypes

# The cerebellum has been annotated in full, so drop any cell that is still
# labelled 'Cerebellum neurons' after the update.
sensical_adata = new_adata[new_adata.obs['updated_celltype'] != 'Cerebellum neurons']

# Final cleaned-up annotation (original note: from scVI and scANVI).
sensical_adata.write_h5ad('FINAL_ANNOTATION.h5ad')