## notes  
4/4/2025
Here I will annotate the mouse data per stage. First I will just do a rough annotation to facilitate SAMap.  
Later on I will go into more detail. 

In [None]:
import logging
logging.getLogger('matplotlib.font_manager').setLevel(logging.ERROR)
import scanpy as sc
import scvelo as scv
import scvi
import seaborn as sns
import plotly.express as px
import numpy as np
from dash import Dash, dcc, html, Input, Output

import pandas as pd

import os
import sys
import time
import gc
# Point R_HOME at the lib/R/ folder under the running interpreter's prefix.
os.environ['R_HOME'] = f"{sys.exec_prefix}/lib/R/"

# Plotting
import matplotlib
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.colors import LinearSegmentedColormap, ListedColormap
from matplotlib.lines import Line2D 

from copy import copy
# Work on a copy of the stock "Reds" colormap and give it a light-gray
# "under" colour (used for values below the lower colour limit).
reds = copy(mpl.cm.Reds)
reds.set_under("lightgray")

# Make the project folder inside the user's home directory the working
# directory, so the relative paths used below resolve against it.
project_directory = '/Cranio_Lab/Louk_Seton/4_species_project'
os.chdir(f"{os.path.expanduser('~')}{project_directory}")

In [None]:
## Read the filtered .h5ad file of every sample
species = 'mouse'  # species sub-directory
genome = 'mm10'  # genome sub-directory
output_prefix = 'h5ad_files/'  # root folder the .h5ad files are read from
sample_names = ['ME8', 'ME9', 'ME10', 'ME11', 'ME12']  # samples to load

# Maps each sample name to the object returned by sc.read for
# <output_prefix><species>/<genome>/<sample>_after_filtering.h5ad
adata_dict = {}
for sample in sample_names:
    adata_dict[sample] = sc.read(
        f'{output_prefix}{species}/{genome}/{sample}_after_filtering.h5ad'
    )

In [None]:
## E8
# Work on the ME8 sample. This binds the object stored in adata_dict (no copy
# is made), so in-place changes to `adata` are also visible via adata_dict['ME8'].
adata = adata_dict['ME8']

In [None]:
# Bare expression: shows the object's summary as the notebook cell output.
adata

In [None]:
# Leiden clustering at resolution 4; the result is stored under the
# 'leiden_high' key.
sc.tl.leiden(adata, key_added='leiden_high', resolution=4)

In [None]:
# UMAP coloured by the 'leiden_post_QC' labels, with the legend drawn on the data.
sc.pl.umap(
    adata,
    color=['leiden_post_QC'],
    # groups=['35'],  # optional: uncomment to pass a single group to `groups`
    legend_loc='on data',
    ncols=1,
)

In [None]:
# UMAP panels (3 per row) coloured by a handful of genes, using the `reds`
# colormap with the lower colour limit set to 0.05.
# NOTE(review): 'ccdc198' is lowercase unlike every other symbol in this list -
# confirm it matches the name used in adata.var_names.
sc.pl.umap(
    adata,
    color=[
        'Pou5f1', 'Wnt8a', 'Cdx1', 'Nkx1-2', 'Hoxb9',
        'Hoxb8', 'ccdc198', 'Hoxa1',
    ],
    ncols=3,
    cmap=reds,
    vmin=0.05,
)

In [None]:
# Rank genes for each 'leiden_post_QC' group using the Wilcoxon method.
sc.tl.rank_genes_groups(
    adata,
    method='wilcoxon',
    groupby='leiden_post_QC',
)

In [None]:
# Dot plot of 4 ranked genes per group, coloured by log fold change on a
# blue-white-red scale limited to [-4, 4]; min_logfoldchange is set to 3.
sc.pl.rank_genes_groups_dotplot(
    adata,
    n_genes=4,
    min_logfoldchange=3,
    values_to_plot="logfoldchanges",
    cmap='bwr',
    vmin=-4,
    vmax=4,
    colorbar_title='log fold change',
)

In [None]:
# Same dot plot, but for group '11' only: 25 ranked genes with
# min_logfoldchange lowered to 2.5.
sc.pl.rank_genes_groups_dotplot(
    adata,
    groups=['11'],
    n_genes=25,
    min_logfoldchange=2.5,
    values_to_plot="logfoldchanges",
    cmap='bwr',
    vmin=-4,
    vmax=4,
    colorbar_title='log fold change',
)

In [None]:
# Dot plot for an explicit, hand-picked gene list (var_names) rather than the
# top-ranked genes, coloured by log fold change on a [-4, 4] scale.
# NOTE(review): min_logfoldchange may have no effect when var_names is given -
# confirm against the scanpy documentation.
sc.pl.rank_genes_groups_dotplot(
    adata,
    var_names=[
        'Tfap2b', 'Foxd3', 'Sox10',
        'Adgrg2', 'Nkx2-9', 'Phox2b',
        'Shh', 'Foxa2',
        'Hesx1', 'Fezf1',
        'Meis2', 'Cyp26b1', 'Hoxa2',
        'Pax5', 'En1', 'En2',
        'Wnt8b', 'Wnt2b', 'Barhl2',
        'Six3', 'Lhx2', 'Rax',
        'Neurod1', 'Neurog1',
        'Tfap2a', 'Dlx6', 'Krt1',
        'Pax1', 'Vgll2', 'Pax9',
        'Podn', 'T',
        'Pitx2', 'Pitx3', 'Dmrta2',
        'Col1a1', 'Twist1', 'Meox2',
        'Kdr', 'Flt1',
    ],
    min_logfoldchange=3,
    values_to_plot="logfoldchanges",
    cmap='bwr',
    vmin=-4,
    vmax=4,
    colorbar_title='log fold change',
)

In [None]:
# Rough annotation: maps a cluster id (as a string) to a label of the form
# "<tissue> - <marker gene>+". Only the clusters listed here are annotated.
cluster_anno = {
    '1': 'Forebrain - Six3+',
    '2': 'Midbrain - Pax5+',
    # Ccdc198 is 1700011H14Rik; Cpn1 is also found in the same area by in situ, see
    # https://bmcdevbiol.biomedcentral.com/articles/10.1186/1471-213X-7-92/tables/2
    '3': 'Neural Tube - Ccdc198+',
    '5': 'Neural Crest - Sox10+',
    '8': 'Hindbrain - Hoxa2+',
    # Markers: Nkx2-9, Shh. (The comma after this entry was missing in the
    # original, which made the whole dict literal a SyntaxError.)
    '13': 'Floor Plate/Ventral Neural Tube - Nkx2-9+',
    # Pou5f1 high, but half of them have Hox8/9 expression, and these two
    # groups stick together regardless of resolution.
    '16': 'Strange cells - Pou5f1 +',
    '18': 'Notochord - T+',
}