## Separating ectoderm for Andrea's project
Selecting all Epcam-expressing clusters per sample and saving them as a combined object.  
ME8:  
![](../../markdown_images/mouse/mm39/markdown_plots/ectoderm_spinoff/ME8_umap.png)   

ME9:  
![](../../markdown_images/mouse/mm39/markdown_plots/ectoderm_spinoff/ME9_umap.png)   

ME10:  
![](../../markdown_images/mouse/mm39/markdown_plots/ectoderm_spinoff/ME10_umap.png)   

ME11:  
![](../../markdown_images/mouse/mm39/markdown_plots/ectoderm_spinoff/ME11_umap.png)   

ME12:  
![](../../markdown_images/mouse/mm39/markdown_plots/ectoderm_spinoff/ME12_umap.png)   

Subset these cells and combine them into a single file: adata_mm39_epcam_concat.h5ad  
This will be combined with our Epcam+ cells.

In [None]:
import logging
logging.getLogger('matplotlib.font_manager').setLevel(logging.ERROR)
import scanpy as sc
import anndata as ad
import scvelo as scv
import scvi
import seaborn as sns
import plotly.express as px
import numpy as np
from dash import Dash, dcc, html, Input, Output

import pandas as pd

import os
import sys
import time
import gc
# Point R_HOME at lib/R/ under the running interpreter's install prefix (sys.exec_prefix).
# NOTE(review): presumably so an R bridge finds the environment's own R installation;
# nothing visible in this notebook imports one — confirm this is still needed.
os.environ['R_HOME'] = sys.exec_prefix+"/lib/R/"

# Plotting
import matplotlib
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.colors import LinearSegmentedColormap, ListedColormap
from matplotlib.lines import Line2D 

from copy import copy

# Work on a private copy of the "Reds" colormap so the under-range colour
# set below does not modify matplotlib's shared colormap object.
reds = copy(mpl.cm.Reds)
# Values below a plot's vmin are drawn in light gray instead of the palest red.
reds.set_under("lightgray")

# Make the project folder (located under the user's home directory) the
# working directory; the relative paths used in later cells depend on this.
project_directory = '/Cranio_Lab/Louk_Seton/4_species_project'
home_directory = os.path.expanduser("~")
os.chdir(home_directory + project_directory)

In [None]:
## mouse: load the per-sample AnnData files
# NOTE(review): the input files are named '*_filtered_by_mm10.h5ad' but are read
# from the mm39 directory — confirm which genome build these objects were made with.
start_time = time.strftime("%Y_%m_%d-%I_%M_%S_%p")
print('start time:', start_time)

sample_names = ['ME8', 'ME9', 'ME10', 'ME11', 'ME12']  # samples to load
species = 'mouse'  # species sub-directory
genome = 'mm39'  # genome sub-directory
# Root directory of the h5ad files. The trailing slash matters: paths here and
# in later cells are built by plain string concatenation.
output_prefix = 'h5ad_files/'

# One AnnData object per sample, keyed by sample name.
adata_dict = {}
for sample in sample_names:
    h5ad_path = f'{output_prefix}{species}/{genome}/{sample}_filtered_by_mm10.h5ad'
    adata_dict[sample] = sc.read(h5ad_path)

In [None]:
## code for any markdown figures ##
# Writes one UMAP PNG per sample (Epcam expression next to the selected
# 'leiden_post_QC' clusters) into the folder the markdown at the top of this
# notebook links to.
output_dir = f'markdown_images/{species}/{genome}/markdown_plots/ectoderm_spinoff/'
# os.makedirs replaces the IPython-only `!mkdir -p` shell escape so this cell
# also runs as plain Python; exist_ok=True keeps reruns from failing.
os.makedirs(output_dir, exist_ok=True)

# Clusters highlighted in the 'leiden_post_QC' panel of each sample.
epcam_clusters_to_plot = {
    'ME8': ['0', '9', '17', '18'],
    'ME9': ['5', '14', '21', '22'],
    'ME10': ['12', '15', '18', '21'],
    'ME11': ['10', '14', '18', '21', '22'],
    'ME12': ['7', '15', '21'],
}

plt.rcParams['figure.figsize'] = [4, 3]
for sample_id, clusters in epcam_clusters_to_plot.items():
    # show=False leaves the figure open so it can be saved rather than displayed.
    sc.pl.umap(
        adata_dict[sample_id],
        color=['Epcam', 'leiden_post_QC'],
        ncols=3,
        groups=clusters,
        cmap=reds,
        vmin=0.05,
        show=False,
    )
    plt.savefig(f'{output_dir}{sample_id}_umap.png', dpi=80, bbox_inches='tight')
    # Close the figure so open figures do not pile up across samples.
    plt.close()

In [None]:
## ME8: re-normalize from the stored counts, recluster, and show the selected clusters
i = 'ME8'
adata_me8 = adata_dict[i]  # same object as the dict entry; the steps below modify it

# Reset X to the 'original_counts' layer, then normalize and log-transform it.
adata_me8.X = adata_me8.layers['original_counts'].copy()
sc.pp.normalize_total(adata_me8)  # Normalizing to median total counts
sc.pp.log1p(adata_me8)  # Logarithmize the data
# Keep the normalized, log-transformed matrix available as its own layer.
adata_me8.layers["normalized_counts"] = adata_me8.X.copy()

## highly variable genes
sc.pp.highly_variable_genes(adata_me8, n_top_genes=1000)

## dimensionality reduction and clustering
## (scanpy defaults apart from the Leiden resolution and output key)
sc.tl.pca(adata_me8)
sc.pp.neighbors(adata_me8)
sc.tl.umap(adata_me8)
sc.tl.leiden(adata_me8, resolution=1, key_added='leiden_post_QC')

# Epcam expression next to the Leiden clusters, restricted to the listed clusters.
sc.pl.umap(
    adata_me8,
    color=['Epcam', 'leiden_post_QC'],
    ncols=3,
    groups=['0', '9', '17', '18'],
    cmap=reds,
    vmin=0.05,
)


In [None]:
# Finer Leiden clustering of ME8 (resolution 2), written to its own obs key so
# the resolution-1 'leiden_post_QC' labels are not overwritten.
adata_me8 = adata_dict['ME8']
high_res_key = 'leiden_post_QC_high'
sc.tl.leiden(adata_me8, resolution=2, key_added=high_res_key)
sc.pl.umap(adata_me8, color=[high_res_key])

In [None]:
# Wilcoxon marker-gene ranking over the high-resolution ME8 clusters, followed
# by a log-fold-change dot plot for cluster '23'.
adata_me8 = adata_dict['ME8']
sc.tl.rank_genes_groups(adata_me8, groupby='leiden_post_QC_high', method='wilcoxon')
sc.pl.rank_genes_groups_dotplot(
    adata_me8,
    groups=['23'],
    n_genes=40,
    values_to_plot='logfoldchanges',
    cmap='bwr',
    vmin=-4,
    vmax=4,
    min_logfoldchange=3,
)

In [None]:
# Log-fold-change dot plot for ME8 cluster '7'. No ranking is computed here, so
# this relies on an earlier sc.tl.rank_genes_groups call on the same object.
dotplot_options = dict(
    groups=['7'],
    n_genes=60,
    values_to_plot='logfoldchanges',
    cmap='bwr',
    vmin=-4,
    vmax=4,
    min_logfoldchange=3.5,
)
sc.pl.rank_genes_groups_dotplot(adata_dict['ME8'], **dotplot_options)

In [None]:
# Expression UMAPs for a panel of genes in ME8, three panels per row.
# Alternative panel kept from the original cell:
# ['Epcam','Samd3','T','Shh','Krt19','Foxa2','Car3','Pax9','Sox9','Frzb']
me8_genes = [
    'Sox10', 'Dlx2', 'Gad1', 'Cnmd', 'Dlx5', 'Pax3', 'Ahrr',
    'Prdm12', 'Frzb', 'Prkn', 'Fgf14', 'Sox3', 'Rfx4', 'Zic5',
]
sc.pl.umap(adata_dict['ME8'], color=me8_genes, cmap=reds, vmin=0.05, ncols=3)

In [None]:
## ME9: re-normalize from the stored counts, recluster, and show the selected clusters
i = 'ME9'
adata_me9 = adata_dict[i]  # same object as the dict entry; the steps below modify it

# Reset X to the 'original_counts' layer, then normalize and log-transform it.
adata_me9.X = adata_me9.layers['original_counts'].copy()
sc.pp.normalize_total(adata_me9)  # Normalizing to median total counts
sc.pp.log1p(adata_me9)  # Logarithmize the data
# Keep the normalized, log-transformed matrix available as its own layer.
adata_me9.layers["normalized_counts"] = adata_me9.X.copy()

## highly variable genes
sc.pp.highly_variable_genes(adata_me9, n_top_genes=1000)

## dimensionality reduction and clustering
## (scanpy defaults apart from the Leiden resolution and output key)
sc.tl.pca(adata_me9)
sc.pp.neighbors(adata_me9)
sc.tl.umap(adata_me9)
sc.tl.leiden(adata_me9, resolution=1, key_added='leiden_post_QC')

# Epcam expression next to the Leiden clusters, restricted to the listed clusters.
sc.pl.umap(
    adata_me9,
    color=['Epcam', 'leiden_post_QC'],
    ncols=3,
    groups=['5', '14', '21', '22'],
    cmap=reds,
    vmin=0.05,
)


In [None]:
# Finer Leiden clustering of ME9 (resolution 3), written to its own obs key so
# the resolution-1 'leiden_post_QC' labels are not overwritten.
adata_me9 = adata_dict['ME9']
high_res_key = 'leiden_post_QC_high'
sc.tl.leiden(adata_me9, resolution=3, key_added=high_res_key)
sc.pl.umap(adata_me9, color=[high_res_key])

In [None]:
# Expression UMAPs for a panel of genes in ME9, three panels per row.
me9_genes = [
    'Epcam', 'Pax9', 'Pyy', 'Trh', 'Neurod1', 'Neurog1', 'Isl1', 'Hesx1',
    'Pitx2', 'Wnt6', 'Sox10', 'Alx3', 'Dlx2', 'Insc', 'Dlx5', 'Sox3',
]
sc.pl.umap(adata_dict['ME9'], color=me9_genes, cmap=reds, vmin=0.05, ncols=3)

In [None]:
# Wilcoxon marker-gene ranking over the resolution-1 ME9 clusters, followed by
# a log-fold-change dot plot for cluster '21'.
adata_me9 = adata_dict['ME9']
sc.tl.rank_genes_groups(adata_me9, groupby='leiden_post_QC', method='wilcoxon')
sc.pl.rank_genes_groups_dotplot(
    adata_me9,
    groups=['21'],
    n_genes=40,
    values_to_plot='logfoldchanges',
    cmap='bwr',
    vmin=-4,
    vmax=4,
    min_logfoldchange=3,
)

In [None]:
# Expression UMAPs for a second panel of genes in ME9, three panels per row.
# Alternative panel kept from the original cell:
# ['Epcam','Samd3','T','Shh','Krt19','Foxa2','Car3','Pax9','Sox9','Frzb']
me9_genes = [
    'Sox10', 'Gad1', 'Cnmd', 'Nherf4', 'Zic5', 'Sox3', 'Dlx5',
    'Pax3', 'Ahrr', 'Prdm12', 'Frzb', 'Prkn', 'Fgf14', 'Rfx4',
]
sc.pl.umap(adata_dict['ME9'], color=me9_genes, cmap=reds, vmin=0.05, ncols=3)

In [None]:
# Expression UMAPs of Tlx2 and Lhx2 in ME9.
sc.pl.umap(
    adata_dict['ME9'],
    color=['Tlx2', 'Lhx2'],
    cmap=reds,
    vmin=0.05,
    ncols=3,
)

In [None]:
## ME10: re-normalize from the stored counts, recluster, and show the selected clusters
i = 'ME10'
adata_me10 = adata_dict[i]  # same object as the dict entry; the steps below modify it

# Reset X to the 'original_counts' layer, then normalize and log-transform it.
adata_me10.X = adata_me10.layers['original_counts'].copy()
sc.pp.normalize_total(adata_me10)  # Normalizing to median total counts
sc.pp.log1p(adata_me10)  # Logarithmize the data
# Keep the normalized, log-transformed matrix available as its own layer.
adata_me10.layers["normalized_counts"] = adata_me10.X.copy()

## highly variable genes
sc.pp.highly_variable_genes(adata_me10, n_top_genes=1000)

## dimensionality reduction and clustering
## (scanpy defaults apart from the Leiden resolution and output key)
sc.tl.pca(adata_me10)
sc.pp.neighbors(adata_me10)
sc.tl.umap(adata_me10)
sc.tl.leiden(adata_me10, resolution=1, key_added='leiden_post_QC')

# Epcam expression next to the Leiden clusters, restricted to the listed clusters.
sc.pl.umap(
    adata_me10,
    color=['Epcam', 'leiden_post_QC'],
    ncols=3,
    groups=['12', '15', '18', '21'],
    cmap=reds,
    vmin=0.05,
)


In [None]:
# ME10 overview: cluster labels and doublet score alongside a gene panel,
# three panels per row.
me10_panels = [
    'leiden_post_QC', 'doublet_score',
    'Epcam', 'Sox10', 'Tlx2', 'Neurog1', 'Neurod1', 'Lhx2',
]
sc.pl.umap(adata_dict['ME10'], color=me10_panels, cmap=reds, vmin=0.05, ncols=3)

In [None]:
## ME11: re-normalize from the stored counts, recluster, and show the selected clusters
i = 'ME11'
adata_me11 = adata_dict[i]  # same object as the dict entry; the steps below modify it

# Reset X to the 'original_counts' layer, then normalize and log-transform it.
adata_me11.X = adata_me11.layers['original_counts'].copy()
sc.pp.normalize_total(adata_me11)  # Normalizing to median total counts
sc.pp.log1p(adata_me11)  # Logarithmize the data
# Keep the normalized, log-transformed matrix available as its own layer.
adata_me11.layers["normalized_counts"] = adata_me11.X.copy()

## highly variable genes
sc.pp.highly_variable_genes(adata_me11, n_top_genes=1000)

## dimensionality reduction and clustering
## (scanpy defaults apart from the Leiden resolution and output key)
sc.tl.pca(adata_me11)
sc.pp.neighbors(adata_me11)
sc.tl.umap(adata_me11)
sc.tl.leiden(adata_me11, resolution=1, key_added='leiden_post_QC')

# Epcam expression next to the Leiden clusters, restricted to the listed clusters.
sc.pl.umap(
    adata_me11,
    color=['Epcam', 'leiden_post_QC'],
    ncols=3,
    groups=['10', '14', '18', '21', '22'],
    cmap=reds,
    vmin=0.05,
)



In [None]:
## ME12: re-normalize from the stored counts, recluster, and show the selected clusters
i = 'ME12'
adata_me12 = adata_dict[i]  # same object as the dict entry; the steps below modify it

# Reset X to the 'original_counts' layer, then normalize and log-transform it.
adata_me12.X = adata_me12.layers['original_counts'].copy()
sc.pp.normalize_total(adata_me12)  # Normalizing to median total counts
sc.pp.log1p(adata_me12)  # Logarithmize the data
# Keep the normalized, log-transformed matrix available as its own layer.
adata_me12.layers["normalized_counts"] = adata_me12.X.copy()

## highly variable genes
sc.pp.highly_variable_genes(adata_me12, n_top_genes=1000)

## dimensionality reduction and clustering
## (scanpy defaults apart from the Leiden resolution and output key)
sc.tl.pca(adata_me12)
sc.pp.neighbors(adata_me12)
sc.tl.umap(adata_me12)
sc.tl.leiden(adata_me12, resolution=1, key_added='leiden_post_QC')

# Epcam expression next to the Leiden clusters, restricted to the listed clusters.
sc.pl.umap(
    adata_me12,
    color=['Epcam', 'leiden_post_QC'],
    ncols=3,
    groups=['7', '15', '21'],
    cmap=reds,
    vmin=0.05,
)



In [None]:
# Replace each sample with only the cells whose 'leiden_post_QC' label is in
# that sample's selected cluster list. The cluster IDs come from the
# resolution-1 clustering cells above and are only valid for those labels.
selected_clusters = {
    'ME8': ['0', '9', '17', '18'],
    'ME9': ['5', '14', '21', '22'],
    'ME10': ['12', '15', '18', '21'],
    'ME11': ['10', '14', '18', '21', '22'],
    'ME12': ['7', '15', '21'],
}
for sample_id, clusters in selected_clusters.items():
    full_sample = adata_dict[sample_id]
    in_selected = full_sample.obs['leiden_post_QC'].isin(clusters)
    # .copy() stores an independent object in place of the full sample.
    adata_dict[sample_id] = full_sample[in_selected].copy()

In [None]:
# Concatenate the per-sample subsets into one object.
# NOTE(review): no `label`/`index_unique` is passed, so the dict keys (sample
# names) are not recorded in obs, and barcodes shared between samples would
# give duplicate obs_names — confirm obs already carries the sample identity.
adata = ad.concat(adata_dict)

# Restore gene annotations from ME12's first 11 .var columns.
# NOTE(review): presumably needed because concat's default merge strategy does
# not keep per-sample .var columns — confirm the 11-column cut-off is intended.
var_annotations = adata_dict['ME12'].var.iloc[:, range(0, 11)]
# Align the rows to the concatenated gene order before assigning. Assigning
# .var replaces var_names with the DataFrame's index, so a different order (or
# extra genes) in ME12 would otherwise mislabel columns of X or fail on length.
adata.var = var_annotations.loc[adata.var_names].copy()

# Save with the 'original_counts' layer as X.
adata.X = adata.layers['original_counts'].copy()

adata.write(f'{output_prefix}{species}/{genome}/adata_mm39_epcam_concat.h5ad')