# Load packages

In [None]:
import sys
import os
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context

# Set up the Synapse client
from synapseclient import Synapse
syn = Synapse()
syn.login()  # Assuming you're already logged in or have set up your credentials

# Set up work path: every relative path used after this point is resolved
# from inside data_dir
data_dir='Pseudotime_trajectories'
os.chdir(data_dir)

# Initial setting for plot size
from matplotlib import rcParams
FIGSIZE=(6, 6)

sc.settings.verbosity = 1
sc.logging.print_header()
# set number of cores to use
sc.settings.n_jobs = 25
sc.settings.set_figure_params( dpi=300, fontsize=6)

# Apply the default figure size only AFTER set_figure_params(): with its
# default scanpy=True that call installs scanpy's own rcParams, including
# figure.figsize, so an earlier assignment would be silently overwritten.
rcParams['figure.figsize']=FIGSIZE

# Load functions

## Initial settings: color, order, traits

In [None]:
from initial_settings import *

## Similarity of cell type

In [None]:
import similarity_cell_type

# Whole data analysis

In [None]:
# Download the processed Lister RNA object from the URL below and store it
# locally as lister_processed.h5ad (-q keeps wget silent, -O sets the output name).
!wget -q "https://storage.googleapis.com/neuro-dev/Processed_data/RNA-all_full-counts-and-downsampled-CPM.h5ad" -O lister_processed.h5ad

In [None]:
# Re-open the downloaded Lister object, expose two existing obs columns under
# the additional names 'SubID' and 'Age', and overwrite the file on disk.
ad_lister = sc.read('lister_processed.h5ad')
for new_col, source_col in (('SubID', 'batch'), ('Age', 'age')):
    ad_lister.obs[new_col] = ad_lister.obs[source_col]
ad_lister.write('lister_processed.h5ad')

In [None]:
%%sh
# Run the integration script on the in-house and Lister objects.
# NOTE(review): '@' appears to separate paired values (two inputs on -i, two
# batch labels on -b); -r looks like a random seed and -o like an output
# prefix (a later cell reads integrated_adata.h5ad). Confirm all option
# meanings against the script's argument parser, which is not shown here.

python data_integration_and_embeddings_calculation.py \
-i inhouse_data.h5ad@lister_processed.h5ad \
-r 123456 \
-b Aging@Lister \
-o integrated \
-d 1000 \
-v scanpy@6000 \
-p half \
-n 100 \
-k no

# Data visualization

In [None]:
# Integrated object read from the local working directory
# (presumably the output of the integration script; confirm).
adata = sc.read('integrated_adata.h5ad')

# Alternative download from synapse
# NOTE(review): data_dir is a relative path and the working directory was
# already changed to it during setup, so this download lands in a nested
# data_dir folder; confirm that is intended.
syn62289304 = syn.get(entity="syn62289304", downloadLocation=data_dir)
# syn.get() returns a File entity, not a path. sc.read() needs the location
# of the downloaded file, which the entity exposes as `.path`.
# `ad` is the object used by the plotting cells that follow.
ad = sc.read(syn62289304.path)

## Nuclei counts along age groups

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Per-nucleus metadata columns needed for the histograms
data = ad.obs[['Batch', 'numerical_age', 'stage_id']]

# Set up the plot: one row per Batch, one column per stage_id (in
# stage_id_order), coloured by stage, with independent axes for every panel
g = sns.FacetGrid(
    data,
    row="Batch",
    col="stage_id",
    col_order=stage_id_order,
    hue="stage_id",
    palette=colors_stage_id,
    height=2,
    aspect=1,
    sharex=False,
    sharey=False,
)

# Define a function to handle histplot
def safe_histplot(data, **kwargs):
    """Draw a seaborn histogram for one facet, tolerating empty facets.

    ``data`` is the subset of rows for the facet being drawn; all keyword
    arguments are forwarded unchanged to ``sns.histplot``. Nothing is drawn
    when the subset is empty. A ``ValueError`` raised while plotting is
    reported on stdout, together with the facet's first 'Batch' and
    'stage_id' values, instead of propagating.
    """
    try:
        if len(data) == 0:
            # Empty facet: nothing to plot
            return
        sns.histplot(data, **kwargs)
    except ValueError as e:
        print(f"ValueError: {e} for batch: {data['Batch'].iloc[0]} and stage_id: {data['stage_id'].iloc[0]}")
    
# Draw the stacked histograms, then label, title and annotate the grid
g.map_dataframe(safe_histplot, x='numerical_age', multiple="stack")
g.set_axis_labels("Numerical Age", "Nuclei Count Density")
g.set_titles(col_template='{col_name}', row_template='{row_name}')
g.add_legend(title="Stage ID")

# x-axis window for each stage column
xlims = {
    'Fetal': (-0.5, 0),
    'Neonatal': (0, 0.164),
    'Infancy': (0.164, 1),
    'Childhood': (1, 12),
    'Adolescence': (12, 20),
    'Young_Adulthood': (20, 40),
    'Middle_Adulthood': (40, 60),
    'Late_Adulthood': (60, 100),
}

# Upper y-axis bound for each batch row; other batches keep their automatic limits
ylim_tops = {'Aging': 50000, 'Lister': 20000}

for (batch, stage_id), ax in g.axes_dict.items():
    if batch in ylim_tops:
        ax.set_ylim(0, ylim_tops[batch])

    # Set specific x-limits based on stage_id
    if stage_id in xlims:
        ax.set_xlim(xlims[stage_id])

    # Remove grid lines
    ax.grid(False)

    # Widen both axes by 5% of their current span on each side so the bars
    # do not touch the frame
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    x_pad = 0.05 * (x_max - x_min)
    y_pad = 0.05 * (y_max - y_min)
    ax.set_xlim(x_min - x_pad, x_max + x_pad)
    ax.set_ylim(y_min - y_pad, y_max + y_pad)

# Save the figure
plt.savefig("files/figures/nuclei_counts_distribution.pdf", bbox_inches='tight')


## UMAP

### Batch

In [None]:
# UMAP embedding coloured by Batch, drawn at 6x6 and exported as a PDF
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.embedding(
        ad,
        basis='umap',
        color='Batch',
        palette=colors_batch,
        cmap='vlag',
        vmin=0,
        vmax=1,
        size=1,
        add_outline=True,
        outline_width=(0.5, 0.3),
        # legend_loc='on data' is not set, so the legend keeps its default placement
        legend_fontsize=10,
        legend_fontoutline=2,
        frameon=False,
        return_fig=True,
    )

# Export the current figure
plt.savefig(
    'files/figures/integrated_data_umap_batch.pdf',
    format='pdf',
    transparent=True,
)

### cell_type_uni

In [None]:
# UMAP embedding coloured by cell_type_uni, drawn at 6x6 and exported as a PDF
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.embedding(
        ad,
        basis='umap',
        color='cell_type_uni',
        palette=colors_class,
        cmap='vlag',
        vmin=0,
        vmax=1,
        size=1,
        add_outline=True,
        outline_width=(0.5, 0.3),
        # legend_loc='on data' is not set, so the legend keeps its default placement
        legend_fontsize=10,
        legend_fontoutline=2,
        frameon=False,
        return_fig=True,
    )

# Export the current figure
plt.savefig(
    'files/figures/integrated_data_umap_cell_type_uni.pdf',
    format='pdf',
    transparent=True,
)

### stage_id

In [None]:
from pandas.api.types import CategoricalDtype

# Ordered copy of stage_id whose category order follows ad.uns['stage_order']
cat_dtype = CategoricalDtype(categories=ad.uns['stage_order'], ordered=True)
ad.obs['stage_id_ord'] = ad.obs['stage_id'].astype(cat_dtype)

# UMAP embedding coloured by the ordered stage column, exported as a PDF
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.embedding(
        ad,
        basis='umap',
        color='stage_id_ord',
        palette=colors_stage_id,
        cmap='vlag',
        vmin=0,
        vmax=1,
        size=1,
        add_outline=True,
        outline_width=(0.5, 0.3),
        # legend_loc='on data' is not set, so the legend keeps its default placement
        legend_fontsize=10,
        legend_fontoutline=2,
        frameon=False,
        return_fig=True,
    )

# Export the current figure
plt.savefig(
    'files/figures/integrated_data_umap_stage_id_ord.pdf',
    format='pdf',
    transparent=True,
)

## UMAT

### Batch

In [None]:
# 'umat' embedding coloured by Batch, drawn at 6x6 and exported as a PDF
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.embedding(
        ad,
        basis='umat',
        color='Batch',
        palette=colors_batch,
        cmap='vlag',
        vmin=0,
        vmax=1,
        size=1,
        add_outline=True,
        outline_width=(0.5, 0.3),
        # legend_loc='on data' is not set, so the legend keeps its default placement
        legend_fontsize=10,
        legend_fontoutline=2,
        frameon=False,
        return_fig=True,
    )

# Export the current figure
plt.savefig(
    'files/figures/integrated_data_umat_batch.pdf',
    format='pdf',
    transparent=True,
)

### cell_type_uni

In [None]:
# 'umat' embedding coloured by cell_type_uni, drawn at 6x6 and exported as a PDF
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.embedding(
        ad,
        basis='umat',
        color='cell_type_uni',
        palette=colors_class,
        cmap='vlag',
        vmin=0,
        vmax=1,
        size=1,
        add_outline=True,
        outline_width=(0.5, 0.3),
        # legend_loc='on data' is not set, so the legend keeps its default placement
        legend_fontsize=10,
        legend_fontoutline=2,
        frameon=False,
        return_fig=True,
    )

# Export the current figure
plt.savefig(
    'files/figures/integrated_data_umat_cell_type_uni.pdf',
    format='pdf',
    transparent=True,
)

### stage_id

In [None]:
# 'umat' embedding coloured by the ordered stage column (stage_id_ord),
# drawn at 6x6 and exported as a PDF
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.embedding(
        ad,
        basis='umat',
        color='stage_id_ord',
        palette=colors_stage_id,
        cmap='vlag',
        vmin=0,
        vmax=1,
        size=1,
        add_outline=True,
        outline_width=(0.5, 0.3),
        # legend_loc='on data' is not set, so the legend keeps its default placement
        legend_fontsize=10,
        legend_fontoutline=2,
        frameon=False,
        return_fig=True,
    )

# Export the current figure
plt.savefig(
    'files/figures/integrated_data_umat_stage_id_ord.pdf',
    format='pdf',
    transparent=True,
)

## Similarity of cell type

In [None]:
# Run the project helper from the similarity_cell_type module on the
# integrated object.
# NOTE(review): the helper's signature is not shown here; the last argument
# looks like the output PDF path and the others like an obs column, a label
# and a numeric setting. Confirm against the module.
similarity_cell_type.similarity_cell_type_all(
    ad,
    'cell_type_uni',
    'Integrated_cell_type_uni',
    10,
    'files/figures/integrated_data_similarity_two_cohorts_cell_type_uni_all.pdf',
)
