In [None]:

from pathlib import Path

import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scvi
import seaborn as sns
from scipy.sparse import issparse

# Emit scanpy's logging header at the top of the notebook.
sc.logging.print_header()

In [None]:
# Load the AnnData object from ./adata_scvimodel.h5ad (path is relative to the working directory).
adata = sc.read_h5ad('./adata_scvimodel.h5ad')

In [None]:
# Display the AnnData summary right after loading.
adata

In [None]:
# Restore the saved SCVI model from ./models/scVI_model and attach it to `adata`.
model = scvi.model.SCVI.load('./models/scVI_model', adata=adata)

In [None]:
# Store the model's latent representation in obsm['X_scVI']; the neighbour graph below is built on it.
adata.obsm['X_scVI'] = model.get_latent_representation()

In [None]:
# Store the model's normalized expression (requested with library_size=1e4) as the 'scvi_normalized' layer.
adata.layers['scvi_normalized'] = model.get_normalized_expression(library_size=1e4)

In [None]:
# Display the AnnData summary again, now that obsm['X_scVI'] and layers['scvi_normalized'] have been added.
adata

In [None]:
# Neighbour graph on the scVI latent space, then a UMAP embedding coloured by genotype.
neighbor_params = {"n_neighbors": 30, "use_rep": "X_scVI"}
sc.pp.neighbors(adata, **neighbor_params)
sc.tl.umap(adata, min_dist=0.2)
sc.pl.umap(adata, color='genotype')

In [None]:
# Leiden clustering; the labels are written to obs['leiden_scVI'].
sc.tl.leiden(adata, resolution=0.5, key_added="leiden_scVI")

# Side-by-side UMAPs: genotype and cluster label.
umap_panels = ['genotype', 'leiden_scVI']
sc.pl.umap(adata, color=umap_panels, ncols=2, frameon=False)

In [None]:
# Fixed genotype colours; the figure is also saved through scanpy's `save` argument.
genotype_colors = dict(WT='#808080', MT='#8B0000')

sc.pl.umap(
    adata,
    color='genotype',
    palette=genotype_colors,
    size=3,
    frameon=False,
    title='genotype',
    save='genotype.pdf',
)

In [None]:
# One colour per Leiden cluster; position i in the list is the colour of cluster str(i).
_cluster_palette = [
    '#f3fbd4', '#2ca02c', '#1f77b4', '#8c564b', '#ff9896', '#d62728', '#ff7f0e',
    '#d8eff6', '#9467bd', '#ffbb78', '#17becf', '#aec7e8', '#313695', '#7f7f7f',
]
cluster_to_color = {str(label): colour for label, colour in enumerate(_cluster_palette)}

# Colours listed in the order of the cluster categories.
# A cluster label missing from cluster_to_color raises KeyError here.
leiden_colors = []
for cluster in adata.obs['leiden_scVI'].cat.categories:
    leiden_colors.append(cluster_to_color[str(cluster)])
adata.uns['leiden_scVI_colors'] = leiden_colors

umap_style = dict(
    add_outline=True,
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=False,
)
sc.pl.umap(
    adata,
    color='leiden_scVI',
    title='UMAP colored by Leiden Clustering',
    palette=adata.uns['leiden_scVI_colors'],
    **umap_style,
)

In [None]:
# Genotype and cluster UMAPs side by side.
sc.pl.umap(adata, color=['genotype', 'leiden_scVI'], ncols=2, frameon=False)

In [None]:
# Despite the name, this is a pandas DataFrame (two columns of adata.obs), not an AnnData object.
adata_temp = adata.obs[['genotype', 'leiden_scVI']]

In [None]:
# Display the genotype / cluster table.
adata_temp

In [None]:
# Raw (un-normalised) number of cells of each genotype within every cluster.
genotype_by_cluster = adata_temp.groupby('leiden_scVI')['genotype']
genotype_by_cluster.value_counts()

In [None]:
# Share of each genotype's cells that falls into every Leiden cluster.
# The percentages are computed from adata.obs, so the figure always reflects
# the current clustering instead of numbers copied from an earlier output.
cluster_by_genotype = pd.crosstab(
    adata.obs['leiden_scVI'].astype(str),
    adata.obs['genotype'].astype(str),
    normalize='columns',   # each genotype column sums to 1
) * 100
# Guarantee both columns exist even if one genotype has no cells.
cluster_by_genotype = cluster_by_genotype.reindex(columns=['WT', 'MT'], fill_value=0)

combined_data = (
    cluster_by_genotype
    .rename(columns={'WT': 'WT_Percentage', 'MT': 'MT_Percentage'})
    .rename_axis(index='leiden_scVI', columns=None)
    .reset_index()
)

# Display order of the clusters along the x axis.
custom_order = ['5', '4', '7', '8', '0', '1', '13', '10', '12', '6', '11', '2', '9', '3']
order_rank = {cluster: rank for rank, cluster in enumerate(custom_order)}
# Clusters not listed in custom_order are placed after the listed ones
# (the original lookup raised ValueError for them).
combined_data = (
    combined_data
    .assign(order=combined_data['leiden_scVI'].map(order_rank).fillna(len(custom_order)))
    .sort_values('order', kind='stable')
    .drop(columns='order')
    .reset_index(drop=True)
)

fig, ax = plt.subplots(figsize=(10, 8))
positions = np.arange(len(combined_data))
bar_width = 0.35

ax.bar(positions, combined_data['WT_Percentage'], width=bar_width, label='WT', color='#1f77b4')
ax.bar(positions + bar_width, combined_data['MT_Percentage'], width=bar_width, label='MT', color='#ff7f0e')

ax.set_ylim(0, 20)

ax.set_xlabel('Cell Type')
ax.set_ylabel('Percentage')
ax.set_title('Percentage of Each Cell Type by Genotype')
ax.set_xticks(positions + bar_width / 2)   # centre each tick between the two bars
ax.set_xticklabels(combined_data['leiden_scVI'], rotation=90)
ax.legend()

plt.tight_layout()
# Make sure the output directory exists before saving.
Path('./figures').mkdir(parents=True, exist_ok=True)
plt.savefig('./figures/combined_cell_type_percentage.pdf')
plt.show()

In [None]:
# Force the recorded log base to None before ranking genes.
# setdefault avoids a KeyError when uns has no 'log1p' entry at all.
adata.uns.setdefault('log1p', {})['base'] = None

# Wilcoxon marker test of every cluster against the rest.
# NOTE(review): the test reads layer='counts'; if that layer holds raw counts,
# the reported log fold changes may not be meaningful -- confirm the intended layer.
sc.tl.rank_genes_groups(adata, 'leiden_scVI', use_raw=False, layer='counts', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

In [None]:
# First three gene names of each cluster's ranking table, keyed by cluster label.
wilcoxon_markers = {
    cluster_id: list(sc.get.rank_genes_groups_df(adata, group=cluster_id)['names'].values[:3])
    for cluster_id in adata.obs['leiden_scVI'].values.unique()
}
wilcoxon_markers

In [None]:
# Write one ranking table per cluster to ./rank_genes/.
# The directory is created first so to_csv cannot fail on a fresh checkout.
rank_genes_dir = Path('./rank_genes')
rank_genes_dir.mkdir(parents=True, exist_ok=True)

for cluster_id in adata.obs['leiden_scVI'].values.unique():
    df = sc.get.rank_genes_groups_df(adata, group=cluster_id)
    df.to_csv(rank_genes_dir / f'cluster{cluster_id}_rank_genes.csv')

In [None]:
# Marker genes shown in the dot plot, one row per gene (swap_axes=True).
genes_of_interest = [
    'Plp1', 'Pdgfra', 'Cx3cr1', 'Rgs5', 'Aldh1l1', 'Nnat', 'Drd2',
    'Drd1', 'Gad2', 'Gad1', 'Mef2c', 'Slc17a7', 'Chat',
]

dotplot_options = dict(
    groupby='leiden_scVI',
    dendrogram=True,
    color_map="Blues",
    swap_axes=True,
    use_raw=False,
    standard_scale='var',
)
sc.pl.dotplot(adata, genes_of_interest, **dotplot_options)

In [None]:
# One UMAP panel per marker gene.
umap_marker_genes = [
    'Slc17a7', 'Gad1', 'Drd1', 'Drd2', 'Chat',
    'Aldh1l1', 'Pdgfra', 'Plp1', 'Cx3cr1', 'Vim',
]
sc.pl.umap(adata, color=umap_marker_genes)

In [None]:
# WT cortex: cells inside the bounding box, coloured by Leiden cluster.
x1, x2 = 2500, 12000
y1, y2 = -5000, 12000

# Keep cells whose centre falls inside the box (edges inclusive).
in_box = (adata.obs['CenterX_global_px'].between(x1, x2)
          & adata.obs['CenterY_global_px'].between(y1, y2))
subset = adata.obs[in_box]

unique_clusters = subset['leiden_scVI'].unique()

plt.figure(figsize=(10, 10))
for cluster in unique_clusters:
    # One scatter call per cluster so each gets its own legend entry.
    cluster_subset = subset.loc[subset['leiden_scVI'] == cluster]
    plt.scatter(
        cluster_subset['CenterX_global_px'],
        cluster_subset['CenterY_global_px'],
        color=cluster_to_color[cluster],
        label=f'Cluster {cluster}',
        s=5,
    )

plt.title('Subset of Spatial Distribution Colored by Leiden Clusters')
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.legend(markerscale=6)
plt.axis('equal')
plt.show()

In [None]:
# WT striatum: cells inside the bounding box, coloured by Leiden cluster.
x1, x2 = 12000, 22000
y1, y2 = 9000, 26000

# Keep cells whose centre falls inside the box (edges inclusive).
in_box = (adata.obs['CenterX_global_px'].between(x1, x2)
          & adata.obs['CenterY_global_px'].between(y1, y2))
subset = adata.obs[in_box]

unique_clusters = subset['leiden_scVI'].unique()

plt.figure(figsize=(10, 10))
for cluster in unique_clusters:
    # One scatter call per cluster so each gets its own legend entry.
    cluster_subset = subset.loc[subset['leiden_scVI'] == cluster]
    plt.scatter(
        cluster_subset['CenterX_global_px'],
        cluster_subset['CenterY_global_px'],
        color=cluster_to_color[cluster],
        label=f'Cluster {cluster}',
        s=5,
    )

plt.title('Subset of Spatial Distribution Colored by Leiden Clusters')
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.legend(markerscale=6)
plt.axis('equal')
plt.show()

In [None]:
# MT cortex: cells inside the bounding box, coloured by Leiden cluster.
x1, x2 = -500, 9000
y1, y2 = 63500, 80500

# Keep cells whose centre falls inside the box (edges inclusive).
in_box = (adata.obs['CenterX_global_px'].between(x1, x2)
          & adata.obs['CenterY_global_px'].between(y1, y2))
subset = adata.obs[in_box]

unique_clusters = subset['leiden_scVI'].unique()

plt.figure(figsize=(10, 10))
for cluster in unique_clusters:
    # One scatter call per cluster so each gets its own legend entry.
    cluster_subset = subset.loc[subset['leiden_scVI'] == cluster]
    plt.scatter(
        cluster_subset['CenterX_global_px'],
        cluster_subset['CenterY_global_px'],
        color=cluster_to_color[cluster],
        label=f'Cluster {cluster}',
        s=5,
    )

plt.title('Subset of Spatial Distribution Colored by Leiden Clusters')
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.legend(markerscale=6)
plt.axis('equal')
plt.show()

In [None]:
# MT striatum: cells inside the bounding box, coloured by Leiden cluster.
# NOTE(review): the "MT striatum" gene-expression cells further down use
# x = 12000-22000 with this same y range; confirm which x range is intended.
x1, x2 = 29000, 39000
y1, y2 = 75000, 92000

# Keep cells whose centre falls inside the box (edges inclusive).
in_box = (adata.obs['CenterX_global_px'].between(x1, x2)
          & adata.obs['CenterY_global_px'].between(y1, y2))
subset = adata.obs[in_box]

unique_clusters = subset['leiden_scVI'].unique()

plt.figure(figsize=(10, 10))
for cluster in unique_clusters:
    # One scatter call per cluster so each gets its own legend entry.
    cluster_subset = subset.loc[subset['leiden_scVI'] == cluster]
    plt.scatter(
        cluster_subset['CenterX_global_px'],
        cluster_subset['CenterY_global_px'],
        color=cluster_to_color[cluster],
        label=f'Cluster {cluster}',
        s=5,
    )

plt.title('Subset of Spatial Distribution Colored by Leiden Clusters')
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.legend(markerscale=6)
plt.axis('equal')
plt.show()

In [None]:
# WT cortex: spatial map of Slc17a7 expression (viridis colour map, automatic colour range).
# `issparse` comes from the notebook's import cell.
x1, y1 = 2500, -5000
x2, y2 = 12000, 12000

# Keep cells whose centre falls inside the box (edges inclusive).
subset = adata.obs[(adata.obs['CenterX_global_px'] >= x1) &
                   (adata.obs['CenterX_global_px'] <= x2) &
                   (adata.obs['CenterY_global_px'] >= y1) &
                   (adata.obs['CenterY_global_px'] <= y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
subset_indices = np.where(np.isin(adata.obs_names, subset.index))[0]

gene_expression = adata[:, 'Slc17a7'].X[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray()
# Always reduce to a 1-D vector, for dense as well as sparse X.
gene_expression = np.asarray(gene_expression).ravel()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(subset['CenterX_global_px'],
                      subset['CenterY_global_px'],
                      c=gene_expression, cmap='viridis', s=20)
plt.colorbar(scatter)
plt.title('Spatial Distribution of Cells by Slc17a7 Expression in Selected Rectangle')
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
# No legend: the scatter has no labelled artists, the colour bar is the key.
plt.axis('equal')
plt.show()

In [None]:
# WT cortex: spatial map of Slc17a7 expression.
x1, x2 = 2500, 12000
y1, y2 = -5000, 12000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Slc17a7'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.title('Spatial Distribution of Cells by Slc17a7 Expression in Selected Rectangle')
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT cortex: spatial map of Gad1 expression.
x1, x2 = 2500, 12000
y1, y2 = -5000, 12000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Gad1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT cortex: spatial map of Pvalb expression.
x1, x2 = 2500, 12000
y1, y2 = -5000, 12000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Pvalb'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT cortex: spatial map of Aldh1l1 expression.
# The gene name is held in one variable so the plotted gene and the title
# cannot drift apart (the title previously named Slc17a7).
gene = 'Aldh1l1'

x1, y1 = 2500, -5000
x2, y2 = 12000, 12000

# Keep cells whose centre falls inside the box (edges inclusive).
subset = adata.obs[(adata.obs['CenterX_global_px'] >= x1) &
                   (adata.obs['CenterX_global_px'] <= x2) &
                   (adata.obs['CenterY_global_px'] >= y1) &
                   (adata.obs['CenterY_global_px'] <= y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
subset_indices = np.where(np.isin(adata.obs_names, subset.index))[0]

gene_expression = adata[:, gene].X[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(subset['CenterX_global_px'],
                      subset['CenterY_global_px'],
                      c=gene_expression, cmap='Reds', s=20)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.title(f'Spatial Distribution of Cells by {gene} Expression in Selected Rectangle')
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT cortex: spatial map of Plp1 expression.
x1, x2 = 2500, 12000
y1, y2 = -5000, 12000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Plp1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT cortex: spatial map of Slc17a7 expression.
x1, x2 = -500, 9000
y1, y2 = 63500, 80500

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Slc17a7'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT cortex: spatial map of Gad1 expression.
x1, x2 = -500, 9000
y1, y2 = 63500, 80500

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Gad1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT cortex: spatial map of Pvalb expression.
x1, x2 = -500, 9000
y1, y2 = 63500, 80500

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Pvalb'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT cortex: spatial map of Aldh1l1 expression.
x1, x2 = -500, 9000
y1, y2 = 63500, 80500

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Aldh1l1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT cortex: spatial map of Plp1 expression.
x1, x2 = -500, 9000
y1, y2 = 63500, 80500

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Plp1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT striatum: spatial map of Drd1 expression.
x1, x2 = 12000, 22000
y1, y2 = 9000, 26000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Drd1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT striatum: spatial map of Drd2 expression.
x1, x2 = 12000, 22000
y1, y2 = 9000, 26000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Drd2'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT striatum: spatial map of Plp1 expression.
x1, x2 = 12000, 22000
y1, y2 = 9000, 26000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Plp1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.title('Spatial Distribution of Cells by Plp1 Expression in Selected Rectangle')
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT striatum: spatial map of Chat expression.
x1, x2 = 12000, 22000
y1, y2 = 9000, 26000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Chat'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# WT striatum: spatial map of Vim expression.
x1, x2 = 12000, 22000
y1, y2 = 9000, 26000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Vim'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT striatum: spatial map of Drd1 expression.
# NOTE(review): the "MT striatum" cluster-map cell uses x = 29000-39000 for
# this region; confirm which x range is intended.
x1, x2 = 12000, 22000
y1, y2 = 75000, 92000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Drd1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT striatum: spatial map of Drd2 expression.
# NOTE(review): the "MT striatum" cluster-map cell uses x = 29000-39000 for
# this region; confirm which x range is intended.
x1, x2 = 12000, 22000
y1, y2 = 75000, 92000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Drd2'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT striatum: spatial map of Plp1 expression.
# NOTE(review): the "MT striatum" cluster-map cell uses x = 29000-39000 for
# this region; confirm which x range is intended.
x1, x2 = 12000, 22000
y1, y2 = 75000, 92000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Plp1'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT striatum: spatial map of Vim expression.
# NOTE(review): the "MT striatum" cluster-map cell uses x = 29000-39000 for
# this region; confirm which x range is intended.
x1, x2 = 12000, 22000
y1, y2 = 75000, 92000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Vim'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT striatum: spatial map of Chat expression.
# NOTE(review): the "MT striatum" cluster-map cell uses x = 29000-39000 for
# this region; confirm which x range is intended.
x1, x2 = 12000, 22000
y1, y2 = 75000, 92000

# Keep cells whose centre falls inside the box (edges inclusive).
cell_x = adata.obs['CenterX_global_px']
cell_y = adata.obs['CenterY_global_px']
subset = adata.obs[cell_x.between(x1, x2) & cell_y.between(y1, y2)]

unique_clusters = subset['leiden_scVI'].unique()

# Row positions in adata of the cells kept above.
is_selected = np.isin(adata.obs_names, subset.index)
subset_indices = np.where(is_selected)[0]

gene_column = adata[:, 'Chat'].X
gene_expression = gene_column[subset_indices, :]
if issparse(gene_expression):
    gene_expression = gene_expression.toarray().flatten()

# NOTE(review): log1p is applied to adata.X as stored -- confirm X is not already log-transformed.
gene_expression = np.log1p(gene_expression)

plt.figure(figsize=(10, 10))
scatter = plt.scatter(
    x=subset['CenterX_global_px'],
    y=subset['CenterY_global_px'],
    c=gene_expression,
    cmap='Reds',
    s=20,
)
plt.colorbar(scatter)
plt.clim(0, 1.5)  # fixed colour range
plt.xlabel('X Coordinate (px)')
plt.ylabel('Y Coordinate (px)')
plt.axis('equal')
plt.show()

In [None]:
# MT-vs-WT Wilcoxon test inside every Leiden cluster; one CSV per cluster.
# NOTE(review): this test reads adata.X (no `layer`), whereas the cluster-marker
# test above passes layer='counts' -- confirm the difference is intended.
rank_genes2_dir = Path('./rank_genes2')
rank_genes2_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create directories

for cluster_id in adata.obs['leiden_scVI'].values.unique():
    # Work on a copy: rank_genes_groups writes into `uns`, which must not happen on a view.
    temp = adata[adata.obs['leiden_scVI'] == cluster_id, :].copy()

    # Both genotypes need at least two cells, otherwise the comparison is undefined.
    genotype_counts = temp.obs['genotype'].value_counts()
    if not all(genotype_counts.get(genotype, 0) >= 2 for genotype in ('WT', 'MT')):
        print(f'Cluster {cluster_id} skipped: insufficient cells per genotype.')
        continue

    sc.tl.rank_genes_groups(temp, groupby="genotype", use_raw=False, method='wilcoxon')
    df = sc.get.rank_genes_groups_df(temp, group="MT")
    df.to_csv(rank_genes2_dir / f'cluster{cluster_id}_rank_genes2.csv')

In [None]:
# Strip plot of the significant (adjusted p < 0.05) MT-vs-WT genes of every cluster,
# read back from the per-cluster CSV files in ./rank_genes2/.
# Colours come from `cluster_to_color`, defined in the UMAP palette cell above.
unique_clusters = range(14)
cluster_labels = [str(cluster_id) for cluster_id in unique_clusters]

# Collect the per-cluster tables in a list and concatenate once.
significant_frames = []
for cluster_label in cluster_labels:
    csv_path = Path(f'./rank_genes2/cluster{cluster_label}_rank_genes2.csv')
    if not csv_path.exists():
        # A cluster without an exported table is reported and left empty in the plot.
        print(f'Cluster {cluster_label} skipped: {csv_path} not found.')
        continue
    cluster_df = pd.read_csv(csv_path)
    significant_frames.append(
        cluster_df[cluster_df['pvals_adj'] < 0.05].assign(cluster=cluster_label)
    )

if not significant_frames:
    raise FileNotFoundError('No per-cluster tables found in ./rank_genes2/.')

combined_df = pd.concat(significant_frames)
# Ordered categorical so clusters appear in numeric rather than lexical order.
combined_df['cluster'] = pd.Categorical(combined_df['cluster'], categories=cluster_labels, ordered=True)
combined_df = combined_df.sort_values('cluster')

color_dict = {str(cluster): color for cluster, color in cluster_to_color.items()}

plt.figure(figsize=(12, 8))
# hue mirrors x so the palette is applied per cluster; dodge=False keeps one strip per cluster.
strip_plot = sns.stripplot(x="cluster", y="logfoldchanges", hue="cluster", data=combined_df,
                           jitter=0.35, palette=color_dict, dodge=False)

# Annotate each cluster with its number of up- (P) and down-regulated (N) genes.
is_up = combined_df['logfoldchanges'] > 0
is_down = combined_df['logfoldchanges'] < 0
for position, cluster_label in enumerate(cluster_labels):
    in_cluster = combined_df['cluster'] == cluster_label
    num_pos_genes = int((in_cluster & is_up).sum())
    num_neg_genes = int((in_cluster & is_down).sum())
    plt.text(position, 1.5, f'P={num_pos_genes}', ha='center', va='bottom', fontsize=9)
    plt.text(position, -1.5, f'N={num_neg_genes}', ha='center', va='top', fontsize=9)

plt.title('Log2 fold changes for each cluster with significance')
plt.xlabel('Cluster')
plt.ylabel('Average log2 fold change')

# Replace any automatically generated hue legend with an empty one.
plt.legend([], [], frameon=False)
plt.show()

In [None]:
# Field-of-view (FOV) number -> brain region, given as inclusive FOV ranges.
fov_ranges = [
    (1, 18, 'cortex'),
    (19, 19, 'CC'),
    (20, 35, 'striatum'),
    (36, 53, 'cortex'),
    (54, 54, 'CC'),
    (55, 70, 'striatum'),
]
# Keys are strings ('1' ... '70').
brain_region = {
    str(fov): region
    for first_fov, last_fov, region in fov_ranges
    for fov in range(first_fov, last_fov + 1)
}

# Cast to str before mapping so integer-typed FOV values still match the string keys.
adata.obs['brain_region'] = adata.obs['fov'].astype(str).map(brain_region)

# Report FOVs that received no region instead of silently leaving NaN.
unmapped = adata.obs['brain_region'].isna()
if unmapped.any():
    print(f'{int(unmapped.sum())} cells belong to an FOV without a brain-region assignment.')

adata.obs.head()

In [None]:
# Cortex: restrict to cells annotated as cortex (this is a view of `adata`).
is_cortex = adata.obs['brain_region'] == "cortex"
adata_cortex = adata[is_cortex, :]
print(adata_cortex)

In [None]:
# Cluster markers within cortex, restricted to clusters with at least 30 cortical cells.
group_counts = adata_cortex.obs['leiden_scVI'].value_counts()
valid_groups = group_counts.loc[group_counts.ge(30)].index

keep_cell = adata_cortex.obs['leiden_scVI'].isin(valid_groups)
filtered_data = adata_cortex[keep_cell].copy()

# Force the recorded log base to None before ranking genes.
filtered_data.uns['log1p']["base"] = None

sc.tl.rank_genes_groups(
    filtered_data,
    'leiden_scVI',
    use_raw=False,
    layer='counts',
    method='wilcoxon',
)

sc.pl.rank_genes_groups(filtered_data, n_genes=25, sharey=False)

In [None]:
# MT-vs-WT Wilcoxon test inside every cluster, cortex cells only; one CSV per cluster.
cortex_out_dir = Path('./cortex_gene_exp')
cortex_out_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create directories

all_clusters = adata_cortex.obs['leiden_scVI'].unique()

for cluster_id in all_clusters:
    temp = adata_cortex[adata_cortex.obs['leiden_scVI'] == cluster_id, :].copy()

    # Both genotypes must be present with at least two cells each; a bare
    # n_obs > 1 check lets single-genotype clusters through and the test then fails.
    genotype_counts = temp.obs['genotype'].value_counts()
    has_both_genotypes = all(genotype_counts.get(genotype, 0) >= 2 for genotype in ('WT', 'MT'))

    if has_both_genotypes:
        sc.tl.rank_genes_groups(temp, groupby="genotype", use_raw=False, method='wilcoxon')
        df = sc.get.rank_genes_groups_df(temp, group="MT")
        df.to_csv(cortex_out_dir / f'cluster{cluster_id}.csv')
    else:
        print(f'Cluster {cluster_id} skipped due to insufficient cell number.')

In [None]:
# Striatum: restrict to cells annotated as striatum (this is a view of `adata`).
is_striatum = adata.obs['brain_region'] == "striatum"
adata_striatum = adata[is_striatum, :]
print(adata_striatum)

In [None]:
# Cluster markers within striatum, restricted to clusters with at least 30 striatal cells.
group_counts = adata_striatum.obs['leiden_scVI'].value_counts()
valid_groups = group_counts.loc[group_counts.ge(30)].index

keep_cell = adata_striatum.obs['leiden_scVI'].isin(valid_groups)
filtered_data = adata_striatum[keep_cell].copy()

# Force the recorded log base to None before ranking genes.
filtered_data.uns['log1p']["base"] = None

sc.tl.rank_genes_groups(
    filtered_data,
    'leiden_scVI',
    use_raw=False,
    layer='counts',
    method='wilcoxon',
)

sc.pl.rank_genes_groups(filtered_data, n_genes=25, sharey=False)

In [None]:
# MT-vs-WT Wilcoxon test inside every cluster, striatum cells only; one CSV per cluster.
striatum_out_dir = Path('./striatum_gene_exp')
striatum_out_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create directories

all_clusters = adata_striatum.obs['leiden_scVI'].unique()

for cluster_id in all_clusters:
    temp = adata_striatum[adata_striatum.obs['leiden_scVI'] == cluster_id, :].copy()

    # Look each genotype up explicitly: a genotype that is entirely absent does
    # not appear in value_counts(), so checking only the listed counts would pass.
    genotype_counts = temp.obs['genotype'].value_counts()
    has_both_genotypes = all(genotype_counts.get(genotype, 0) >= 2 for genotype in ('WT', 'MT'))

    if has_both_genotypes:
        sc.tl.rank_genes_groups(temp, groupby="genotype", use_raw=False, method='wilcoxon')
        df = sc.get.rank_genes_groups_df(temp, group="MT")
        df.to_csv(striatum_out_dir / f'cluster{cluster_id}.csv')
    else:
        print(f'Cluster {cluster_id} skipped: insufficient cells per genotype.')

In [None]:
# Cells of Leiden cluster "8", taken as an independent copy: the following cells
# recompute neighbours / UMAP / Leiden and add obs columns on this object, which
# must not be done on a view of `adata`.
adata_interneuron = adata[adata.obs['leiden_scVI'] == "8", :].copy()
print(adata_interneuron)

In [None]:
# Recompute the neighbour graph and UMAP on the cluster-8 subset only.
interneuron_neighbor_params = {"n_neighbors": 30, "use_rep": "X_scVI"}
sc.pp.neighbors(adata_interneuron, **interneuron_neighbor_params)
sc.tl.umap(adata_interneuron, min_dist=0.1)

In [None]:
# Sub-cluster the subset. The labels are written to obs['leiden_scVI'] of
# `adata_interneuron`, replacing the labels that column held in the subset.
sc.tl.leiden(adata_interneuron, resolution=0.9, key_added="leiden_scVI")

interneuron_panels = ['genotype', 'leiden_scVI']
sc.pl.umap(
    adata_interneuron,
    color=interneuron_panels,
    ncols=2,
    frameon=False,
    save='IN_1.pdf',
)

In [None]:
# The Leiden sub-clustering is already computed in the previous cell with the
# same key and resolution, so it is not re-run here.
# NOTE(review): 'brain_region2' is not created anywhere in the visible notebook
# (only 'brain_region' is) -- confirm the column exists in obs or is a typo.
sc.pl.umap(adata_interneuron,
           color=['brain_region2', 'leiden_scVI'],
           ncols=2,
           frameon=False)

In [None]:
# One UMAP panel per gene on the cluster-8 subset.
interneuron_marker_genes = ['Pvalb', 'Sst', 'Vip', 'Npy', 'Nnat', 'Mobp']
sc.pl.umap(adata_interneuron, color=interneuron_marker_genes)

In [None]:
# Name of each sub-cluster; position i in the list names sub-cluster str(i).
_subcluster_names = [
    'Cx_Sst_neuron',        # 0
    'Cx_PV_neuron',         # 1
    'Str_PV_neuron',        # 2
    'Other',                # 3
    'Cx_PV_neuron',         # 4
    'Cx_VIP_neuron',        # 5
    'Str_Th_interneuron',   # 6
    'Str_Npy_Sst_neuron',   # 7
    'Cx_Npy_interneuron',   # 8
    'Other',                # 9
    'Septum_neuron',        # 10
]
cell_type_names = {str(label): name for label, name in enumerate(_subcluster_names)}

# Sub-cluster labels without an entry in the mapping become NaN.
interneuron_labels = adata_interneuron.obs['leiden_scVI']
adata_interneuron.obs['cell_type_name_2'] = interneuron_labels.map(cell_type_names)

In [None]:
# Named cell types next to the raw sub-cluster labels.
cell_type_panels = ['cell_type_name_2', 'leiden_scVI']
sc.pl.umap(adata_interneuron, color=cell_type_panels, ncols=2, frameon=False)