In [None]:
import warnings
warnings.filterwarnings('ignore')

import os
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import matplotlib.pyplot as plt
import seaborn as sns

# Print scanpy's header (package/version information) so the run environment is recorded.
sc.logging.print_header()
# Figure defaults for the scanpy plots below: 100 dpi on a white background.
sc.settings.set_figure_params(dpi=100, facecolor='white')

In [None]:
# Load the AnnData object that every later cell works on.
# NOTE(review): judging by the file name this is presumably the scVI-processed dataset - confirm.
adata = sc.read_h5ad('./ad_scvimodel.h5ad')

In [None]:
# Show the AnnData summary (the bare expression is rendered as the cell output).
adata

In [None]:
# Overview UMAPs, one panel per annotation, laid out two per row without axes frames.
overview_keys = ['batch', 'Genotype', 'Sex', 'leiden_scVI']
sc.pl.umap(adata, color=overview_keys, ncols=2, frameon=False)

In [None]:
# Fixed colour per genotype: grey for WT, dark red for MT.
genotype_colors = {
    'WT': '#808080',
    'MT': '#8B0000',
}

# UMAP coloured by genotype using the colours above.
sc.pl.umap(
    adata,
    color='Genotype',
    palette=genotype_colors,
    size=3,
    frameon=False,
    title='Genotype',
)

In [None]:
# UMAP of the leiden_scVI clusters with outlined points and the cluster
# labels drawn directly on the embedding instead of in a side legend.
sc.pl.umap(
    adata,
    color='leiden_scVI',
    add_outline=True,
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=False,
    title='clustering of cells',
)

In [None]:
# Genes plotted on the UMAP, one panel per gene.
# NOTE(review): these look like the markers used to name the clusters below - confirm.
marker_genes = [
    'Naaa', 'Mki67', 'Ascl1', 'Neurod1', 'Dlx1', 'Aldh1l1',
    'Pdgfra', 'Mbp', 'Cx3cr1', 'Bgn', 'Cldn5',
]
sc.pl.umap(adata, color=marker_genes)

In [None]:
# Manual annotation: leiden_scVI cluster label -> cell-type name.
# Several clusters share one name (e.g. '0', '6' and '16' are all 'Astrocyte').
cell_type_names = {
    '0': 'Astrocyte',
    '1': 'Inhibitory_neuron',
    '2': 'OPC',
    '3': 'In_progenitor',
    '4': 'Ex_neuron',
    '5': 'Astrocyte_progenitor',
    '6': 'Astrocyte',
    '7': 'NPC',
    '8': 'OPC',
    '9': 'Inhibitory_neuron',
    '10': 'Pericyte',
    '11': 'Oligodendrocyte',
    '12': 'pri-OPC',
    '13': 'Microglia',
    '14': 'Ex_neuron',
    '15': 'Endothelial_cell',
    '16': 'Astrocyte',
    '17': 'Pericyte'
}

# Compare labels as strings so the lookup also works if the clusters were stored as integers.
cluster_labels = adata.obs['leiden_scVI'].astype(str)

# Fail loudly instead of silently producing NaN cell types for clusters
# that are missing from the annotation dict.
unmapped_clusters = sorted(set(cluster_labels) - set(cell_type_names))
if unmapped_clusters:
    raise ValueError(f"leiden_scVI clusters without a cell-type name: {unmapped_clusters}")

# The mapping is many-to-one, so the mapped column is cast to categorical
# explicitly; later cells rely on `.cat.categories`.
adata.obs['cell_type_name'] = cluster_labels.map(cell_type_names).astype('category')

In [None]:
# Side-by-side UMAPs: genotype next to the annotated cell types.
sc.pl.umap(adata, color=['Genotype', 'cell_type_name'], ncols=2, frameon=False)

In [None]:
# Fixed colour per annotated cell type.
cluster_to_color = {
    'Astrocyte': '#2ca02c',
    'Astrocyte_progenitor': '#f3fbd4',
    'Endothelial_cell': '#7f7f7f',
    'Ex_neuron': '#ff7f0e',
    'In_progenitor': '#ff9896',
    'Inhibitory_neuron': '#d62728',
    'Microglia': '#8c564b',
    'NPC': '#313695',
    'pri-OPC': '#aec7e8',
    'OPC': '#1f77b4',
    'Oligodendrocyte': '#9467bd',
    'Pericyte': '#ffbb78'
}

# `.cat.categories` below needs a categorical column; the cast is a no-op
# when the column already is one.
adata.obs['cell_type_name'] = adata.obs['cell_type_name'].astype('category')

# Per-cell colour column (kept as before; scanpy does not read it when plotting).
adata.obs['cell_type_name_colors'] = adata.obs['cell_type_name'].map(cluster_to_color)

# Colours listed in category order. scanpy looks up `uns['<key>_colors']`
# when colouring by a categorical obs key, so registering the list there is
# what makes the plot below use `cluster_to_color`; previously `palette`
# was computed but never used.
palette = [cluster_to_color[cluster] for cluster in adata.obs['cell_type_name'].cat.categories]
adata.uns['cell_type_name_colors'] = palette

sc.pl.umap(adata,
           color=['Genotype', 'cell_type_name'],
           ncols=2,
           frameon=False)

In [None]:
# Genes shown in the dot plot.
genes_of_interest = [
    'Aldh1l1', 'Plp1', 'Pdgfra', 'Olig2', 'Neurod1', 'Dlx1',
    'Naaa', 'Cx3cr1', 'Cldn5', 'Bgn', 'Mki67',
]

# Dot plot grouped by annotated cell type with a dendrogram, swapped axes,
# values taken from adata (not .raw) and scaled per gene ('var').
dotplot_options = {
    'groupby': 'cell_type_name',
    'dendrogram': True,
    'color_map': "Blues",
    'swap_axes': True,
    'use_raw': False,
    'standard_scale': 'var',
}
sc.pl.dotplot(adata, genes_of_interest, **dotplot_options)

In [None]:
# Per-cell table with just the genotype and the annotated cell type
# (a pandas DataFrame taken from adata.obs, despite the `adata_` prefix).
adata_temp = adata.obs.loc[:, ['Genotype', 'cell_type_name']]

In [None]:
# Display the genotype / cell-type table (the bare expression is rendered as the cell output).
adata_temp

In [None]:
# Absolute number of cells per genotype within each cell type.
(
    adata_temp
    .groupby('cell_type_name')['Genotype']
    .value_counts(normalize=False)
)

In [None]:
# Cell-type composition per genotype, computed directly from adata.obs.
# The counts used to be typed in by hand from an earlier cell's output,
# which goes stale whenever the annotation changes and had no 'Pericyte'
# rows, so the per-genotype percentages were based on incomplete totals.
annotated_obs = adata.obs[['cell_type_name', 'Genotype']].dropna()

# Rows: cell types (sorted by name); columns: WT and MT cell counts.
counts_wide = (
    pd.crosstab(annotated_obs['cell_type_name'].astype(str),
                annotated_obs['Genotype'].astype(str))
    .reindex(columns=['WT', 'MT'], fill_value=0)
    .rename_axis(index='cell_type_name', columns='Genotype')
)

# Total number of annotated cells per genotype (the percentage denominators).
genotype_totals = counts_wide.sum(axis=0)
total_wt = genotype_totals['WT']
total_mt = genotype_totals['MT']

# Long-format table: one row per (cell type, genotype) with count and percentage.
df = counts_wide.reset_index().melt(
    id_vars='cell_type_name', var_name='Genotype', value_name='Counts'
)
df['Percentage'] = df['Counts'] / df.groupby('Genotype')['Counts'].transform('sum') * 100

# Wide table used for plotting: share of each genotype's cells in each cell type.
# fillna(0) covers a genotype with no cells at all (0 / 0).
combined_data = (
    (counts_wide.div(genotype_totals, axis=1).fillna(0) * 100)
    .rename(columns={'WT': 'WT_Percentage', 'MT': 'MT_Percentage'})
    .rename_axis(columns=None)
    .reset_index()
)

# Grouped bar chart: WT and MT bars side by side for every cell type.
fig, ax = plt.subplots(figsize=(10, 8))
indices = range(len(combined_data))
bar_width = 0.35

ax.bar(indices, combined_data['WT_Percentage'], width=bar_width, label='WT', color='#1f77b4')
ax.bar([i + bar_width for i in indices], combined_data['MT_Percentage'], width=bar_width, label='MT', color='#ff7f0e')

ax.set_xlabel('Cell Type')
ax.set_ylabel('Percentage')
ax.set_title('Percentage of Each Cell Type by Genotype')
# Centre each tick between its WT and MT bar.
ax.set_xticks([i + bar_width / 2 for i in indices])
ax.set_xticklabels(combined_data['cell_type_name'], rotation=90)
ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Marker genes per leiden_scVI cluster (Wilcoxon test), top 25 genes plotted per cluster.
#
# Set the stored log1p base to None before ranking. setdefault() creates the
# 'log1p' entry when it is missing, so this line no longer raises KeyError
# on objects that lack it.
adata.uns.setdefault('log1p', {})["base"] = None
# NOTE(review): scanpy documents rank_genes_groups as expecting logarithmized
# data; confirm that ranking on the 'counts' layer is intended.
sc.tl.rank_genes_groups(adata, 'leiden_scVI', use_raw=False, layer='counts', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

In [None]:
# Marker genes per annotated cell type (Wilcoxon test), top 25 genes plotted per type.
# No key_added is passed, so this writes scanpy's default result slot.
#
# Set the stored log1p base to None before ranking. setdefault() creates the
# 'log1p' entry when it is missing, so this line no longer raises KeyError
# on objects that lack it.
adata.uns.setdefault('log1p', {})["base"] = None
# NOTE(review): scanpy documents rank_genes_groups as expecting logarithmized
# data; confirm that ranking on the 'counts' layer is intended.
sc.tl.rank_genes_groups(adata, 'cell_type_name', use_raw=False, layer='counts', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

In [None]:
# Differential expression between genotypes within each cell type.
# One CSV per cell type is written to ./rank_genes/ holding the 'MT' group's
# result table (scanpy's default reference is all remaining cells).

# Create the output folder up front so to_csv does not fail on a fresh checkout.
os.makedirs('./rank_genes', exist_ok=True)

# dropna(): a NaN cell type would select no cells and make the test fail.
unique_cell_types = adata.obs['cell_type_name'].dropna().unique()

for cell_type in unique_cell_types:
    # Explicit copy: rank_genes_groups stores its result on the object it is
    # given, and that object should not be a view of `adata`.
    cell_type_adata = adata[adata.obs['cell_type_name'] == cell_type, :].copy()

    # NOTE(review): use_raw=True reads adata.raw here, while the marker-gene
    # cells use use_raw=False with layer='counts' - confirm this is intended.
    sc.tl.rank_genes_groups(cell_type_adata, groupby='Genotype', use_raw=True, method='wilcoxon')

    genotype_de = sc.get.rank_genes_groups_df(cell_type_adata, group='MT')
    genotype_de.to_csv(f'./rank_genes/{cell_type}_rank_genes.csv')

In [None]:
# Strip plot of the significant genotype DE genes (adjusted p < 0.05) per cell
# type, read back from the per-cell-type CSVs in ./rank_genes/.

# Cell types in the left-to-right order used on the x axis.
ordered_cell_types = [
    'Ex_neuron',
    'In_progenitor',
    'Inhibitory_neuron',
    'NPC',
    'Astrocyte_progenitor',
    'Astrocyte',
    'pri-OPC',
    'OPC',
    'Oligodendrocyte',
    'Microglia',
    'Endothelial_cell',
    'Pericyte'
]

# Read every table exactly once. The loop used to iterate over
# `cell_type_names.items()` with `cell_type_names` itself as a loop target,
# which replaced the annotation dict with a string (breaking any re-run) and
# re-read the same CSV once per cluster sharing a cell-type name.
significant_tables = []
for cell_type in ordered_cell_types:
    de_table = pd.read_csv(f'./rank_genes/{cell_type}_rank_genes.csv')
    significant_tables.append(
        de_table.loc[de_table['pvals_adj'] < 0.05].assign(cluster=cell_type)
    )

# Single concat instead of growing a DataFrame inside the loop.
combined_df = pd.concat(significant_tables, ignore_index=True).drop_duplicates()

# Ordered categorical so sorting and plotting follow `ordered_cell_types`.
combined_df['cluster'] = pd.Categorical(combined_df['cluster'], categories=ordered_cell_types, ordered=True)
combined_df = combined_df.sort_values('cluster')

# Colour per cell type for the strip plot.
color_mapping = {
    'Astrocyte': '#2ca02c',
    'Astrocyte_progenitor': '#f3fbd4',
    'Endothelial_cell': '#7f7f7f',
    'Ex_neuron': '#ff7f0e',
    'In_progenitor': '#ff9896',
    'Inhibitory_neuron': '#d62728',
    'Microglia': '#8c564b',
    'NPC': '#313695',
    'pri-OPC': '#aec7e8',
    'OPC': '#1f77b4',
    'Oligodendrocyte': '#9467bd',
    'Pericyte': '#ffbb78'
}

fig, ax = plt.subplots(figsize=(12, 8))
# `hue` is set to the x variable because seaborn deprecates `palette`
# without `hue`; dodge=False keeps one strip per cell type.
strip_plot = sns.stripplot(x="cluster", y="logfoldchanges", hue="cluster", data=combined_df,
                           jitter=0.3, palette=color_mapping, dodge=False, ax=ax)

# Points outside [-5, 5] are not drawn but still count towards P / N below.
ax.set_ylim(-5, 5)

# Number of significant genes with positive (P) and negative (N) log fold
# change per cell type, written above and below each strip.
up_counts = combined_df.loc[combined_df['logfoldchanges'] > 0, 'cluster'].value_counts()
down_counts = combined_df.loc[combined_df['logfoldchanges'] < 0, 'cluster'].value_counts()
for i, cell_type in enumerate(ordered_cell_types):
    ax.text(i, 4.5, f'P={up_counts.get(cell_type, 0)}', ha='center', va='bottom', fontsize=9)
    ax.text(i, -4.5, f'N={down_counts.get(cell_type, 0)}', ha='center', va='top', fontsize=9)

ax.set_title('Log2 fold changes for each cell type with significance')
ax.set_xlabel('Cell Type')
ax.set_ylabel('Average log2 fold change')

# Replace the hue legend with an empty, frameless one.
ax.legend([], [], frameon=False)

plt.show()