In [None]:
import os
import sys

import anndata as ad
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

# import cell2location
# import scvi

from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs

In [None]:
# All input/output paths below are built relative to the current working
# directory of the kernel, so results depend on where the notebook is launched.
root_path = os.getcwd()

In [None]:
# Folder layout under <root_path>/deconvolution:
#   reference_signatures/  -> ref_run_name
#   cell2location_map/     -> run_name
results_folder = os.path.join(root_path, 'deconvolution')
ref_run_name = os.path.join(results_folder, 'reference_signatures')
run_name = os.path.join(results_folder, 'cell2location_map')

In [None]:
# Load the AnnData object stored as sp.h5ad inside the run folder.
# The path is built with os.path.join, like every other path in this notebook,
# instead of hard-coding "/" as the separator.
adata_file = os.path.join(run_name, 'sp.h5ad')
adata_vis = sc.read_h5ad(adata_file)
# mod = cell2location.models.Cell2location.load(f"{run_name}", adata_vis)

In [None]:
# Work on a copy so the column renaming and the extra total column added in the
# following cells do not modify the table stored in adata_vis.obsm.
# NOTE(review): presumably the 5% posterior quantile of cell abundance written
# by cell2location — confirm against the mapping notebook.
df_cellabundance = adata_vis.obsm['q05_cell_abundance_w_sf'].copy()

In [None]:
# Show the factor names stored with the model; they are used below as the
# column labels of the abundance table.
adata_vis.uns['mod']['factor_names']

In [None]:
# Relabel the abundance columns with the stored factor names.
# Assumes the factor names are in the same order as the columns of
# obsm['q05_cell_abundance_w_sf'] — TODO confirm.
df_cellabundance.columns =  adata_vis.uns['mod']['factor_names']

In [None]:
# Total abundance per row = sum over the cell-type (factor-name) columns only.
# Restricting the sum to those columns keeps this cell idempotent: re-running it
# no longer folds a previously added 'Total_abundance_spot' column into the new
# total. On a first run the result is the same as summing all columns.
celltype_columns = list(adata_vis.uns['mod']['factor_names'])
df_cellabundance['Total_abundance_spot'] = df_cellabundance[celltype_columns].sum(axis=1)

In [None]:
# Attach the abundance columns to the observation metadata, matching rows on
# the shared index (default inner join: only index values present in both).
df_merged = adata_vis.obs.merge(df_cellabundance, left_index=True, right_index=True)

In [None]:
# Inspect the merged metadata + abundance table.
df_merged

Next, the zonation annotation needs to be added to this table.

In [None]:
results_zonation_folder = os.path.join(root_path, 'analyzed', 'zonation')
# Regular files only, sorted: os.listdir returns entries in arbitrary order, and
# the order of adata_list determines the row order of the concatenated object.
file_names = sorted(
    f
    for f in os.listdir(results_zonation_folder)
    if os.path.isfile(os.path.join(results_zonation_folder, f))
)

# One AnnData per .h5ad file. `ad.read` is a deprecated alias of `read_h5ad`,
# so the explicit reader is used.
adata_list = [
    ad.read_h5ad(os.path.join(results_zonation_folder, file))
    for file in file_names
    if file.endswith('.h5ad')
]

In [None]:
# Concatenate the per-file AnnData objects into a single one.
# The key(s) found in each object's uns["spatial"] are used as the labels
# written to obs["library_id"] and appended to the obs names with "-".
# Assumes exactly one uns["spatial"] entry per object, so that `keys` lines up
# one-to-one with adata_list — TODO confirm.
adata_concat = sc.concat(
    adata_list,
    join='outer',
    label="library_id",
    keys=[
        library_id
        for sample in adata_list
        for library_id in sample.uns["spatial"].keys()
    ],
    index_unique="-",
    uns_merge="unique",
)

In [None]:
# Add the 'zonation' column from the concatenated object to the abundance
# table, matching rows on the index (inner join).
df_merged_zonation = df_merged.merge(
    adata_concat.obs[['zonation']],
    left_index=True,
    right_index=True,
)

In [None]:
# Inspect the abundance table after adding the zonation column.
df_merged_zonation

In [None]:
# Build the combined grouping labels (values joined with '-') that serve as
# x-axis categories in the box plots below.
df_merged_zonation = df_merged_zonation.assign(
    all_conditions=lambda df: (
        df['Gender'].astype(str) + '-' + df['zonation'].astype(str) + '-' + df['Condition'].astype(str)
    ),
    sex_treatment=lambda df: df['Gender'].astype(str) + '-' + df['Condition'].astype(str),
    sex_zonation=lambda df: df['Gender'].astype(str) + '-' + df['zonation'].astype(str),
    zonation_treatment=lambda df: df['zonation'].astype(str) + '-' + df['Condition'].astype(str),
)

# A first look at all cell type abundances per condition (absolute values)

In [None]:
# EDA: distribution of each cell type's absolute abundance across the combined
# sex-zonation-treatment groups, one figure per cell type.
# seaborn (`sns`) is imported with the other dependencies in the first cell
# rather than in the middle of the notebook.
for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_zonation, x='all_conditions', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)  # long group labels: draw them vertically
    plt.show()

In [None]:
# Absolute abundance of each cell type grouped by 'Gender', one figure per cell type.
for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_zonation, x='Gender', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.show()

There is a clear imbalance: many more cells per spot are predicted for females than for males. Could this come from the original number of UMIs/genes per sex?

In [None]:
# Total abundance per spot, grouped by 'Gender'.
ax = sns.boxplot(data=df_merged_zonation, x='Gender', y='Total_abundance_spot', showfliers=False)
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# 'total_counts' column of the observation metadata, grouped by 'Gender'.
ax = sns.boxplot(data=df_merged_zonation, x='Gender', y='total_counts', showfliers=False)
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# 'n_genes_by_counts' column of the observation metadata, grouped by 'Gender'.
ax = sns.boxplot(data=df_merged_zonation, x='Gender', y='n_genes_by_counts', showfliers=False)
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

The number of genes and counts per sex is quite similar, so this does not seem to be the cause. Could it be due to unbalanced cell numbers in the single-cell reference used for deconvolution? Either way, we clearly need to work with proportions.

## Working with proportions per spot

In [None]:
# Inspect the abundance table stored in the AnnData object (original column labels).
adata_vis.obsm['q05_cell_abundance_w_sf']

In [None]:
# Row-wise total of the abundance table; used below as the denominator when
# converting abundances to proportions.
row_sums = adata_vis.obsm['q05_cell_abundance_w_sf'].sum(axis=1)

In [None]:
# Inspect the per-row totals.
row_sums

In [None]:
# Convert abundances to proportions: divide every row by its own total so that
# each row sums to 1 (rows whose total is 0 would yield NaN).
proportions_df = adata_vis.obsm['q05_cell_abundance_w_sf'].divide(row_sums, axis='index')

In [None]:
# Relabel the proportion columns with the stored factor names.
# Assumes the factor names are in the same order as the columns of
# obsm['q05_cell_abundance_w_sf'] — TODO confirm.
proportions_df.columns =  adata_vis.uns['mod']['factor_names']

In [None]:
# Inspect the per-row cell type proportions.
proportions_df

In [None]:
# Attach the proportions to the metadata of the concatenated object (which
# carries the 'zonation' column), matching rows on the index (inner join).
# NOTE(review): the absolute-abundance table above starts from adata_vis.obs
# instead; confirm both index sets cover the same spots.
df_merged_prop = adata_concat.obs.merge(proportions_df, left_index=True, right_index=True)

In [None]:
# Build the combined grouping labels (values joined with '-') that serve as
# x-axis categories in the proportion box plots below.
df_merged_prop = df_merged_prop.assign(
    all_conditions=lambda df: (
        df['Gender'].astype(str) + '-' + df['zonation'].astype(str) + '-' + df['Condition'].astype(str)
    ),
    sex_treatment=lambda df: df['Gender'].astype(str) + '-' + df['Condition'].astype(str),
    sex_zonation=lambda df: df['Gender'].astype(str) + '-' + df['zonation'].astype(str),
    zonation_treatment=lambda df: df['zonation'].astype(str) + '-' + df['Condition'].astype(str),
)

In [None]:
# EDA: proportion of each cell type across the combined
# sex-zonation-treatment groups, one figure per cell type.

for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_prop, x='all_conditions', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.show()

In [None]:
# Proportion of each cell type grouped by 'Gender'.
for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_prop, x='Gender', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.show()

In [None]:
# Proportion of each cell type grouped by 'zonation'.
for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_prop, x='zonation', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.show()

In [None]:
# Proportion of each cell type grouped by 'Condition'.
for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_prop, x='Condition', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.show()

In [None]:
# Proportion of each cell type grouped by the combined 'sex_treatment' label.
for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_prop, x='sex_treatment', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.show()

In [None]:
# Proportion of each cell type grouped by the combined 'sex_zonation' label.
for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_prop, x='sex_zonation', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.show()

In [None]:
# Proportion of each cell type grouped by the combined 'zonation_treatment' label.
for cell_type in adata_vis.uns['mod']['factor_names']:
    ax = sns.boxplot(data=df_merged_prop, x='zonation_treatment', y=cell_type, showfliers=False)
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.show()

## Since many values are close to zero for some cell types, I would apply a log transformation to help manage the data skewness, and a permutation test to help assess the differences between conditions. I will also analyse males and females separately, given the imbalance shown above.

In [None]:
# from scipy.stats import mannwhitneyu

In [None]:
# df_cellabundance = adata_vis.obsm['q05_cell_abundance_w_sf'].copy()

In [None]:
# df_cellabundance_logTrans = np.log1p(df_cellabundance) 

In [None]:
# df_cellabundance_logTrans.columns =  adata_vis.uns['mod']['factor_names']

In [None]:
# df_cellabundance_logTrans

In [None]:
# df_merged_LogTrans = pd.merge(adata_vis.obs, df_cellabundance_logTrans,  left_index=True, right_index=True)

In [None]:
# df_merged_LogTrans

In [None]:
# df_merged_LogTrans_zonation = pd.merge(df_merged_LogTrans, pd.DataFrame(adata_concat.obs['zonation']),  left_index=True, right_index=True)

In [None]:
# df_merged_LogTrans_zonation['conditions_region'] =  df_merged_LogTrans_zonation['zonation'].astype(str) + '_' + df_merged_LogTrans_zonation['Condition'].astype(str)

In [None]:
# df_merged_LogTrans_zonation_males = df_merged_LogTrans_zonation[df_merged_LogTrans_zonation['Gender']=='Male']
# df_merged_LogTrans_zonation_females = df_merged_LogTrans_zonation[df_merged_LogTrans_zonation['Gender']=='Female']

In [None]:
# Function to normalize data within each group
# def normalize_data(group_values):
#    mean = np.mean(group_values)
#    std = np.std(group_values, ddof=1)  # Using sample standard deviation
#    normalized_values = (group_values - mean) / std if std != 0 else group_values - mean
#    return normalized_values

# Adapted permutation test function for normalized data
#def permutation_test_normalized(x, y, n_permutations=10000):
    # Normalize both groups
#    x_normalized = normalize_data(x)
#    y_normalized = normalize_data(y)
    
    # Calculate the difference in normalized means
#    diff_obs = np.abs(np.mean(x_normalized) - np.mean(y_normalized))
#    count = 0
#    for _ in range(n_permutations):
#        combined = np.random.permutation(np.hstack((x_normalized, y_normalized)))
#        new_x = combined[:len(x)]
#        new_y = combined[len(x):]
#        diff_perm = np.abs(np.mean(new_x) - np.mean(new_y))
#        if diff_perm >= diff_obs:
#            count += 1
#    p_value = count / n_permutations
#    return diff_obs, p_value

In [None]:
## For males

# Compute the original test statistics (e.g., mean cell abundance for simplicity)
# all_pvalues = []
# all_statistic = []
# all_celltypes = []
# all_condition_regions = []

# condition_regions = df_merged_LogTrans_zonation_males['conditions_region'].unique()
# i=0
# for cell_type in adata_vis.uns['mod']['factor_names']:
#    for current_region in df_merged_LogTrans_zonation_males['zonation'].unique():
#        subset_a = df_merged_LogTrans_zonation_males[(df_merged_LogTrans_zonation_males['Condition'] == current_condition) & (df_merged_LogTrans_zonation_males['zonation'] == current_region)][cell_type]
#        for current_condition in df_merged_LogTrans_zonation_males['Condition'].unique():

In [None]:
# df_merged_LogTrans_zonation_males

In [None]:
# condition_regions = df_merged_LogTrans_zonation_males['conditions_region'].unique()

In [None]:
# condition_regions.split('_')

In [None]:
# condition_regions[0].split('_')

In [None]:
# vector1 = df_merged_LogTrans_zonation['Gender'].unique()
# vector2 = df_merged_LogTrans_zonation['zonation'].unique()
# vector3 = df_merged_LogTrans_zonation['Condition'].unique()
# i=0

# for cell_type in adata_vis.uns['mod']['factor_names']:
# for current_sex in vector1: 
#    for current_condition, current_region in itertools.product(vector2, vector3):
#        print(i)
#        print(current_sex)
#        print(current_condition)
#        print(current_region)
#        i +=1

In [None]:
# Export the notebook file named below to HTML with nbconvert (shell command).
# NOTE(review): presumably this is the notebook itself; nbconvert converts the
# file as saved on disk, so save before running this cell — confirm.
! jupyter nbconvert --to html 21_02_Deconvolution_C2L_Proportions.ipynb