# COMPOSITIONAL ANALYSIS

# Python packages

In [None]:
import os
import warnings
from collections import Counter

# Installed before the third-party imports so the filter is already active
# while those packages are being imported.
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")

import pandas as pd
import numpy as np
import scanpy as sc
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
import pertpy as pt
import scvi
import anndata
import matplotlib

from modules.visualize import *
from modules.deag_tools import *
from modules.utils import *

# Load dataset

In [None]:
# Raw sample tag -> label used in the rest of the notebook.
# The four experimental conditions get a letter prefix (A-D); the two QC labels
# are passed through unchanged.
# NOTE(review): the prefix presumably exists to force a fixed alphabetical
# ordering of the conditions (WT-DMSO first) — confirm.
sample_tag_mapping = {
    **{
        tag: f"{letter}_{tag}"
        for letter, tag in zip("ABCD", ("WT-DMSO", "3xTg-DMSO", "WT-SCDi", "3xTg-SCDi"))
    },
    **{tag: tag for tag in ("Undetermined", "Multiplet")},
}

In [None]:
# Read the clustered dataset from disk into `adata`.
adata = sc.read("data/fede_data/scdi_hypothalamus_clustered.h5ad")

In [None]:
# Relabel the sample tags using sample_tag_mapping. A label that is not a key of
# the mapping is kept as it is instead of becoming NaN: an unexpected tag stays
# visible, and running this cell a second time no longer blanks the column.
adata.obs['Sample_Tag'] = adata.obs['Sample_Tag'].map(
    lambda tag: sample_tag_mapping.get(tag, tag)
)

In [None]:
# Drop the cells whose sample tag is "Undetermined". The explicit copy yields an
# independent AnnData instead of a view (the low-frequency filter further down
# does the same), so later steps can modify it without writing into a view.
# NOTE(review): 'Multiplet' is also a label in sample_tag_mapping but is not
# removed here — confirm whether multiplets are meant to stay in the analysis.
adata = adata[adata.obs['Sample_Tag'] != "Undetermined", :].copy()

# Generate pseudoreplicates

In [None]:
# Rebind `adata` to the result of assign_pseudoreplicates (a helper provided by
# one of the wildcard `modules.*` imports).
# NOTE(review): presumably this adds the per-cell pseudoreplicate label (the
# `batch` column used as sample identifier later on) — confirm in the helper.
adata = assign_pseudoreplicates(adata)

# Filter out low frequency cell types

In [None]:
# Number of cells per subclass.
class_counts = adata.obs['cluster_subclass_name'].value_counts()
# Subclasses represented by at least 100 cells.
valid_classes = class_counts.index[class_counts >= 100]
# Keep only cells of those subclasses; copy so the result is not a view.
adata = adata[adata.obs['cluster_subclass_name'].isin(valid_classes), :].copy()

# Cell type abundance histogram

In [None]:
# Plot the cell-type abundances of the filtered data; the helper comes from one
# of the wildcard `modules.*` imports.
plot_cell_type_abundances(adata)

# Compositional analysis

In [None]:
# Build the scCODA model and load the cell-level annotations into it.
# Cell types come from `cluster_subclass_name`, samples from `batch`, and
# `Sample_Tag` is carried along as the covariate; a sample-level table is
# generated from the cell-level input (generate_sample_level=True).
sccoda_model = pt.tl.Sccoda()
sccoda_data = sccoda_model.load(
    adata,
    type="cell_level",
    cell_type_identifier="cluster_subclass_name",
    sample_identifier="batch",
    covariate_obs=["Sample_Tag"],
    generate_sample_level=True,
)

In [None]:
# Boxplots of the "coda" modality grouped by Sample_Tag, with individual
# points overlaid.
ax = pt.pl.coda.boxplots(
    sccoda_data,
    modality_key="coda",
    feature_name="Sample_Tag",
    figsize=(12, 6),
    add_dots=True,
)

# Shrink the overlaid dots: only scatter-style collections (PathCollection)
# are resized, everything else on the axes is left alone.
for collection in ax.collections:
    if isinstance(collection, matplotlib.collections.PathCollection):
        collection.set_sizes([10])  # Adjust this value to make dots smaller

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/coda_boxplots.png', bbox_inches='tight')
plt.show()

In [None]:
# Stacked barplot of the "coda" modality grouped by Sample_Tag.
pt.pl.coda.stacked_barplot(
    sccoda_data, modality_key="coda", feature_name="Sample_Tag", figsize=(4, 2)
)
plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/coda_stacked_barplot.png', bbox_inches='tight')
plt.show()

In [None]:
# Prepare the "coda" modality for model fitting with Sample_Tag as the only
# model term; the reference cell type is left to the "automatic" choice.
sccoda_data = sccoda_model.prepare(
    sccoda_data,
    formula="Sample_Tag",
    reference_cell_type="automatic",
    modality_key="coda",
)

In [None]:
# Run NUTS sampling on the "coda" modality. The rng_key is fixed at 1234,
# presumably so that repeated runs give the same result — confirm.
sccoda_model.run_nuts(sccoda_data, modality_key="coda", rng_key=1234)

In [None]:
# Set the FDR level used for the fitted model to 0.05.
sccoda_model.set_fdr(sccoda_data, 0.05)

In [None]:
# Show the credible-effects result for the "coda" modality as the cell output.
sccoda_model.credible_effects(sccoda_data, modality_key="coda")

In [None]:
# Fetch the credible effects once and keep them under a name.
credible_effects = sccoda_model.credible_effects(sccoda_data, modality_key="coda")

# One-column DataFrame view of the result for printing and later inspection.
credible_effects_df = credible_effects.to_frame()
print(credible_effects_df)

In [None]:
# Bare expression: shows the table as the notebook cell's output.
credible_effects_df

In [None]:
# Barplot of the estimated effects for the Sample_Tag covariate ("coda" modality).
pt.pl.coda.effects_barplot(sccoda_data, "coda", "Sample_Tag")
plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/coda_effects_barplot.png', bbox_inches='tight')
plt.show()