# Import libraries

In [None]:
import numpy as np
import scanpy as sc
import pandas as pd
import polars as pl
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f_oneway
import phate, scprep, session_info
from scipy.stats import mannwhitneyu, ttest_ind
from statsmodels.stats.multicomp import MultiComparison

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Set scanpy's logging verbosity to 3.
sc.settings.verbosity = 3

# Load data

In [None]:
# Read the full dataset from the `_m` directory two levels above this notebook.
adata = sc.read_h5ad("../../_m/ipf_dataset.h5ad")
# Bare expression: shows the object summary as the cell output.
adata

### Subset for AT2 cell type

In [None]:
# List the distinct cell-type annotations available in the dataset.
adata.obs["cell_type"].unique()

In [None]:
# Boolean mask of the cells annotated as "ATII" (alveolar type II).
is_at2 = adata.obs["cell_type"] == "ATII"
new_adata = adata[is_at2]
new_adata

### Convert to counts matrix

In [None]:
# Densify the expression matrix into a cells x genes table.
# NOTE(review): the gene index is passed wrapped in a list, exactly as before;
# this appears to yield tuple-style column labels that later cells rely on
# (they rename "('AGTR2',)" etc.), so it is intentionally left unchanged.
ipf_counts = pd.DataFrame(
    data=new_adata.X.toarray(),
    index=new_adata.obs.index,
    columns=[new_adata.var.index],
)
print(ipf_counts.shape)
ipf_counts.head(2)

In [None]:
# Plot the per-cell library size distribution of the unfiltered table.
scprep.plot.plot_library_size(ipf_counts)

# Preprocessing: Filtering, Normalizing, and Transforming

## Filtering

In [None]:
# Remove rarely detected genes; `min_cells=6` is the threshold handed to scprep.
ipf_counts = scprep.filter.filter_rare_genes(ipf_counts, min_cells=6)
# Show the table dimensions after filtering.
ipf_counts.shape

## Normalization

In [None]:
# Library-size normalize each cell using scprep's default settings.
ipf_counts = scprep.normalize.library_size_normalize(ipf_counts)
# Preview the first two normalized rows.
ipf_counts.head(2)

## Transformation

In [None]:
# Square-root transform the normalized values (overwrites `ipf_counts`).
ipf_counts = scprep.transform.sqrt(ipf_counts)

# PHATE

In [None]:
# Fixed `random_state` keeps the embedding reproducible between runs.
# NOTE(review): `n_jobs=-2` presumably means "all CPUs but one" - confirm
# against the PHATE documentation.
phate_operator = phate.PHATE(n_jobs=-2, random_state=13)
# Fit on the transformed expression table and keep the embedding coordinates.
Y_phate = phate_operator.fit_transform(ipf_counts)

In [None]:
# Plot the uncoloured PHATE embedding and save it as a PDF.
scprep.plot.scatter2d(Y_phate, ticks=None, label_prefix="PHATE", figsize=(5,5),
                      filename="at2_phate_clustering.pdf", dpi=300)

# Plotting

## Angiotensin II receptor 2 (AGTR2)

In [None]:
# Colour the PHATE embedding by AGTR2 expression.
fig, ax = plt.subplots(1, figsize=(4,4))
scprep.plot.scatter2d(Y_phate, ax=ax, c=ipf_counts['AGTR2'],
                      title='AGTR2', ticks=False, label_prefix='PHATE')
# Tighten the layout *before* writing the file so the saved PDF matches the
# displayed figure (previously the file was written inside scatter2d, i.e.
# before tight_layout ran).
fig.tight_layout()
# The output name now matches the plotted gene (it used to say "AGTR1").
fig.savefig("at2_phate_clustering.AGTR2.pdf", dpi=300)

## KDE plot of AT2 cells

In [None]:
# Filled 2-D kernel density estimate of the cells in PHATE space.
fig, ax = plt.subplots(1, figsize=(4,4))
kde_plot = sns.kdeplot(
    x=Y_phate[:, 0],
    y=Y_phate[:, 1],
    ax=ax,
    fill=True,
    cmap="inferno",
    n_levels=100,
    zorder=0,
)

# Hide tick marks; only the axis titles carry information here.
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel('PHATE 1', fontsize=18)
ax.set_ylabel('PHATE 2', fontsize=18)
ax.set_title('KDE - AT2', fontsize=20)
fig.tight_layout()

# Save through the figure that owns the KDE axes.
fig = kde_plot.get_figure()
fig.savefig("at2_phate_kde.pdf")

## Clustering data using PHATE

In [None]:
# Partition the cells into two groups with k-means on the fitted PHATE operator.
clusters = phate.cluster.kmeans(phate_operator, k=2)

# No `ax` is passed, so scatter2d builds its own figure and returns the axes
# it drew on; the PDF is written inside the call via `filename`.
ax = scprep.plot.scatter2d(Y_phate, c=clusters, s=1,figsize=(4.3,4), ticks=None, label_prefix='PHATE',
                           legend_anchor=(1,1), fontsize=12, title='PHATE clusters',
                           filename="at2_phate_clustering.kmeans.pdf", dpi=300)

# Tighten the layout of *this* plot's figure. The previous code called
# tight_layout() on whatever `fig` was left over from an earlier cell.
fig = ax.get_figure()
fig.tight_layout()

# Comparison of PHATE clusters and AGTR2 expression

## Subset data

In [None]:
# Recover plain gene names from the expression table's column labels.
# Labels may be one-element tuples; take the element directly instead of
# string-munging the tuple's repr, which breaks for names containing quotes.
gene_list = [
    str(label[0]) if isinstance(label, tuple) else str(label)
    for label in ipf_counts.columns
]

# Restrict to the retained cells (in `ipf_counts` row order, so that the
# embedding and cluster labels line up) and the retained genes. Take a real
# copy so the assignments below do not write into a view of the parent object.
new_adata = new_adata[ipf_counts.index, new_adata.var.index.isin(gene_list)].copy()
new_adata.obsm["X_phate"] = Y_phate
new_adata.obs["phate_clusters"] = [f"Group_{clu}" for clu in clusters]
new_adata

## Qualitative review

In [None]:
# Attach the cluster labels and per-cell sample metadata to the expression table.
ipf_counts.loc[:, "PHATE"] = clusters
for meta_col in ("patient", "disease"):
    ipf_counts.loc[:, meta_col] = new_adata.obs[meta_col]

# The converted frame carries tuple-style column names; map them back to plain ones.
tuple_style_names = {
    "('AGTR2',)": "AGTR2",
    "('PHATE',)": "PHATE",
    "('patient',)": "patient",
    "('disease',)": "disease",
}
selected = ipf_counts.loc[:, ["AGTR2", "PHATE", "patient", "disease"]]
df = pl.from_pandas(selected).rename(tuple_style_names)
df.head()

In [None]:
# Export the AGTR2 / cluster / patient / disease table as a tab-separated file.
df.write_csv("at2_phate.normalized_expression.tsv", separator="\t")

In [None]:
# Summarise AGTR2 per cluster / patient / disease, using only cells with
# non-zero AGTR2.
agtr2 = pl.col("AGTR2")
(
    df.filter(agtr2 > 0)
    .group_by(["PHATE", "patient", "disease"])
    .agg(
        [
            agtr2.sum().name.suffix("_sum"),
            agtr2.mean().name.suffix("_mean"),
            agtr2.count().name.suffix("_count"),
            agtr2.std().name.suffix("_std"),
        ]
    )
    .sort("PHATE")
)

In [None]:
# Summarise AGTR2 per cluster / disease (patients pooled), using only cells
# with non-zero AGTR2.
agtr2 = pl.col("AGTR2")
(
    df.filter(agtr2 > 0)
    .group_by(["PHATE", "disease"])
    .agg(
        [
            agtr2.sum().name.suffix("_sum"),
            agtr2.mean().name.suffix("_mean"),
            agtr2.count().name.suffix("_count"),
            agtr2.std().name.suffix("_std"),
        ]
    )
    .sort("PHATE")
)

In [None]:
# Mean AGTR2 per patient (and disease) among AGTR2-expressing cells of cluster 1.
in_cluster_1 = pl.col("PHATE") == 1
expresses_agtr2 = pl.col("AGTR2") > 0
dx = (
    df.filter(in_cluster_1 & expresses_agtr2)
    .group_by(["patient", "disease"])
    .agg(pl.col("AGTR2").mean())
)
dx.head(2)

In [None]:
# Draw on an explicitly created figure. The previous code called
# tight_layout() on a `fig` left over from an earlier cell, before rebinding
# it to the boxplot figure.
fig, ax = plt.subplots()
sns.boxplot(data=dx, x="disease", y="AGTR2", fill=False, width=0.5, ax=ax)
# Overlay the individual per-patient means on top of the boxes.
box_plot = sns.stripplot(data=dx, x="disease", y="AGTR2", ax=ax)
ax.set_xlabel('Disease', fontsize=12)
ax.set_ylabel('Normalized Expression (AGTR2)', fontsize=12)
# Tighten the layout of this figure, then save it.
fig.tight_layout()
fig.savefig("at2_phate.PHATE_cluster_1.boxplot_AGTR2.pdf")

## Statistical comparison

### One-way ANOVA

In [None]:
# Collect the per-patient AGTR2 means of each disease group as NumPy arrays.
agtr2_by_disease = {
    label: dx.filter(pl.col("disease") == label).to_pandas()["AGTR2"].values
    for label in ("Control", "COPD", "IPF")
}
c0 = agtr2_by_disease["Control"]
c1 = agtr2_by_disease["COPD"]
c2 = agtr2_by_disease["IPF"]

# One-way ANOVA across the three groups.
f_oneway(c0, c1, c2)

### TukeyHSD

In [None]:
# Convert once and reuse: AGTR2 values are the data, disease labels the groups.
dx_pd = dx.to_pandas()
model = MultiComparison(dx_pd["AGTR2"], dx_pd["disease"])
res_mod = model.tukeyhsd()
print(res_mod)

### T-test (two-sided)

In [None]:
# Independent two-sample t-test with SciPy's default settings.
ttest_ind(c0, c1) # Control VS COPD

In [None]:
# Independent two-sample t-test with SciPy's default settings.
ttest_ind(c0, c2) # Control VS IPF

In [None]:
# Independent two-sample t-test with SciPy's default settings.
ttest_ind(c1, c2) # COPD VS IPF

### Mann-Whitney U

#### Two-sided test

In [None]:
# Mann-Whitney U test; no `alternative` is given, so SciPy's default applies.
mannwhitneyu(c0, c1) # Control VS COPD

In [None]:
# Mann-Whitney U test; no `alternative` is given, so SciPy's default applies.
mannwhitneyu(c0, c2) # Control VS IPF

In [None]:
# Mann-Whitney U test; no `alternative` is given, so SciPy's default applies.
mannwhitneyu(c1, c2) # COPD VS IPF

#### One-sided test

In [None]:
# One-sided: the alternative hypothesis is that the first sample (Control)
# tends to be smaller than the second (COPD).
mannwhitneyu(c0, c1, alternative="less") # Control VS COPD

In [None]:
# One-sided: the alternative hypothesis is that the first sample (Control)
# tends to be smaller than the second (IPF).
mannwhitneyu(c0, c2, alternative="less") # Control VS IPF

In [None]:
# One-sided: the alternative hypothesis is that the first sample (COPD)
# tends to be larger than the second (IPF).
# NOTE(review): the direction here ("greater") differs from the two
# Control comparisons ("less") - confirm this is intended.
mannwhitneyu(c1, c2, alternative="greater") # COPD VS IPF

# Session information

In [None]:
# Record the package versions used in this session for reproducibility.
session_info.show()