In [None]:
import scanpy as sc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import my_functions as mf # project-local helper module (provides plot_correlation_scatter used below)
# Re-import the module so that edits made to my_functions during this session
# are picked up without restarting the kernel.
import importlib
importlib.reload(mf)

In [None]:
import os
# Output directory name, relative to the current working directory.
out_dir = "output"
# exist_ok=True: no error if the directory is already there, so the cell can be re-run.
os.makedirs(out_dir, exist_ok=True)

In [None]:
# Load the annotated AnnData object; its .obs table carries the
# inflammation_score and compartment columns used throughout this notebook.
ADATA_PATH = '/data/projects/robin/segmentation/annotate_infl/annotation_infl_compartments.h5ad'
adata = sc.read_h5ad(ADATA_PATH)

In [None]:
# Show the AnnData summary as a quick check of what was loaded.
adata

In [None]:
# Summary statistics of the inflammation score over all observations.
adata.obs["inflammation_score"].describe()

In [None]:
# Work on a copy of the observation table so adata.obs itself is never modified.
inf_sc = adata.obs.copy()

In [None]:
# Distinct sample identifiers present in the table.
inf_sc["Patient_Sample_ID"].unique()

In [None]:
# Columns of the observation table that are removed from the working table below.
cols_2_drop = [
    'x',
    'y',
    'z',
    'cluster',
    'n_transcripts',
    'density',
    'elongation',
    'area',
    'avg_confidence',
    'avg_assignment_confidence',
    'max_cluster_frac',
    'lifespan',
    'x_centroid',
    'y_centroid',
    'cell_area',
]

In [None]:
# Rebuild the trimmed table from adata.obs instead of dropping in place:
# DataFrame.drop returns a new frame, so adata.obs stays untouched and this cell
# can be re-run without a KeyError on columns that were already removed.
inf_sc = adata.obs.drop(columns=cols_2_drop)

In [None]:
# Preview the remaining columns after the drop.
inf_sc.head()

In [None]:
# Distinct compartment labels, used to choose the filter pattern in the next step.
inf_sc["compartment"].unique()

In [None]:
# Keep only rows whose compartment label contains the substring "tubulo".
# na=False makes rows with a missing compartment count as non-matching; without
# it str.contains returns NaN for them and the boolean mask cannot be applied.
inf_sc_tub = inf_sc[inf_sc.compartment.str.contains("tubulo", na=False)]

In [None]:
# Check which compartment labels survived the "tubulo" filter.
inf_sc_tub["compartment"].unique()

In [None]:
# Mean inflammation score per sample, computed over the "tubulo" rows only.
# observed=True: if Patient_Sample_ID is categorical, samples that have no rows
# left after the filter are omitted instead of appearing with a NaN mean
# (it has no effect on non-categorical keys).
inf_sc_tub_mean = inf_sc_tub.groupby(
    "Patient_Sample_ID", as_index=False, observed=True
)["inflammation_score"].mean()

In [None]:
# Show the per-sample means (one row per Patient_Sample_ID).
inf_sc_tub_mean

In [None]:
# Split "<Biopsy_ID>_<Disease>" into its two parts.
# NOTE(review): assumes every Patient_Sample_ID contains exactly one "_";
# any other count makes the two-column assignment fail — confirm against the data.
id_parts = inf_sc_tub_mean.Patient_Sample_ID.str.split("_", expand=True)
inf_sc_tub_mean[["Biopsy_ID", "Disease"]] = id_parts
# The combined identifier is no longer needed once it has been split.
inf_sc_tub_mean = inf_sc_tub_mean.drop(columns=["Patient_Sample_ID"])


In [None]:
# Show the table again, now keyed by Biopsy_ID and Disease.
inf_sc_tub_mean

## Clinical Data 

In [None]:
# Clinical table; it is joined to the inflammation scores on Biopsy_ID further down.
# The path is built from out_dir so the directory name is defined in one place.
# Biopsy_ID is read as text so it has the same dtype as the Biopsy_ID strings
# obtained by splitting Patient_Sample_ID (a purely numeric ID column would
# otherwise be parsed as integers and fail to merge).
df_clinical_values = pd.read_csv(
    os.path.join(out_dir, 'ClinicalData_medianPC1.csv'),
    dtype={'Biopsy_ID': str},
)
df_clinical_values

In [None]:
# Attach the per-biopsy mean inflammation score to the clinical table.
# Inner join: biopsies that are missing from either table are dropped.
merged_df = df_clinical_values.merge(inf_sc_tub_mean, on='Biopsy_ID', how='inner')
merged_df

In [None]:
# Disease labels (taken from the sample identifier) present after the merge.
merged_df["Disease"].unique()

In [None]:
# inflammation_score vs eGFR, Pearson.
# label_pos is presumably where the helper places its annotation — see my_functions.
mf.plot_correlation_scatter(merged_df, 'inflammation_score', 'eGFR', label_pos=[0.65, 0.95], method='pearson')

In [None]:
# Same pair of variables (inflammation_score vs eGFR), this time with the rank-based Spearman method.
mf.plot_correlation_scatter(merged_df, 'inflammation_score', 'eGFR', label_pos=[0.65, 0.95], method='spearman')

In [None]:
# inflammation_score vs albuminuria_gg, Pearson; annotation position moved to [0.05, 0.95].
mf.plot_correlation_scatter(merged_df, 'inflammation_score', 'albuminuria_gg', label_pos=[0.05, 0.95], method='pearson')

In [None]:
# Preview the merged table to look up the available clinical column names.
merged_df.head()

In [None]:
# inflammation_score vs serum_creatinine, Pearson.
mf.plot_correlation_scatter(merged_df, 'inflammation_score', 'serum_creatinine', label_pos=[0.05, 0.95], method='pearson')

## ANCA patients only: correlation with ARR score

In [None]:
# Restrict the merged table to rows labelled 'ANCA'.
# NOTE(review): this filters on the lowercase `disease` column, whereas the column
# derived from Patient_Sample_ID is `Disease` — confirm `disease` exists in the
# clinical table and is the intended one.
ANCA_patients = merged_df.loc[merged_df["disease"].eq('ANCA')]

In [None]:
# Take an explicit copy so the column assignments below modify an independent
# frame rather than a slice of merged_df.
ANCA_patients = ANCA_patients.copy()

# Pull the digits that sit between "(" and "/" in ARRscore_SLEclass.
# expand=False returns a Series, and to_numeric turns the captured text into
# numbers (rows without a match become NaN), so the score is correlated and
# plotted as a numeric variable instead of as strings.
ANCA_patients['ARRscore'] = pd.to_numeric(
    ANCA_patients['ARRscore_SLEclass'].str.extract(r'\((\d+)/', expand=False),
    errors='coerce',
)
# NOTE(review): rounding to 2 decimals happens before the correlation is computed;
# it can introduce ties in the rank-based (Spearman) analysis — confirm it is intended.
ANCA_patients["inflammation_score"] = ANCA_patients["inflammation_score"].round(2)

In [None]:
# ANCA subset only: inflammation_score vs ARRscore, Spearman.
mf.plot_correlation_scatter(ANCA_patients, 'inflammation_score', 'ARRscore', label_pos=[0.05, 0.9], method='spearman')