(c) This notebook is an asset of: https://github.com/tubml-pathology/xMIL-Pathways

Please note the license and citation instructions as described in the above repository.

In [None]:
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.stats.multitest import multipletests

from scipy import stats
from statsmodels.stats.multitest import multipletests
import numpy as np
from matplotlib.patches import Arc
import matplotlib.patches as mpatches

### Load data and create plots

In [None]:
# Root directory of the project results; every other path in this notebook is derived from it.
# NOTE(review): the literal looks like a placeholder — point it at the local copy of the data.
base_path_project = Path("/path/to/zenodo_sample/results")

In [None]:

# Relative file pattern searched for (recursively) below the analysis directory.
suffix = "pSTAT3/df_tiles_cells_lrps.csv"
base_data_path = base_path_project.joinpath("ihc_he_analysis")

# Output directory for the files written by this notebook; created if it is missing.
storing_path = base_path_project.joinpath('boxplots')
storing_path.mkdir(exist_ok=True, parents=True)

In [None]:

base_path_segmentations = base_path_project / "segmentation/preds/postprocessed"

# Directory with the per-patient segmentation (one "[case_id]_labels.csv" file per patient).
border_seg_path = base_path_segmentations / "tumor_border/v0/width=3/"

# Patch-level tumor vs. non-tumor predictions.
tissue_compartment_prediction_df = pd.read_csv(base_path_segmentations / "test_predictions.csv")
# Discard the 'Unnamed: N' columns that appear when a saved index is read back as data.
unnamed_columns = [
    column
    for column in tissue_compartment_prediction_df.columns
    if column.startswith('Unnamed: ')
]
tissue_compartment_prediction_df = tissue_compartment_prediction_df.drop(columns=unnamed_columns)
tissue_compartment_prediction_df.head()

#### Prepare tissue region segmentation

In [None]:
def get_cls(lbl):
    """Translate a compartment value into its label.

    Args:
        lbl: Value to classify; compared with ``==`` against 0 and 1.

    Returns:
        "non-tumor" if ``lbl`` equals 0, "tumor" if it equals 1, and
        "border" for every other value.
    """
    # Checked in this order: 0 first, then 1; anything unmatched is border.
    for value, name in ((0, "non-tumor"), (1, "tumor")):
        if lbl == value:
            return name
    return "border"

# Label every patch as non-tumor / tumor / border from its border-aware prediction value.
border_scores = tissue_compartment_prediction_df['prediction_score1_border']
tissue_compartment_prediction_df['cls_smooth_with_border'] = border_scores.apply(get_cls)
tissue_compartment_prediction_df.head()

In [None]:
# Sanity check: list the distinct compartment labels present in the table.
tissue_compartment_prediction_df['cls_smooth_with_border'].unique()

#### Load pathway cell activation measurements

In [None]:
# Sanity check: list the distinct raw values of the column that get_cls turns into labels.
tissue_compartment_prediction_df['prediction_score1_border'].unique()

In [None]:
# Load every table matching `suffix` below `base_data_path`, keyed by case directory name.
dfs = {}
# Sort the matches: rglob yields them in filesystem order, which would otherwise make the
# order of `dfs` (and of everything concatenated from it later) differ between machines.
for path in sorted(base_data_path.rglob(suffix)):
    curr_df = pd.read_csv(path)
    # 'Unnamed: 0' is what a saved index turns into when read back; errors='ignore'
    # keeps the load working for files that were written without an index.
    curr_df = curr_df.drop(columns=['Unnamed: 0'], errors='ignore')
    # Third path component from the end, i.e. the directory two levels above the CSV file;
    # it is matched against 'case_id' when the tables are merged.
    dfs[path.parts[-3]] = curr_df

In [None]:
# Keys of the loaded tables and how many there are.
dfs.keys(), len(dfs)

In [None]:
# Derive case and slide identifiers from 'slide_id', whose parts are separated by '..'.

slide_id_parts = tissue_compartment_prediction_df['slide_id'].apply(lambda slide_id: slide_id.split('..'))
# First part -> case, second part -> slide name.
tissue_compartment_prediction_df['case_id'] = slide_id_parts.apply(lambda parts: parts[0])
tissue_compartment_prediction_df['slide_name'] = slide_id_parts.apply(lambda parts: parts[1])

# def clf_one_patch(val):
#     if val == 1:
#         return "tumor (p=1)"
#     elif val == 0:
#         return "non-tumor (p=0)"
#     else:
#         return "uncertain (0<p<1)"
        
# tissue_compartment_prediction_df.loc[:,'pred_class'] = tissue_compartment_prediction_df['prediction_score1_smooth'].apply(clf_one_patch)

#### Combine patch level predictions and tissue region segmentation

In [None]:
# print(f"Before merge {tissue_compartment_prediction_df.shape=}, {border_seg_df.shape=}")
# tissue_compartment_prediction_df = pd.merge(tissue_compartment_prediction_df, border_seg_df, on=['slide_id', 'patch_id'])
# print(f"After merge {tissue_compartment_prediction_df.shape=}")

In [None]:
# Shape (rows, columns) of all loaded tables stacked on top of each other.
stacked_shape = pd.concat(dfs.values()).shape
print("Nr. of total patches in cell_activation: ", stacked_shape)

#### Combine cell activations per patch and tissue region segmentation

In [None]:
all_corr_res = []
all_data = []

# Attach the patch-level compartment predictions to each case's cell-activation table.
for pat_name, df in dfs.items():
    print(pat_name)
    # Prediction rows belonging to the current case; reset_index() keeps the old row
    # labels as an extra column.
    is_current_case = tissue_compartment_prediction_df['case_id'] == pat_name
    one_pat_patch_clf = tissue_compartment_prediction_df.loc[is_current_case].copy().reset_index()
    # Inner join: only patches present in both tables survive.
    # NOTE(review): the join key is 'patch_id' alone — confirm it is unique within a case.
    one_pat_full = df.merge(one_pat_patch_clf, how='inner', on='patch_id')
    all_data.append(one_pat_full)

    print()

#### Store created dataframe

In [None]:
# Stack the per-case merge results row-wise into one table; each part keeps its own row labels.
all_data = pd.concat(all_data, axis=0)
all_data.shape

In [None]:
# Summary statistics of the 'sum_cell_act_cont' column over all merged rows.
all_data['sum_cell_act_cont'].describe()

In [None]:
# Sign of the mean LRP value per row: -1 when the value is <= 0 (zero included), otherwise 1.
# Any value for which `<= 0` is False — NaN included — therefore ends up as 1.
mean_lrp_values = all_data['mean_lrp']
all_data['mean_lrp_sign'] = mean_lrp_values.apply(lambda lrp: -1 if lrp <= 0 else 1)

In [None]:
# Display the merged table.
all_data

In [None]:
# Display the merged table (same expression as the preceding cell).
all_data

In [None]:
# Write the merged table to the output directory; to_csv is called with its defaults,
# so the row index is written as well.
fn = storing_path.joinpath('pstat3.csv')
all_data.to_csv(path_or_buf=fn)
print(f"stored merged cell activations at {fn=}")