In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import quiche as qu
%reload_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib inline

## unstructured

In [None]:
# Gather every metrics CSV of the unstructured simulation into one long table
# (`evaluation_df`), tagging each file's rows with the parameter setting parsed
# from its file name.
save_directory = os.path.join('publications', 'supplementary_figures', 'supplementary_figure05', 'unstructured')
qu.pp.make_directory(save_directory)
directory = os.path.join('data', 'simulated', 'unstructured', 'metrics', 'n5000','t20', 'balanced')
# Sorted so that the row order of `evaluation_df` does not depend on the order in
# which the filesystem happens to list the files.
files = sorted(glob.glob(os.path.join(directory, '*.csv')))
frames = []
for file in files:
    # os.path.basename respects the platform's path separator; splitting on '/'
    # would leave directory components in the first token on Windows.
    id_list = os.path.basename(file).split('.csv')[0].split('_')
    # Second underscore-separated token of the file name selects the parsing rule.
    method_id = id_list[1]
    data = pd.read_csv(file, index_col = 0)
    if method_id == 'quiche':
        # Last five tokens are kept as the parameter string.
        val = '_'.join(id_list[-5:])
    elif method_id == 'kmeans':
        # e.g. 'kmeans_3' (method token + trailing token).
        val = '_'.join([id_list[1], id_list[-1]])
    elif method_id == 'cell':
        # e.g. 'cell_charter_3' (two method tokens + trailing token).
        val = '_'.join([id_list[1], id_list[2], id_list[-1]])
    else:
        val = 'default'
    data['param'] = val
    # Key used by the plotting cells to pick one method/parameter combination.
    data['method_param'] = data['method'] + '_' + data['param']
    frames.append(data)
# Concatenate once instead of growing the frame inside the loop; an empty
# directory still produces an empty DataFrame, as before.
evaluation_df = pd.concat(frames, axis = 0) if frames else pd.DataFrame()

In [None]:
# Group-recall heatmaps for the unstructured simulation: one PDF per
# method/parameter setting, rows = niche size, columns = share of samples.
method_arr = [
    'evaluate_kmeans_kmeans_3',
    'evaluate_kmeans_kmeans_5',
    'evaluate_kmeans_kmeans_7',
    'evaluate_cell_charter_cell_charter_3',
    'evaluate_cell_charter_cell_charter_5',
]
title_list = ['KMeans3', 'KMeans5', 'KMeans7', 'CellCharter3', 'CellCharter5']

# Grid sizes shown on the y axis, in display order (top to bottom).
grid_order = [14, 10, 9, 8, 7, 6, 5, 4]
is_group_recall = evaluation_df['variable'] == 'group_recall'

for method_key, title in zip(method_arr, title_list):
    selected = evaluation_df[is_group_recall & (evaluation_df['method_param'] == method_key)]
    # Mean value per (ratio, grid_size), reshaped to grid_size x ratio.
    heat = (
        selected.groupby(['ratio', 'grid_size'])['value']
        .mean()
        .reset_index()
        .pivot(index='grid_size', columns='ratio', values='value')
        .loc[grid_order, :]
    )
    # Relabel grid size g as 100 / g^2 (one decimal) and ratio as an integer percent.
    heat.index = np.round(100 / (heat.index * heat.index), 1)
    heat.columns = (heat.columns * 100).astype('int')

    plt.figure(figsize=(4.5, 5))
    ax = sns.heatmap(
        heat,
        annot=True,
        annot_kws={"size": 12},
        xticklabels=True,
        yticklabels=True,
        linewidths=0.5,
        fmt='.1f',
        vmin=0,
        vmax=1,
        cmap='Purples',
    )
    ax.tick_params(labelsize=14)
    ax.set_xlabel('patient samples with niche (%)', fontsize=14)
    ax.set_ylabel('niche size (% sample)', fontsize=14)
    ax.set_title(title, fontsize=14)
    plt.savefig(os.path.join(save_directory, f'{title}_group_recall.pdf'), bbox_inches='tight')

In [None]:
# Average-purity heatmaps for the unstructured simulation, one figure per
# method/parameter setting; each is written to `save_directory` as a PDF.
method_arr = ['evaluate_kmeans_kmeans_3', 'evaluate_kmeans_kmeans_5', 'evaluate_kmeans_kmeans_7',
              'evaluate_cell_charter_cell_charter_3', 'evaluate_cell_charter_cell_charter_5']
title_list = ['KMeans3', 'KMeans5', 'KMeans7', 'CellCharter3', 'CellCharter5']

heatmap_style = dict(annot=True, annot_kws={"size": 12}, xticklabels=True, yticklabels=True,
                     linewidths=0.5, fmt='.1f', vmin=0, vmax=1, cmap='Purples')

for idx, method_name in enumerate(method_arr):
    panel_title = title_list[idx]

    # Rows of the avg_purity metric belonging to the current method/parameter key.
    keep = (evaluation_df['variable'] == 'avg_purity') & (evaluation_df['method_param'] == method_name)
    mean_purity = evaluation_df[keep].groupby(['ratio', 'grid_size'])['value'].mean().reset_index()

    # grid_size x ratio table, restricted to (and ordered by) the listed grid sizes.
    purity_grid = mean_purity.pivot(index='grid_size', columns='ratio', values='value')
    purity_grid = purity_grid.loc[[14, 10, 9, 8, 7, 6, 5, 4], :]

    # Axis labels: 100 / grid_size^2 rounded to one decimal, and ratio as integer percent.
    grid_sizes = purity_grid.index
    purity_grid.index = np.round(100 / (grid_sizes * grid_sizes), 1)
    percent_cols = purity_grid.columns * 100
    purity_grid.columns = percent_cols.astype('int')

    plt.figure(figsize=(4.5, 5))
    heat_ax = sns.heatmap(purity_grid, **heatmap_style)
    heat_ax.tick_params(labelsize=14)
    heat_ax.set_xlabel('patient samples with niche (%)', fontsize=14)
    heat_ax.set_ylabel('niche size (% sample)', fontsize=14)
    heat_ax.set_title(panel_title, fontsize=14)
    out_path = os.path.join(save_directory, f'{panel_title}_purity.pdf')
    plt.savefig(out_path, bbox_inches='tight')

## structured

In [None]:
# Gather every metrics CSV of the structured simulation into one long table
# (`evaluation_df`), tagging each file's rows with the parameter setting parsed
# from its file name. Note: this rebinds `save_directory` and `evaluation_df`.
save_directory = os.path.join('publications', 'supplementary_figures', 'supplementary_figure05', 'structured')
qu.pp.make_directory(save_directory)
directory = os.path.join('data', 'simulated', 'structured', 'metrics')
# Sorted so that the row order of `evaluation_df` does not depend on the order in
# which the filesystem happens to list the files.
files = sorted(glob.glob(os.path.join(directory, '*.csv')))
frames = []
for file in files:
    # os.path.basename respects the platform's path separator; splitting on '/'
    # would leave directory components in the first token on Windows.
    id_list = os.path.basename(file).split('.csv')[0].split('_')
    # Second underscore-separated token of the file name selects the parsing rule.
    method_id = id_list[1]
    data = pd.read_csv(file, index_col = 0)
    if method_id == 'quiche':
        # Last five tokens are kept as the parameter string.
        val = '_'.join(id_list[-5:])
    elif method_id == 'kmeans':
        # e.g. 'kmeans_3' (method token + trailing token).
        val = '_'.join([id_list[1], id_list[-1]])
    elif method_id == 'cell':
        # e.g. 'cell_charter_3' (two method tokens + trailing token).
        val = '_'.join([id_list[1], id_list[2], id_list[-1]])
    else:
        val = 'default'
    data['param'] = val
    # Key used by the plotting cells to pick one method/parameter combination.
    data['method_param'] = data['method'] + '_' + data['param']
    frames.append(data)
# Concatenate once instead of growing the frame inside the loop; an empty
# directory still produces an empty DataFrame, as before.
evaluation_df = pd.concat(frames, axis = 0) if frames else pd.DataFrame()

In [None]:
# Average-purity heatmaps for the structured simulation: one PDF per
# method/parameter setting, rows = `num`, columns = prevalence in percent.
#
# NOTE(review): the last CellCharter entry selects the '..._cell_charter_10' rows,
# but was previously titled (and saved as) 'CellCharter7'. The label now follows
# the data key; confirm 10 is the intended setting for this figure.
method_titles = [
    ('evaluate_kmeans_kmeans_3', 'KMeans3'),
    ('evaluate_kmeans_kmeans_5', 'KMeans5'),
    ('evaluate_kmeans_kmeans_7', 'KMeans7'),
    ('evaluate_cell_charter_cell_charter_3', 'CellCharter3'),
    ('evaluate_cell_charter_cell_charter_5', 'CellCharter5'),
    ('evaluate_cell_charter_cell_charter_10', 'CellCharter10'),
]
# Kept as parallel lists under their original names for any later cell that reads them.
method_arr = [method for method, _ in method_titles]
title_list = [title for _, title in method_titles]

# `num`: pct_change * radius truncated to an integer; used as the heatmap row key.
evaluation_df['num'] = (evaluation_df['pct_change'] * evaluation_df['radius']).astype('int')
# `num_2`: text label of the same setting ('<pct>% r=<radius>'); not used in this cell.
evaluation_df['num_2'] = (evaluation_df['pct_change']*100).astype('int').astype('str') + '% r='+ evaluation_df['radius'].astype('str')

for method, title in method_titles:
    # Mean avg_purity per (prevalence, num) for the current method/parameter key.
    avg_data = evaluation_df[(evaluation_df['variable'] == 'avg_purity') & (evaluation_df['method_param'] == method)].groupby(['prevalence', 'num'])['value'].mean()
    avg_data = avg_data.reset_index()
    pivot_df = avg_data.pivot(index='num', columns='prevalence', values='value')
    # The row with num == 62 is excluded from the figure (reason not visible here).
    pivot_df = pivot_df[pivot_df.index != 62]
    # Show prevalence as an integer percentage on the x axis.
    pivot_df.columns = pivot_df.columns *100
    pivot_df.columns = pivot_df.columns.astype('int')
    plt.figure(figsize=(4.5,5))
    g = sns.heatmap(pivot_df, annot=True, annot_kws={"size": 12},xticklabels = True, yticklabels=True, linewidths=0.5,fmt='.1f', vmin = 0, vmax = 1, cmap = 'Purples')
    g.tick_params(labelsize = 14)
    g.set_xlabel('patient samples with niche (%)', fontsize = 14)
    g.set_ylabel('number of immune cells within niche', fontsize = 14)
    g.set_title(title, fontsize = 14)
    plt.savefig(os.path.join(save_directory, f'{title}_group_purity.pdf'), bbox_inches = 'tight')

In [None]:
# Group-recall heatmaps for the structured simulation: one PDF per
# method/parameter setting, rows = `num`, columns = prevalence in percent.
#
# NOTE(review): the last CellCharter entry selects the '..._cell_charter_10' rows,
# but was previously titled (and saved as) 'CellCharter7'. The label now follows
# the data key; confirm 10 is the intended setting for this figure.
method_titles = [
    ('evaluate_kmeans_kmeans_3', 'KMeans3'),
    ('evaluate_kmeans_kmeans_5', 'KMeans5'),
    ('evaluate_kmeans_kmeans_7', 'KMeans7'),
    ('evaluate_cell_charter_cell_charter_3', 'CellCharter3'),
    ('evaluate_cell_charter_cell_charter_5', 'CellCharter5'),
    ('evaluate_cell_charter_cell_charter_10', 'CellCharter10'),
]
# Kept as parallel lists under their original names for any later cell that reads them.
method_arr = [method for method, _ in method_titles]
title_list = [title for _, title in method_titles]

# Recomputed here so the cell does not depend on another plotting cell having run.
# `num`: pct_change * radius truncated to an integer; used as the heatmap row key.
evaluation_df['num'] = (evaluation_df['pct_change'] * evaluation_df['radius']).astype('int')
# `num_2`: text label of the same setting ('<pct>% r=<radius>'); not used in this cell.
evaluation_df['num_2'] = (evaluation_df['pct_change']*100).astype('int').astype('str') + '% r='+ evaluation_df['radius'].astype('str')

for method, title in method_titles:
    # Mean group_recall per (prevalence, num) for the current method/parameter key.
    avg_data = evaluation_df[(evaluation_df['variable'] == 'group_recall') & (evaluation_df['method_param'] == method)].groupby(['prevalence', 'num'])['value'].mean()
    avg_data = avg_data.reset_index()
    pivot_df = avg_data.pivot(index='num', columns='prevalence', values='value')
    # The row with num == 62 is excluded from the figure (reason not visible here).
    pivot_df = pivot_df[pivot_df.index != 62]
    # Show prevalence as an integer percentage on the x axis.
    pivot_df.columns = pivot_df.columns *100
    pivot_df.columns = pivot_df.columns.astype('int')
    plt.figure(figsize=(4.5,5))
    g = sns.heatmap(pivot_df, annot=True, annot_kws={"size": 12},xticklabels = True, yticklabels=True, linewidths=0.5,fmt='.1f', vmin = 0, vmax = 1, cmap = 'Purples')
    g.tick_params(labelsize = 14)
    g.set_xlabel('patient samples with niche (%)', fontsize = 14)
    g.set_ylabel('number of immune cells within niche', fontsize = 14)
    g.set_title(title, fontsize = 14)
    plt.savefig(os.path.join(save_directory, f'{title}_group_recall.pdf'), bbox_inches = 'tight')