# Notes

# Load Modules

In [1]:
import numpy as np
import pandas as pd

In [2]:
import matplotlib as mpl
# Font setup for exported figures: font type 42 (TrueType) for the PDF and PS
# backends, and Arial as the sans-serif family.
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.sans-serif': 'Arial',
})
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import pprint
%matplotlib inline

# Load data

In [3]:
# Read the per-run metrics table of the `simulated` runs and preview its first rows.
simulated_radius_metric_df = pd.read_csv(filepath_or_buffer='../../raw_data/simulated/metrics.csv')
simulated_radius_metric_df.head(n=5)

Unnamed: 0,run_name,Method,Params,n_neighbors,hidden_feats,k,n_gcn_layers,correlation,moran_I
0,simulated_data_base,ONTraC,base,50,4,6,2,0.99184,0.982808
1,simulated_data_n_neighbors_10,ONTraC,n_neighbors,10,4,6,2,0.988282,0.978387
2,simulated_data_n_neighbors_20,ONTraC,n_neighbors,20,4,6,2,0.994598,0.978973
3,simulated_data_n_neighbors_100,ONTraC,n_neighbors,100,4,6,2,0.990794,0.983458
4,simulated_data_hidden_feats_2,ONTraC,hidden_feats,50,2,6,2,0.994572,0.986326


In [4]:
# Read the per-run metrics table of the `simulated_linear` runs and preview its first rows.
simulated_linear_metric_df = pd.read_csv(filepath_or_buffer='../../raw_data/simulated_linear/metrics.csv')
simulated_linear_metric_df.head(n=5)

Unnamed: 0,run_name,Method,Params,n_neighbors,hidden_feats,k,n_gcn_layers,correlation,moran_I
0,simulated_linear_base,ONTraC,base,50,4,6,2,0.993052,0.972823
1,simulated_linear_n_neighbors_10,ONTraC,n_neighbors,10,4,6,2,0.546991,0.873879
2,simulated_linear_n_neighbors_20,ONTraC,n_neighbors,20,4,6,2,0.991541,0.970096
3,simulated_linear_n_neighbors_100,ONTraC,n_neighbors,100,4,6,2,0.995195,0.972763
4,simulated_linear_hidden_feats_2,ONTraC,hidden_feats,50,2,6,2,0.994266,0.970664


In [5]:
# Read the per-run metrics table of the `simulated_non_linear` runs and preview its first rows.
simulated_non_linear_metric_df = pd.read_csv(filepath_or_buffer='../../raw_data/simulated_non_linear/metrics.csv')
simulated_non_linear_metric_df.head(n=5)

Unnamed: 0,run_name,Method,Params,n_neighbors,hidden_feats,k,n_gcn_layers,correlation,moran_I
0,simulated_non_linear_base,ONTraC,base,50,4,6,2,0.992884,0.976596
1,simulated_non_linear_n_neighbors_10,ONTraC,n_neighbors,10,4,6,2,0.96943,0.975205
2,simulated_non_linear_n_neighbors_20,ONTraC,n_neighbors,20,4,6,2,0.991462,0.975034
3,simulated_non_linear_n_neighbors_100,ONTraC,n_neighbors,100,4,6,2,0.995406,0.97709
4,simulated_non_linear_hidden_feats_2,ONTraC,hidden_feats,50,2,6,2,0.995374,0.975097


In [6]:
# Read the per-run metrics table of the `simulated_disconnected` runs and preview its first rows.
simulated_disconnected_metric_df = pd.read_csv(filepath_or_buffer='../../raw_data/simulated_disconnected/metrics.csv')
simulated_disconnected_metric_df.head(n=5)

Unnamed: 0,run_name,Method,Params,n_neighbors,hidden_feats,k,n_gcn_layers,correlation,moran_I
0,simulated_disconnected_base,ONTraC,base,50,4,6,2,0.985447,0.955703
1,simulated_disconnected_n_neighbors_10,ONTraC,n_neighbors,10,4,6,2,0.992005,0.947859
2,simulated_disconnected_n_neighbors_20,ONTraC,n_neighbors,20,4,6,2,0.988819,0.95119
3,simulated_disconnected_n_neighbors_100,ONTraC,n_neighbors,100,4,6,2,0.894578,0.950458
4,simulated_disconnected_hidden_feats_2,ONTraC,hidden_feats,50,2,6,2,0.990689,0.954154


# Integrate metrics across simulated datasets

In [None]:
def gen_integrate_res():
    """Yield one record per run from the four simulated metric tables.

    Each record is the tuple
    ``(dataset, Params, n_neighbors, hidden_feats, k, n_gcn_layers, correlation)``,
    where ``dataset`` is the display label assigned to the source table below.
    Tables are visited in label order (dataset 1 to 4) and rows in table order.

    Reads the module-level frames ``simulated_radius_metric_df``,
    ``simulated_linear_metric_df``, ``simulated_non_linear_metric_df`` and
    ``simulated_disconnected_metric_df``.
    """
    value_columns = ['Params', 'n_neighbors', 'hidden_feats', 'k', 'n_gcn_layers', 'correlation']
    labelled_tables = [
        ('simulated dataset 1', simulated_radius_metric_df),
        ('simulated dataset 2', simulated_linear_metric_df),
        ('simulated dataset 3', simulated_non_linear_metric_df),
        ('simulated dataset 4', simulated_disconnected_metric_df),
    ]
    for dataset_label, metric_df in labelled_tables:
        # itertuples(index=False, name=None) yields plain tuples holding only the
        # selected columns, in the order given by value_columns.
        for values in metric_df[value_columns].itertuples(index=False, name=None):
            yield (dataset_label,) + values


integrate_df = pd.DataFrame(gen_integrate_res(),
                            columns=[
                                'dataset', 'Params', 'n_neighbors', 'hidden_feats', 'k', 'n_gcn_layers', 'correlation'
                            ])
integrate_df.head()

Unnamed: 0,dataset,Params,n_neighbors,hidden_feats,k,n_gcn_layers,correlation
0,simulated dataset 1,base,50,4,6,2,0.99184
1,simulated dataset 1,n_neighbors,10,4,6,2,0.988282
2,simulated dataset 1,n_neighbors,20,4,6,2,0.994598
3,simulated dataset 1,n_neighbors,100,4,6,2,0.990794
4,simulated dataset 1,hidden_feats,50,2,6,2,0.994572


In [10]:
# Cast every swept hyper-parameter column to a categorical dtype, in place on integrate_df.
for hyperparam in ('n_neighbors', 'hidden_feats', 'k', 'n_gcn_layers'):
    integrate_df[hyperparam] = integrate_df[hyperparam].astype('category')

In [None]:
# One bar-plot figure per swept hyper-parameter: Spearman correlation for every
# simulated dataset, with bars coloured by the value of that hyper-parameter.
# Each figure is saved as corr_<hyper-parameter>.pdf in the working directory.
dataset_order = [
    'simulated dataset 1',
    'simulated dataset 2',
    'simulated dataset 3',
    'simulated dataset 4',
]

# (hyper-parameter, hue palette) pairs; 'red' is the value used by the 'base' run.
sweep_palettes = [
    ('n_neighbors', {10: 'lightgray', 20: 'darkgray', 50: 'red', 100: 'gray'}),
    ('hidden_feats', {2: 'lightgray', 4: 'red', 8: 'darkgray', 16: 'gray'}),
    ('k', {4: 'lightgray', 6: 'red', 8: 'darkgray', 10: 'gray'}),
    ('n_gcn_layers', {1: 'gainsboro', 2: 'red', 3: 'lightgray', 4: 'darkgray', 5: 'gray', 6: 'dimgray'}),
]

with sns.axes_style('white', rc={
        'xtick.bottom': True,
        'ytick.left': True
}), sns.plotting_context('paper',
                         rc={
                             'axes.titlesize': 6,
                             'axes.labelsize': 6,
                             'xtick.labelsize': 5,
                             'ytick.labelsize': 5,
                             'legend.fontsize': 5
                         }):

    for key, palette in sweep_palettes:
        # Keep the baseline rows plus the rows tagged with `key` in the Params column.
        filter_flag = integrate_df['Params'].isin(['base', key])
        data_df = integrate_df[filter_flag]

        # Two-row layout: only the top panel (axes[0]) is drawn. The bottom panel
        # (axes[1]) previously held a second bar plot ordered by mouse2_slice99,
        # mouse2_slice169 and mouse1_slice301; that plot is disabled, so the
        # panel is saved empty.
        fig, axes = plt.subplots(2, 1, figsize=(5, 4))

        sns.barplot(data=data_df,
                    x='dataset',
                    y='correlation',
                    order=dataset_order,
                    hue=key,
                    palette=palette,
                    ax=axes[0])
        axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation='vertical')
        axes[0].set_ylabel('Spearman correlation')
        # Anchor the legend's upper-left corner to the top-right corner of the axes,
        # i.e. outside the plotting area.
        axes[0].legend(loc="upper left", bbox_to_anchor=(1, 1))

        fig.tight_layout()
        fig.savefig(f'corr_{key}.pdf', transparent=True)