In [None]:
import json
import pandas as pd
import os
import numpy as np
import evaluation

import umap 
import matplotlib.pyplot as plt
import seaborn as sb


## Create Barplot

In [None]:
# --- Barplot configuration -------------------------------------------------
# Directory that holds the result CSV files listed below.
result_dir = ''
# The two result tables that are concatenated and compared in the barplot.
first_result = 'knn_full_results.csv'
second_result = 'sgd_full_results.csv'

# Directory the rendered figure is saved to.
dest_dir = 'results'

# Column of the result tables used to group/colour the bars.
hue = 'classifier'
# Metric columns to plot; the first goes to the top subplot, the second to the bottom one.
metric = ['accuracy', 'f1_score_macro']

In [None]:
# Load the two result tables that will be compared.
# os.path.join is used instead of f'{result_dir}/{name}' so that an empty
# result_dir resolves to the current working directory; the f-string form
# would produce '/<name>', i.e. a path at the filesystem root.
df_result1 = pd.read_csv(os.path.join(result_dir, first_result))
df_result2 = pd.read_csv(os.path.join(result_dir, second_result))

In [None]:
# Stack both result tables into one frame (fresh 0..n-1 index) and add a
# 'Task_ID' column of the form "<dataset>_<task>", used as the x-axis category.
df_results = pd.concat([df_result1, df_result2], ignore_index=True).assign(
    Task_ID=lambda frame: frame['dataset'] + '_' + frame['task']
)

In [None]:
# Draw one grouped barplot per metric (top: metric[0], bottom: metric[1]),
# with bars grouped by `hue`, and save the figure into dest_dir.

# The output directory may not exist yet on a fresh run; create it up front so
# savefig does not fail. (An empty dest_dir means "current directory".)
if dest_dir:
    os.makedirs(dest_dir, exist_ok=True)

fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))

# Each subplot gets its own metric column and colour palette.
for ax, metric_name, palette in zip(axs, metric, ('BuPu', 'BrBG')):
    sb.barplot(ax=ax, data=df_results, x="Task_ID", y=metric_name, hue=hue, palette=palette)
    ax.set(title=f'{metric_name} of {hue}s')

# Save through the figure handle rather than pyplot's implicit "current figure".
fig.savefig(os.path.join(dest_dir, f'{hue}_barplot.png'))

## Create UMAP

In [None]:
# --- UMAP configuration ----------------------------------------------------
# Dataset root; the metadata table is read from <root_dir>/<dataset>/enriched_meta.csv.
# NOTE(review): this is a machine-specific absolute path while feature_dir below
# is relative -- adjust both for your environment.
root_dir = '/scr/zchen/datasets/morphem_70k_2.0'
# Name of the dataset sub-directory to visualise.
dataset = 'Allen'

# Metadata column used to colour the second row of UMAP panels.
other_label = 'Structure'

# Location and file name of the pre-computed feature array (.npy).
feature_dir = "../datasets/morphem_70k_2.0/features"
feature_file = "pretrained_resnet18_features.npy"

# Directory the rendered figure is saved to.
dest_dir = 'results'

In [None]:
# Resolve and load the inputs for the selected dataset: the pre-computed
# feature array and the metadata table.
features_path = os.path.join(feature_dir, dataset, feature_file)
df_path = os.path.join(root_dir, dataset, 'enriched_meta.csv')

features = np.load(features_path)
df = pd.read_csv(df_path)

# Later cells index `features` with row positions taken from `df`, so the two
# must have the same number of rows. Fail loudly here instead of producing a
# silently misaligned plot (or an IndexError far from the cause).
if features.shape[0] != len(df):
    raise ValueError(
        f'Feature/metadata mismatch: {features_path} has {features.shape[0]} rows '
        f'but {df_path} has {len(df)} rows.'
    )

# Columns used to colour the UMAP panels: first row of panels, then second row.
label_list = ['Label', other_label]

In [None]:
# Every distinct 'train_test_split' value other than 'Train' names an evaluation task.
tasks = list(df['train_test_split'].unique())
tasks.remove('Train')

# Split into training and testing (row positions into df / features).
train_idx = np.where(df['train_test_split'] == 'Train')[0]
# Each task name is also looked up as a column of df; np.where keeps the rows
# where that column is truthy.
# NOTE(review): presumably these are boolean per-task membership columns --
# confirm against enriched_meta.csv.
all_test_indices = [np.where(df[task])[0] for task in tasks]
train_feat = features[train_idx]
test_feat = [features[idx] for idx in all_test_indices]

# Make sure the output directory exists (no-op if it already does).
if dest_dir:
    os.makedirs(dest_dir, exist_ok=True)

# Fit umap on training and project testing data.
# random_state pins UMAP's stochastic optimisation so the figure is
# reproducible across kernel restarts.
reducer = umap.UMAP(n_neighbors=15, n_components=2, random_state=42)
train_embeddings = reducer.fit_transform(train_feat)
# train_idx / test indices are positions (from np.where), hence iloc rather than loc.
# reset_index() realigns the metadata rows with the 0..n-1 index of the embedding frame.
train_aux = pd.concat((pd.DataFrame(train_embeddings, columns=["X", "Y"]),
                       df.iloc[train_idx].reset_index()), axis=1)
print('Fitted umap with train set.')

# One frame per task: 2-D embedding columns "X"/"Y" next to that task's metadata rows.
test_aux_list = []
for task_feat, task_idx in zip(test_feat, all_test_indices):
    test_embeddings = reducer.transform(task_feat)
    test_aux = pd.concat((pd.DataFrame(test_embeddings, columns=["X", "Y"]),
                          df.iloc[task_idx].reset_index()), axis=1)
    test_aux_list.append(test_aux)
print('Transformed test set with fitted umap.')

In [None]:
# Plot the UMAP embedding.
# Layout: one column for the train set followed by one column per task;
# row 0 is coloured by label_list[0], row 1 by label_list[1].

n_panel_cols = len(tasks) + 1
# Width scales with the number of columns (20 per column). The parentheses
# matter: 20*len(tasks)+1 would size the figure for one column too few.
fig, axs = plt.subplots(nrows=2, ncols=n_panel_cols, figsize=(20 * n_panel_cols, 20))

# Build a fixed value -> colour mapping per label column from the full metadata
# table, so a given class has the same colour in every panel.
palettes = []
for label_col in label_list:
    label_values = df[label_col].unique()
    palettes.append(dict(zip(label_values, sb.hls_palette(len(label_values)).as_hex())))
pal1, pal2 = palettes

# (data, title) for each column: train set first, then each task's test set.
panels = [(train_aux, f'UMAP of {dataset} Train Set')]
panels += [(task_aux, f'UMAP of {dataset} Test Set for {task}')
           for task, task_aux in zip(tasks, test_aux_list)]

for col_idx, (panel_df, panel_title) in enumerate(panels):
    for row_idx, (label_col, palette) in enumerate(zip(label_list, (pal1, pal2))):
        panel_ax = sb.scatterplot(ax=axs[row_idx, col_idx], data=panel_df, x="X", y="Y", s=5,
                                  hue=label_col, palette=palette)
        panel_ax.set(title=panel_title)

fig.savefig(f'{dest_dir}/umap_{dataset}.png')
