## Experiment 2
<br>
-- ViT Base model hyperparameters explore


In [1]:
import json
import re
import random
import os
from collections import defaultdict
from itertools import product

import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import utils

2023-02-04 13:02:12.240122: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-04 13:02:12.814127: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:
2023-02-04 13:02:12.814174: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:


In [2]:
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    tf.random.set_seed(seed)

seed_everything()

### Data Load

In [3]:
data_path = '/home/donghyun/eye_writing_classification/v2_dataset/200_points_dataset/'

with open(data_path + 'eog_katakana_200.json') as f:
  eog_katakana = json.load(f)

with open(data_path + 'eog_raw_numbers_200.json') as f:
  eog_raw_numbers = json.load(f)

with open(data_path + 'reference_data_200.json') as f:
  reference_data = json.load(f)

### Experiment

In [4]:
# hyperparameters

vit_hidden_size = [128,256,512]
vit_patch_size = [5,10]
vit_heads = [4,8]
vit_n_layers = [8,12]
vit_mlp_units = [[128,64],
                 [64,32]]
vit_dropout = [0]
vit_mlp_dropout = [0]

In [5]:
# Config class

class Config:
    split_ratio = 0.3
    ref_key = 'numbers'
    batch_size = 10            # fix : must be equaled with number of test pairs
    n_batch = 180
    lr = 0.0005
    model_type = 'ViTBaseModel'
    ViT_params = {}
    epochs = 100

In [None]:
# grid search for hyperparameters

cols = ['hidden_size', 'batch_size', 'patch_size', 'heads', 'n_layers', 'mlp_units', 'dropout', 'mlp_dropout', 'score']
best_perform_df = pd.DataFrame(columns=cols)

raw_numbers_dict = defaultdict(list)

i = 0
for hs, ps, heads, n_layers, mlp_units, dropout, mlp_dropout in product(vit_hidden_size,
                                                                            vit_patch_size,
                                                                            vit_heads,
                                                                            vit_n_layers,
                                                                            vit_mlp_units,
                                                                            vit_dropout,
                                                                            vit_mlp_dropout
                                                                            ):
    i+=1
    print('index : ', i)

    cfg = Config
    cfg.ViT_params['hidden_size'] = hs
    cfg.ViT_params['batch_size'] = cfg.batch_size
    cfg.ViT_params['patch_size'] = ps
    cfg.ViT_params['heads'] = heads
    cfg.ViT_params['n_layers'] = n_layers
    cfg.ViT_params['mlp_units'] = mlp_units
    cfg.ViT_params['dropout'] = dropout
    cfg.ViT_params['mlp_dropout'] = mlp_dropout

    _, _, _, test_acc_list = utils.experiment(cfg, eog_raw_numbers, reference_data)
    score = np.mean(test_acc_list[-3:])

    best_perform_df.loc[i] = [hs, cfg.batch_size, ps, heads, n_layers, str(mlp_units), dropout, mlp_dropout, score]

best_perform_df = best_perform_df.sort_values(by='score',ascending=False)

In [6]:
# hyperparameters save

save_path = '/home/donghyun/eye_writing_classification/experiments/save/'
best_perform_df.to_csv(save_path+'experiment2_vit_hyperparams.csv', index=True)

In [7]:
# load the hyperparameters

save_path = '/home/donghyun/eye_writing_classification/experiments/save/'
best_perform_df = pd.read_csv(save_path+'experiment2_vit_hyperparams.csv')

best_perform_df.head(10)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,hidden_size,batch_size,patch_size,heads,n_layers,mlp_units,dropout,mlp_dropout,score
0,0,14,128,10,10,8,8,"[64, 32]",0,0,90.416667
1,1,8,128,10,5,8,12,"[64, 32]",0,0,90.0
2,2,15,128,10,10,8,12,"[128, 64]",0,0,89.583333
3,3,1,128,10,5,4,8,"[128, 64]",0,0,88.958333
4,4,6,128,10,5,8,8,"[64, 32]",0,0,87.5
5,5,13,128,10,10,8,8,"[128, 64]",0,0,87.083333
6,6,16,128,10,10,8,12,"[64, 32]",0,0,87.083333
7,7,2,128,10,5,4,8,"[64, 32]",0,0,86.875
8,8,41,512,10,10,4,8,"[128, 64]",0,0,86.875
9,9,9,128,10,10,4,8,"[128, 64]",0,0,86.875


In [None]:
class Num_config:
    split_ratio = 0.3
    ref_key = 'numbers'
    batch_size = 10            # fix : Not must be equaled with number of test pairs 
    n_batch = 50
    lr = 0.0005
    model_type = 'ViTBaseModel'
    ViT_params = {}
    epochs = 100

class Kata_config:
    split_ratio = 0.3
    ref_key = 'katakana'
    batch_size = 12            # fix : Not must be equaled with number of test pairs 
    n_batch = 50
    lr = 0.001
    model_type = 'ViTBaseModel'
    ViT_params = {}
    epochs = 100

best_params = best_perform_df.iloc[0].to_dict()
best_params['mlp_units'] = re.sub('[\[\]]','',best_params['mlp_units'])
best_params['mlp_units'] = list(map(int,best_params['mlp_units'].split(',')))       # str to list

num_cfg = Num_config
num_cfg.ViT_params = best_params.copy()
num_cfg.ViT_params['batch_size'] = num_cfg.batch_size

kata_cfg = Kata_config
kata_cfg.ViT_params = best_params.copy()
kata_cfg.ViT_params['batch_size'] = kata_cfg.batch_size

times = 10
numbers_dict = defaultdict(list)
katakana_dict = defaultdict(list)
num_cm = np.zeros((num_cfg.batch_size,num_cfg.batch_size))
kata_cm = np.zeros((kata_cfg.batch_size,kata_cfg.batch_size))
for t in range(times):
    _, num_train_acc, num_train_loss, num_test_acc, num_confusion_matrix = utils.experiment(num_cfg, eog_raw_numbers, reference_data)
    _, kat_train_acc, kat_train_loss, kat_test_acc, kata_confusion_matrix = utils.experiment(kata_cfg, eog_katakana, reference_data)
    numbers_dict[t] = [num_train_acc, num_train_loss, num_test_acc]
    katakana_dict[t] = [kat_train_acc, kat_train_loss, kat_test_acc]
    num_cm += num_confusion_matrix
    kata_cm += kata_confusion_matrix


In [None]:
num_total = np.sum(num_confusion_matrix, axis=1)
kata_total = np.sum(kata_confusion_matrix, axis=1)
norm_num_cm = num_confusion_matrix / num_total
norm_kata_cm = kata_confusion_matrix / kata_total

In [None]:
num_ticks = ['num 0', 'num 1', 'num 2', 'num 3', 'num 4',
             'num 5', 'num 6', 'num 7', 'num 8', 'num 9']

df_cm = pd.DataFrame(norm_num_cm, index=num_ticks, columns=num_ticks)

plt.figure(figsize=(10,7))
plt.title('Arabic numbers confusion matrix')
cm = sns.heatmap(df_cm, annot=True)
cm.set_yticklabels(cm.get_yticklabels(), rotation=45)
cm.set_xticklabels(cm.get_xticklabels(), rotation=45)

In [None]:
kata_ticks = ['kata 1', 'kata 2', 'kata 3', 'kata 4', 'kata 5', 'kata 6',
              'kata 7', 'kata 8', 'kata 9', 'kata 10', 'kata 11', 'kata 12']

df_cm = pd.DataFrame(norm_num_cm, index=kata_ticks, columns=kata_ticks)

plt.figure(figsize=(10,7))
plt.title('katakana gesture confusion matrix')
cm = sns.heatmap(df_cm, annot=True)
cm.set_yticklabels(cm.get_yticklabels(), rotation=45)
cm.set_xticklabels(cm.get_xticklabels(), rotation=45)

### Save

In [8]:
save_path = '/home/donghyun/eye_writing_classification/experiments/save/'

with open(save_path + 'ex2_katakana_results.json', 'w') as f:
    json.dump(dict(katakana_dict),f)

with open(save_path + 'ex2_numbers_results.json', 'w') as f:
    json.dump(dict(numbers_dict),f)

### Visualization

In [None]:
save_path = '/home/donghyun/eye_writing_classification/experiments/save/'

with open(save_path+'ex1_raw_numbers_results.json') as f:
    hybrid_raw_numbers_results = json.load(f)

with open(save_path+'ex2_numbers_results.json') as f:
    vit_numbers_results = json.load(f)

with open(save_path+'ex2_katakana_results.json') as f:
    vit_katakana_results = json.load(f)

In [17]:
hybrid_test_acc = []
vit_test_acc = []
for t in range(10):
    key = str(t)
    hybrid_test_acc.append(hybrid_raw_numbers_results[key][2])
    vit_test_acc.append(vit_numbers_results[key][2])

hybrid_avg_results = np.array(hybrid_test_acc).mean(axis=0)
vit_avg_results  =np.array(vit_test_acc).mean(axis=0)

In [None]:
def analysis(data_list):
    return np.mean(data_list), max(data_list), min(data_list), np.std(data_list)

hybrid_numbers_test_performance = [t[-1] for t in hybrid_test_acc]
vit_numbers_test_performance = [t[-1] for t in vit_test_acc]

print('Accuracy base on raw numbers with 10 repetitions')
print(' '*29 +'1,     2,    3,      4,      5,      6,     7,     8,     9,     10,       Avg.   Best.   Worst.  Std.')
print('hybrid model performance : {}, {}'.format(hybrid_numbers_test_performance, analysis(hybrid_numbers_test_performance)))
print('ViT model performance    : {}, {}'.format(vit_numbers_test_performance, analysis(vit_numbers_test_performance)))

In [None]:
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1,2, figsize = (20,8))

# test accuracy
axes[0].plot(hybrid_avg_results, c = 'b', linestyle = 'solid', linewidth = 3)
axes[0].plot(vit_avg_results, c = 'r', linestyle = 'solid', linewidth = 3)

axes[0].set_ylim(20,100)

axes[0].set_title("Evaluation", fontsize=20)
axes[0].set_xlabel('Epoch', fontsize = 20)
axes[0].set_ylabel('Accuracy', fontsize = 20)

axes[0].legend(['Hybrid base model', 'ViT base model'], fontsize = 15)

# plot
plt.show()