## Experiment 2
<br>
-- ViT Base model hyperparameter exploration


In [1]:
import json
import utils

from collections import defaultdict
from itertools import product
from IPython.display import clear_output

import pandas as pd
import numpy as np

### Data Load

In [None]:
# Directory that holds the 200-point dataset JSON files.
data_path = '/home/donghyun/eye_writing_classification/v2_dataset/200_points_dataset/'

# Recorded samples (presumably raw EOG traces of written numbers -- verify against utils).
with open(f'{data_path}eog_raw_numbers_200.json') as f:
    eog_raw_numbers = json.load(f)

# Reference data; the 'numbers' entry is the one selected by Config.ref_key.
with open(f'{data_path}reference_data_200.json') as f:
    reference_data = json.load(f)

### Experiment

In [None]:
# Hyperparameter search space for the ViT model.
# The grid search tries every combination of the values listed here.

vit_hidden_size = [128, 256, 512]
vit_patch_size = [5, 10, 20]
# NOTE(review): none of the hidden sizes above is divisible by 12 -- confirm
# that the model accepts heads=12 with these hidden sizes.
vit_heads = [4, 8, 12]
vit_n_layers = [4, 8, 12]
# Each entry is one candidate list of MLP unit counts.
vit_mlp_units = [
    [256, 128, 64],
    [128, 64, 32],
    [128, 64],
    [64, 32],
]
vit_dropout = [0, 0.2, 0.3]
vit_mlp_dropout = [0, 0.2, 0.3]

In [None]:
# Config class

class Config:
    """Experiment settings handed to ``utils.experiment`` during the grid search.

    The notebook uses the class object itself as the config (``cfg = Config``),
    so every value lives at class level and ``ViT_params`` is filled in there
    before each run.
    """

    # Data handling.
    split_ratio = 0.3
    ref_key = 'numbers'

    # Batching. batch_size is fixed: it must equal the number of test pairs.
    batch_size = 10
    n_batch = 50

    # Model selection; ViT_params starts empty and is populated by the caller.
    model_type = 'ViTBaseModel'
    ViT_params = {}

    # Training length.
    epochs = 1000

In [None]:
# grid search
#
# Runs utils.experiment once for every combination of the ViT hyperparameter
# lists and stores one row per combination (parameters + score) in
# `best_perform_df`, sorted so that the highest score comes first.

cols = ['hidden_size', 'batch_size', 'patch_size', 'heads', 'n_layers', 'mlp_units', 'dropout', 'mlp_dropout', 'score']
best_perform_df = pd.DataFrame(columns=cols)

raw_numbers_dict = defaultdict(list)

# The notebook uses the Config class object itself as the config.
cfg = Config

# Seven lists are combined, so each combination has exactly seven values.
# batch_size is not searched; it always comes from cfg.batch_size.
search_space = product(
    vit_hidden_size,
    vit_patch_size,
    vit_heads,
    vit_n_layers,
    vit_mlp_units,
    vit_dropout,
    vit_mlp_dropout,
)

for i, (hs, ps, heads, n_layers, mlp_units, dropout, mlp_dropout) in enumerate(search_space, start=1):
    # Build a fresh dict for every combination instead of mutating the
    # shared class-level dict, so no state carries over between runs.
    cfg.ViT_params = {
        'hidden_size': hs,
        'batch_size': cfg.batch_size,
        'patch_size': ps,
        'heads': heads,
        'n_layers': n_layers,
        'mlp_units': mlp_units,
        'dropout': dropout,
        'mlp_dropout': mlp_dropout,
    }

    _, _, _, test_acc_list = utils.experiment(cfg, eog_raw_numbers, reference_data)
    # NOTE(review): `[:-5]` averages every entry EXCEPT the last five. If the
    # intent was "mean of the last five test accuracies" this should be
    # `[-5:]` -- confirm before relying on the ranking.
    score = np.mean(test_acc_list[:-5])

    # mlp_units is stored as its string form so the cell holds a scalar.
    best_perform_df.loc[i] = [hs, cfg.batch_size, ps, heads, n_layers, str(mlp_units), dropout, mlp_dropout, score]

# Highest score first, so that row 0 is the best-performing configuration.
best_perform_df = best_perform_df.sort_values(by='score', ascending=False)

clear_output()

In [None]:
# Display the grid-search results table (one row per hyperparameter combination).
best_perform_df

In [None]:

class Config:
    """Experiment settings for the repeated runs of the best ViT configuration.

    Used as a class object (``cfg = Config``), so all values are class-level.
    """
    split_ratio = 0.3
    ref_key = 'numbers'
    # NOTE(review): the grid-search Config uses batch_size = 10 while this one
    # uses 12, yet both say the value must equal the number of test pairs --
    # confirm which one is correct for this dataset/split.
    batch_size = 12            # fixed: must equal the number of test pairs
    n_batch = 50
    model_type = 'ViTBaseModel'
    ViT_params = {}
    epochs = 1000

# Pick the row with the highest score explicitly, so the choice does not
# depend on how best_perform_df happens to be sorted.
best_params = best_perform_df.loc[best_perform_df['score'].astype(float).idxmax()]

cfg = Config
# Pass a plain dict of built-in types: the DataFrame row also carries the
# 'score' column and may hold NumPy scalars. batch_size mirrors the Config
# value, exactly as the grid search does.
cfg.ViT_params = {
    'hidden_size': int(best_params['hidden_size']),
    'batch_size': cfg.batch_size,
    'patch_size': int(best_params['patch_size']),
    'heads': int(best_params['heads']),
    'n_layers': int(best_params['n_layers']),
    # mlp_units was stored as str(list), e.g. '[256, 128, 64]'; parse it back
    # into a list of ints (iterating the string would yield single characters).
    'mlp_units': json.loads(best_params['mlp_units']),
    'dropout': float(best_params['dropout']),
    'mlp_dropout': float(best_params['mlp_dropout']),
}

# Number of independent repetitions of the best configuration.
times = 10

# Maps repetition index -> [train accuracy, train loss, test accuracy] as
# returned by utils.experiment.
raw_numbers_dict = defaultdict(list)
for t in range(times):
    _, raw_train_acc, raw_train_loss, raw_test_acc = utils.experiment(cfg, eog_raw_numbers, reference_data)
    raw_numbers_dict[t] = [raw_train_acc, raw_train_loss, raw_test_acc]

clear_output()

### Save

In [None]:
# Output directory for everything this experiment writes.
save_path = '/home/donghyun/eye_writing_classification/experiments/save/'

# Per-repetition results of the best configuration, as a plain dict in JSON.
with open(f'{save_path}experiment2_raw_numbers_results.json', 'w') as f:
    json.dump(dict(raw_numbers_dict), f)

# Full grid-search table, row index included.
best_perform_df.to_csv(f'{save_path}experiment2_vit_hyperparams.csv', index=True)

### Visualization

In [None]:
# Directory the experiment result files were saved to.
save_path = '/home/donghyun/eye_writing_classification/experiments/save/'

# Read the saved results back from disk. json.dumps() would only serialise
# the path string itself; json.load() on the opened file parses its contents.
# Note that JSON object keys are always strings after loading.
with open(save_path + 'experiment1_raw_numbers_results.json') as f:
    ex1_numbers_results = json.load(f)

with open(save_path + 'experiment2_raw_numbers_results.json') as f:
    ex2_numbers_results = json.load(f)