# Context
This notebook drives the training process for different models.

In [1]:
# Set project's environment variables
import os
import sys
from dotenv import load_dotenv
# Load the project's environment file; it is expected to define PYTHONPATH.
load_dotenv(dotenv_path="../../../project.env")

# Fail with an actionable message instead of a bare KeyError when the variable
# is still undefined after loading (e.g. the env file could not be read).
project_path = os.environ.get("PYTHONPATH")
if project_path is None:
    raise KeyError(
        "PYTHONPATH is not set; check that ../../../project.env exists and defines it"
    )

# Re-running this cell must not keep appending the same entry to sys.path.
if project_path not in sys.path:
    sys.path.append(project_path)

In [2]:
# Import the project-wide header and the BERT architecture module from the TRAIN package
import superheader as sup
from TRAIN.architecture.archeader import bert



Chosen class grouping: alpha-classes


Directory /Users/diego/Desktop/iteso/TOG/ exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/src exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/bin exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/media exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/scores exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/PH1/alpha-classes exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/PH2/alpha-classes exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/PH3/alpha-classes exists. Continuing with execution
device: mps
Directory /Users/diego/Desktop/iteso/TOG/bin/load/TRAIN/distilbert-base-uncased exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/bin/load/TRAIN/prajjw

# Models

## Setup

In [3]:
import torch.nn as nn
import torch.optim as optim

import gc

In [4]:
import json
# Read the class-subset definitions from alpha-classes-subsets.json.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
with open(os.path.join(sup.DATA_ROOT, "alpha-classes-subsets.json"), "r", encoding="utf-8") as f:
    loaded = json.load(f)

# JSON object keys are always strings, so restore the integer outer keys.
# Each inner mapping is shallow-copied; its values remain lists exactly as
# loaded (no tuple conversion takes place).
subsets = {int(k): dict(v_dict) for k, v_dict in loaded.items()}
subsets


{2: {'easy': [8, 12], 'average': [2, 19], 'hard': [19, 23]},
 3: {'easy': [21, 24, 27], 'average': [4, 13, 19], 'hard': [2, 18, 22]},
 4: {'easy': [0, 5, 6, 13], 'average': [6, 16, 18, 20], 'hard': [1, 3, 7, 16]},
 5: {'easy': [2, 5, 9, 11, 12],
  'average': [2, 3, 9, 24, 26],
  'hard': [10, 11, 17, 19, 22]},
 6: {'easy': [0, 3, 14, 20, 22, 27],
  'average': [0, 10, 13, 14, 20, 21],
  'hard': [4, 14, 19, 22, 24, 27]},
 7: {'easy': [0, 5, 7, 13, 16, 23, 26],
  'average': [4, 5, 6, 11, 12, 14, 22],
  'hard': [1, 6, 9, 13, 14, 25, 27]},
 8: {'easy': [0, 1, 3, 6, 10, 13, 18, 25],
  'average': [0, 7, 10, 12, 13, 20, 23, 26],
  'hard': [0, 4, 10, 11, 13, 17, 18, 22]},
 9: {'easy': [1, 7, 10, 16, 20, 22, 24, 25, 27],
  'average': [5, 6, 7, 13, 14, 20, 23, 25, 27],
  'hard': [1, 2, 3, 5, 7, 10, 14, 15, 20]},
 10: {'easy': [0, 6, 8, 11, 14, 15, 17, 20, 22, 27],
  'average': [0, 4, 7, 8, 10, 11, 12, 14, 15, 23],
  'hard': [1, 4, 5, 6, 15, 16, 17, 20, 22, 27]},
 11: {'easy': [0, 2, 4, 5, 7, 8, 13

In [5]:
# Dataset options shared by every run; the training loop copies this mapping
# and fills in the run-specific entries on the copy.
base_data_config = dict(
    PH3=False,
    reducer='',
    kernel='',
    n=-1,
    label_col=sup.class_numeric_column,
    batch_size=256,
    class_list='specified',
)

# Model / optimisation options shared by every run.
base_train_config = dict(
    device=bert.device,
    arch=sup.TRAIN_BERT_CODE,
    loadable=bert.DISTILBERT,
    optimizer=optim.AdamW,
    lr=1e-5,
    weight_decay=0,
    loss_fn=nn.CrossEntropyLoss,
)

# Epoch unit; the training loop multiplies it per candidate.
base_num_epochs = 50

# Candidate class counts: 2, 5, 8, ..., 29.
num_class_candidates = [*range(2, 30, 3)]

## Train

In [6]:
# Results container; starts out as a plain empty dict. The training loop
# stores entries as metric_tracker[data_unit][PH2][n][difficulty] -> metrics.
metric_tracker = {}

In [None]:
# Sweep every combination of data unit, PH2 flag and class-subset size: build,
# train and score one BERT model per combination and record its metrics in
# metric_tracker[data_unit][PH2][n][difficulty].
for data_unit in [sup.DATA_S_PF, sup.DATA_S_PV]:
  # Work on copies so the base configurations are never mutated.
  data_config = base_data_config.copy()
  train_config = base_train_config.copy()

  data_config["data_unit"] = data_unit
  data_config["seq_len"] = 1 if data_unit == sup.DATA_S_PF else 12

  for PH2 in [False, True]:
    data_config["PH2"] = PH2
    data_config["input_dim"] = 87 if PH2 else 72

    # Iterate over the candidate list itself rather than a hard-coded
    # range(10), so the sweep follows num_class_candidates if it changes.
    for i, n in enumerate(num_class_candidates):
      s = subsets[n]
      # The epoch count grows linearly with the candidate's position.
      train_config["num_epochs"] = (i+1) * base_num_epochs
      for difficulty in ['average']:
        s_dif = s[difficulty]
        data_config["class_numeric_list"] = s_dif

        print(data_config)
        print(train_config)
        model = bert.BERT(data_config=data_config, df=None,
                          train_config=train_config)

        model.fit(verbose=True)

        model.full_score()
        print(model.accuracy)
        print(model.macro_f1)
        print(model.macro_precision)
        print(model.macro_recall)
        model.plot_loss()

        # NOTE(review): the macro recall is stored under the key "recall"
        # (not "macro_recall"); kept as-is so existing consumers keep working.
        metrics = {"accuracy" : model.accuracy,
                  "macro_f1" : model.macro_f1,
                  "macro_precision" : model.macro_precision,
                  "recall" : model.macro_recall,
                  "confusion" : model.confusion,
                  "loss" : model.loss_fig}

        # Create each nesting level only when it is missing. Re-assigning
        # fresh dicts here on every iteration would discard the results
        # already recorded for this data_unit / PH2 / n.
        tracker_slot = (metric_tracker
                        .setdefault(data_unit, {})
                        .setdefault(PH2, {})
                        .setdefault(n, {}))
        tracker_slot[difficulty] = metrics.copy()

        # Drop the model before the next run to release its memory.
        print("clearing memory...")
        del model
        gc.collect()



{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [2, 19]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 50}


                                                                                  

0.978515625
0.9785116083862722
0.9787328244274809
0.9784771496147098
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [2, 3, 9, 24, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 100}


                                                                                    

0.9681429681429682
0.9683246063514886
0.9683806802467088
0.9683265101289564
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [0, 7, 10, 12, 13, 20, 23, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 150}


                                                                                    

0.9596570852244075
0.960359553372818
0.9610092718014719
0.9601401611268268
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [2, 6, 8, 13, 14, 15, 16, 17, 19, 20, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 200}


                                                                                    

0.9318092234923138
0.9310368799653868
0.9309910274574577
0.9313364668644294
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [2, 3, 6, 9, 12, 13, 14, 16, 17, 18, 19, 21, 24, 25]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 250}


                                                                                    

0.9695603156708005
0.9698186577353928
0.9702869730206259
0.9698718149399606
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [0, 1, 2, 4, 7, 9, 10, 14, 17, 18, 19, 20, 22, 23, 25, 26, 27]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 300}


                                                                                    

0.9551579925650557
0.9547998570262392
0.9550665712600251
0.9549079492065544
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 350}


                                                                                    

0.9630171333054743
0.9626579677848921
0.9628184542534917
0.9627447570926493
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [0, 1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 400}


                                                                                    

0.9538239538239538
0.953631104880669
0.95417620115937
0.9536418240780485
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 450}


                                                                                      

0.9640463132236441
0.9640814282858273
0.9643632063717913
0.9640781368344196
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': False, 'input_dim': 72, 'class_numeric_list': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 500}


                                                                                      

0.9617245930488342
0.9618324033563099
0.9621932567844979
0.9617666176730224
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [2, 19]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 50}


                                                                                 

0.947265625
0.9472074239560062
0.9488970588235295
0.9471503776607919
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [2, 3, 9, 24, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 100}


                                                                                    

0.8477078477078477
0.8473664541405681
0.8482843781878149
0.8476960383355738
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [0, 7, 10, 12, 13, 20, 23, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 150}


                                                                                    

0.9268784669692385
0.9270026096333184
0.9265958701960271
0.9278186494787841
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [2, 6, 8, 13, 14, 15, 16, 17, 19, 20, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 200}


                                                                                    

0.8963342530547891
0.8954789318947137
0.9018571949871468
0.8952553358953976
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [2, 3, 6, 9, 12, 13, 14, 16, 17, 18, 19, 21, 24, 25]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 250}


                                                                                    

0.9388387824126269
0.9391634660561051
0.9395967741966317
0.9394557034705742
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [0, 1, 2, 4, 7, 9, 10, 14, 17, 18, 19, 20, 22, 23, 25, 26, 27]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 300}


                                                                                    

0.9214684014869888
0.9218107253825342
0.9220101304215623
0.9221564525817018
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 350}


                                                                                    

0.9325114918512327
0.9324341823366692
0.9332430483554299
0.9326365320293915
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [0, 1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 400}


                                                                                    

0.9264069264069265
0.9262884670882708
0.9269211651936016
0.9262752724740602
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 450}


                                                                                      

0.9325106642291285
0.9323128467753148
0.9329260703300819
0.932581970349005
clearing memory...
{'PH3': False, 'reducer': '', 'kernel': '', 'n': -1, 'label_col': 'class_numeric', 'batch_size': 256, 'class_list': 'specified', 'data_unit': 'Spf', 'seq_len': 1, 'PH2': True, 'input_dim': 87, 'class_numeric_list': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]}
{'device': device(type='mps'), 'arch': 'BERT', 'loadable': 'distilbert-base-uncased', 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 1e-05, 'weight_decay': 0, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'num_epochs': 500}


Training:  24%|██▎       | 118/500 [20:20<1:05:53, 10.35s/epoch, epoch=118, loss=34.2]