# Approach B Training: BiLSTM with FNN

## Imports

In [6]:
!pip install torchtext==0.2.1
!pip install jsonlines

Collecting torchtext==0.2.1
  Downloading torchtext-0.2.1-py3-none-any.whl (41 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torchtext
  Attempting uninstall: torchtext
    Found existing installation: torchtext 0.17.1
    Uninstalling torchtext-0.17.1:
      Successfully uninstalled torchtext-0.17.1
Successfully installed torchtext-0.2.1
Collecting jsonlines
  Downloading jsonlines-4.0.0-py3-none-any.whl (8.7 kB)
Installing collected packages: jsonlines
Successfully installed jsonlines-4.0.0


In [14]:
import jsonlines
import csv
import os
import json
import pandas as pd

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd drive/MyDrive/Uni/NLU/HBMP

## Constants

In [2]:
EPOCHS = 8
WORD_DIMENSION = 300
DROPOUT = 0.1
LEARN_RATE = 0.0005
LEARN_RATE_PATIENCE = 1
LEARN_RATE_DECAY = 0.9
LEARN_RATE_REDUCTION_FACTOR = 0.2

RESULTS_PATH = 'results'
TRAIN_PATH = '.data/snli/snli_1.0/train.csv'
TRAIN_JSONL_PATH = '.data/snli/snli_1.0/train.jsonl'
VALIDATION_PATH = '.data/snli/snli_1.0/dev.csv'
VALIDATION_JSONL_PATH = '.data/snli/snli_1.0/dev.jsonl'
ITERATION_1_PATH = '/content/drive/MyDrive/Colab/NLU_CW/bilstm_tuning/1'
ITERATION_2_PATH = '/content/drive/MyDrive/Colab/NLU_CW/bilstm_tuning/2'
ITERATION_3_PATH = '/content/drive/MyDrive/Colab/NLU_CW/bilstm_tuning/3'

# Gridspace values
SEEDS = [1234, 4321, 4444]
EMBEDDING_DIMENSIONS = [300, 450, 600, 750]
BATCH_SIZES = [16, 32, 64, 128]

## Preprocess Datasets

In [None]:
with open(TRAIN_PATH, mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    with jsonlines.open(TRAIN_JSONL_PATH, mode='w') as writer:
        for index, row in enumerate(csv_reader):
            writer.write(row)
with open(VALIDATION_PATH, mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    with jsonlines.open(VALIDATION_JSONL_PATH, mode='w') as writer:
        for index, row in enumerate(csv_reader):
            writer.write(row)

## Training

In [None]:
# You will need the repository to run the training
# https://drive.google.com/drive/folders/1NTlKNYkWa_vshVUWyf5SloPw8z3yUqsE?usp=share_link

In [None]:
# Perform gridspace search with 16 different hyperparameter setups,
# repeat 3 times and average

for SEED in SEEDS:
  for DIMENSION in EMBEDDING_DIMENSIONS:
    for BATCH_SIZE in BATCH_SIZES:
      !python train.py \
        --epochs $EPOCHS \
        --batch_size $BATCH_SIZE \
        --corpus nlu \
        --encoder_type HBMP \
        --activation leakyrelu \
        --optimizer adam \
        --word_embedding glove.840B.300d \
        --embed_dim $WORD_DIMENSION \
        --fc_dim $DIMENSION \
        --hidden_dim $DIMENSION \
        --layers 1 \
        --dropout $DROPOUT \
        --learning_rate $LEARN_RATE \
        --lr_patience $LEARN_RATE_PATIENCE \
        --lr_decay $LEARN_RATE_DECAY \
        --lr_reduction_factor $LEARN_RATE_REDUCTION_FACTOR \
        --weight_decay 0 \
        --early_stopping_patience 8 \
        --save_path $RESULTS_PATH \
        --seed $SEED


## Model Evaluation Using Validation Data

In [21]:
def read_json_files(directory):
    json_files = [file for file in os.listdir(directory) if file.endswith('.json')]
    data = []
    for file in json_files:
        with open(os.path.join(directory, file), 'r') as f:
            json_data = json.load(f)
            model_setup = f'bs:{json_data["batch_size"]};ed:{json_data["embedding_dimensionality"]}'
            model = [[model_setup,
                      details['epoch'],
                      details['train_loss'],
                      details['val_loss'],
                      details['val_accuracy']] for details in json_data['model_details']]
            data.extend(model)
    return data

def convert_to_float_if_valid(s):
  try:
      return float(s)
  except ValueError:
      return s

In [22]:
iteration_1 = read_json_files(ITERATION_1_PATH)
iteration_2 = read_json_files(ITERATION_2_PATH)
iteration_3 = read_json_files(ITERATION_3_PATH)

In [27]:
df1 = pd.DataFrame(iteration_1, columns=['model', 'epoch', 'train_loss', 'val_loss', 'val_accuracy'])
df1 = df1.applymap(convert_to_float_if_valid)
df1

Unnamed: 0,model,epoch,train_loss,val_loss,val_accuracy
0,bs:32;ed:300,1.0,0.592241,0.534895,72.673294
1,bs:32;ed:300,2.0,0.454411,0.534901,73.192818
2,bs:32;ed:300,3.0,0.274816,0.654219,72.480331
3,bs:32;ed:300,4.0,0.084075,0.992834,72.539703
4,bs:32;ed:300,5.0,0.036645,1.292853,71.797531
...,...,...,...,...,...
123,bs:16;ed:300,4.0,0.077409,1.061908,71.842064
124,bs:16;ed:300,5.0,0.035601,1.449939,71.381920
125,bs:16;ed:300,6.0,0.015465,1.643802,71.278015
126,bs:16;ed:300,7.0,0.010655,1.822342,71.396759


In [28]:
df2 = pd.DataFrame(iteration_2, columns=['model', 'epoch', 'train_loss', 'val_loss', 'val_accuracy'])
df2 = df2.applymap(convert_to_float_if_valid)
df2

Unnamed: 0,model,epoch,train_loss,val_loss,val_accuracy
0,bs:32;ed: 300,1.0,0.595648,0.531821,73.014694
1,bs:32;ed: 300,2.0,0.456764,0.534139,73.727173
2,bs:32;ed: 300,3.0,0.278970,0.603006,72.940475
3,bs:32;ed: 300,4.0,0.082501,1.064193,71.842064
4,bs:32;ed: 300,5.0,0.034768,1.374172,71.574883
...,...,...,...,...,...
123,bs:128;ed: 750,4.0,0.235185,0.706150,71.322548
124,bs:128;ed: 750,5.0,0.074172,1.075380,71.233482
125,bs:128;ed: 750,6.0,0.032497,1.286613,70.728806
126,bs:128;ed: 750,7.0,0.016203,1.355956,70.862396


In [29]:
df3 = pd.DataFrame(iteration_3, columns=['model', 'epoch', 'train_loss', 'val_loss', 'val_accuracy'])
df3 = df3.applymap(convert_to_float_if_valid)
df3

Unnamed: 0,model,epoch,train_loss,val_loss,val_accuracy
0,bs:16;ed:300,1.0,0.584693,0.521263,73.994354
1,bs:16;ed:300,2.0,0.441641,0.514260,73.920143
2,bs:16;ed:300,3.0,0.257225,0.748974,71.322548
3,bs:16;ed:300,4.0,0.122678,0.896173,72.331894
4,bs:16;ed:300,5.0,0.037782,1.410608,70.862396
...,...,...,...,...,...
123,bs:128;ed:750,4.0,0.226705,0.720766,70.773338
124,bs:128;ed:750,5.0,0.078682,1.005751,71.055367
125,bs:128;ed:750,6.0,0.033852,1.241796,70.565529
126,bs:128;ed:750,7.0,0.016896,1.315327,70.847557


In [30]:
model_column = df1['model']
numeric_columns = df1.select_dtypes(include=['number']).columns

# Sum the corresponding numerical elements of the DataFrames
sum_df = df1[numeric_columns] + df2[numeric_columns] + df3[numeric_columns]

# Divide the sum by the number of DataFrames
average_df = sum_df / 3
average_df = pd.concat([model_column, average_df], axis=1)
average_df

Unnamed: 0,model,epoch,train_loss,val_loss,val_accuracy
0,bs:32;ed:300,1.0,0.590861,0.529326,73.227448
1,bs:32;ed:300,2.0,0.450939,0.527767,73.613378
2,bs:32;ed:300,3.0,0.270337,0.668733,72.247785
3,bs:32;ed:300,4.0,0.096418,0.984400,72.237887
4,bs:32;ed:300,5.0,0.036398,1.359211,71.411603
...,...,...,...,...,...
123,bs:16;ed:300,4.0,0.179766,0.829608,71.312650
124,bs:16;ed:300,5.0,0.062818,1.177023,71.223590
125,bs:16;ed:300,6.0,0.027271,1.390737,70.857450
126,bs:16;ed:300,7.0,0.014585,1.497875,71.035571


In [32]:
# bs:128;ed:600
min_index = average_df['val_loss'].idxmin()
min_row = average_df.loc[min_index]
print(min_row)

model           bs:128;ed:600
epoch                     2.0
train_loss           0.486125
val_loss             0.518336
val_accuracy        74.078468
Name: 81, dtype: object
