# Setup

## Imports

In [1]:
# importlib is used below to reload the project modules; sys and os are used
# to extend the import search path. torch (PyTorch) is imported alongside them.
import importlib
import sys
import os
import torch

# Append the parent of the current working directory to sys.path so the
# project packages imported below can be found
# (presumably `utils` and `classes` live in that parent directory - verify)
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import the utility modules and the model module as module objects
# (importlib.reload operates on module objects, not on names inside them)
from utils import setuputil, trainutil, inferutil
from classes.models import SimpleGeluEmbed

# Reload the modules so edits made to their source since the first import
# are picked up without restarting the kernel
importlib.reload(setuputil)
importlib.reload(trainutil)
importlib.reload(inferutil)
importlib.reload(SimpleGeluEmbed)

# Import the required functions only after the reload, so these names bind
# to the freshly reloaded module contents
from utils.setuputil import setup_config, display_config
from utils.trainutil import train_model
from utils.inferutil import infer_one, infer_full

# Import the SimpleGeluEmbedAvg class from the reloaded model module
from classes.models.SimpleGeluEmbed import SimpleGeluEmbedAvg

## Config Setup

In [2]:
# Define the input configuration for the simple model.
# Every key is passed unchanged to setup_config, which returns the expanded
# configuration used by the rest of the notebook.
input_config = {
    # Environment and Model Info
    "env": "bvm",
    "approach": "simple",
    "model_name": "SimpleGeluEmbedAvg",

    # System Configuration
    # Use the first CUDA GPU when one is available; otherwise fall back to the
    # CPU instead of failing later on a machine without a GPU.
    # NOTE(review): assumes setup_config accepts "cpu" as a device string - confirm.
    "device": "cuda:0" if torch.cuda.is_available() else "cpu",
    "threads": 32,
    "seed": 42,

    # Data Configuration
    "data_dir": "../../../data/farzan",
    "data_ds": "manual",

    # Model Parameters
    "rows": 100,
    "cols": 100,
    "tokens": 16,

    # Vocabulary Parameters
    "vocab_size": 150000,
    "vocab_space": True,
    "vocab_case": "both",

    # Training Parameters
    "batch": 40,
    "lr": 1e-1,
    "mu": 0.25,
    "epochs": 20,
    "patience": 2,
    "save_int": 10,
    "save_dir": "../models/",
}

# Setup the configuration using setuputil and display it
config = setup_config(input_config)
display_config(config)

# Unpack the prepared configuration into notebook-level names.
# Runtime settings
DEVICE, THREADS = config["DEVICE"], config["THREADS"]

# Loaders for the train / validation / test splits
train_loader, val_loader, test_loader = (
    config["train_loader"],
    config["val_loader"],
    config["test_loader"],
)

# Vocabulary and the word-embedding matrix built from it
spreadsheet_vocab, spreadsheet_wvs = config["vocab"], config["wvs"]

# Optimisation hyper-parameters
batch_size, lr, mu = config["batch"], config["lr"], config["mu"]
epochs, patience = config["epochs"], config["patience"]

# Checkpointing settings
save_int, save_dir, save_name = (
    config["save_int"],
    config["save_dir"],
    config["save_name"],
)

Getting Vocab: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 120.06it/s]


40(P) = 40(G) + 0(E)
Unique Tokens: 5593
Vocab Size: 5597


Creating Word Embeddings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 5597/5597 [00:00<00:00, 79459.52it/s]


Word Embeddings Shape: torch.Size([5597, 50])


Processing files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 7223.46it/s]



40(P) = 40(G) + 0(E)


Processing files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 5232.42it/s]



5(P) = 5(G) + 0(E)


Processing files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 4336.54it/s]



5(P) = 5(G) + 0(E)

Configuration for SIMPLE approach:
{
  "env": "bvm",
  "approach": "simple",
  "model_base": "glove50",
  "model_name": "SimpleGeluEmbedAvg",
  "rows": 100,
  "cols": 100,
  "tokens": 16,
  "DEVICE": "cuda:0",
  "THREADS": 32,
  "seed": 42,
  "data_ds": "manual",
  "data_dir": "../../../data/farzan",
  "train_dir": "../../../data/farzan/manual_train",
  "val_dir": "../../../data/farzan/manual_val",
  "test_dir": "../../../data/farzan/manual_test",
  "vocab_size": 5597,
  "vocab_space": true,
  "vocab_case": "both",
  "vocab": "<Vocab Object>",
  "wvs": "<Embedding Matrix>",
  "train_loader": "<LoaderSimple Object>",
  "val_loader": "<LoaderSimple Object>",
  "test_loader": "<LoaderSimple Object>",
  "batch": 40,
  "lr": 0.1,
  "mu": 0.25,
  "epochs": 20,
  "patience": 2,
  "save_int": 10,
  "save_dir": "../models/",
  "save_name": "bsim42_SimpleGeluEmbedAvg_manual_100x100x16_bSp5k_bsz40lr1e-1ep20pa2"
}


# Model Training

## Define the model

In [3]:
# Build the model from the embedding matrix, then place it on the target device
untrained_model = SimpleGeluEmbedAvg(spreadsheet_wvs)
untrained_model = untrained_model.to(DEVICE)

# Print the layer summary of the freshly built model
print(untrained_model)

SimpleGeluEmbedAvg(
  (_embed): Embedding(5597, 50)
  (_drop): Dropout(p=0.05, inplace=False)
  (_non_linear): GELU()
  (_pred): Linear(in_features=50, out_features=1, bias=True)
)


## Train the Model

In [None]:
# Gather every training argument by keyword, then launch the training run
train_kwargs = dict(
    model=untrained_model,
    train_data=train_loader,
    val_data=val_loader,
    DEVICE=DEVICE,
    batch_size=batch_size,
    lr=lr,
    mu=mu,
    max_epochs=epochs,
    patience=patience,
    save_int=save_int,
    save_dir=save_dir,
    save_name=save_name,
    config=config,
)
trained_model = train_model(**train_kwargs)

Epoch 0


Batch Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.75s/it]
Validation Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.33s/it]


Train Loss: 1.7129079103469849, Perplexity: 1.0000171292258069
Val Loss: 0.7026445269584656, Perplexity: 1.0000070264699552


Epoch 1


Batch Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.73s/it]
Validation Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.29s/it]


Train Loss: 0.712165892124176, Perplexity: 1.0000071216842803
Val Loss: 0.4020729660987854, Perplexity: 1.000004020737744


Epoch 2


Batch Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.62s/it]
Validation Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.32s/it]


Train Loss: 0.40179795026779175, Perplexity: 1.0000040179875749
Val Loss: 0.30077892541885376, Perplexity: 1.0000030077937776


Epoch 3


Batch Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.61s/it]
Validation Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.32s/it]


Train Loss: 0.30232474207878113, Perplexity: 1.0000030232519908
Val Loss: 0.2615412175655365, Perplexity: 1.0000026154155959


Epoch 4


Batch Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.65s/it]
Validation Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.29s/it]


Train Loss: 0.2652941048145294, Perplexity: 1.0000026529445671
Val Loss: 0.237630695104599, Perplexity: 1.0000023763097745


Epoch 5


Batch Processing:   0%|                                                                                                                                  | 0/1 [00:00<?, ?it/s]

# Evaluation

In [None]:
# Define params for evaluation: threshold, example location, comparison
# condition and the display flag passed to the inference helpers
thresh, loc, cond, disp_max = 0.91, 0, '>', True

## Single Example

In [None]:
# Run single-example inference on the train, val and test loaders in turn,
# sharing one set of keyword arguments across the three calls
one_example_kwargs = dict(loc=loc, threshold=thresh, condition=cond, disp_max=disp_max, device=DEVICE)
infer_one(trained_model, train_loader, **one_example_kwargs)
infer_one(trained_model, val_loader, **one_example_kwargs)
infer_one(trained_model, test_loader, **one_example_kwargs)

## All Examples

In [None]:
# Full evaluation over every file in the train loader
infer_full(
    trained_model,
    train_loader,
    batch_size=batch_size,
    threshold=thresh,
    device=DEVICE,
)

In [None]:
# Full evaluation over every file in the validation loader
infer_full(
    trained_model,
    val_loader,
    batch_size=batch_size,
    threshold=thresh,
    device=DEVICE,
)

In [None]:
# Full evaluation over every file in the test loader
infer_full(
    trained_model,
    test_loader,
    batch_size=batch_size,
    threshold=thresh,
    device=DEVICE,
)