In [1]:
import importlib
import torch
import torch.nn as nn
from torch import optim

import sys

In [2]:
# local imports 
sys.path.append("..")

import transformer_model.model as model
importlib.reload(model)

import transformer_model.model_utils as model_utils
importlib.reload(model_utils)

import transformer_model.data_utils as data_utils
importlib.reload(data_utils)

from data import ArticlesIter, articles_filepath

2023-04-01 12:43:10.448352: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-01 12:43:12.039325: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-04-01 12:43:12.039592: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory
2023-04-01 12:43:13.932206: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcu

## Setup

In [3]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Load Vocab

In [4]:
# Path of the saved vocabulary file.
SAVE_PATH = "../transformer_model/vocab_save/" + "embeddings_vocab.pt"

# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load vocabulary files that come from a trusted source.
emb_vocab_obj = torch.load(SAVE_PATH)

# Report how many entries the loaded vocabulary holds.
print(f"Vocab size: {len(emb_vocab_obj)}")

Vocab size: 400004


### Load Model

In [5]:
# Path of the saved model file.
SAVE_PATH = "../transformer_model/model_save/" + "glove_emb_model.pt"

# map_location places the restored tensors on the device selected earlier in
# the notebook. Without it, a checkpoint that was saved from a GPU cannot be
# loaded on a CPU-only machine, and the model would not necessarily live on
# the device that is later passed to training.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source. On PyTorch 2.6+ the
# default is weights_only=True, which rejects fully pickled modules; pass
# weights_only=False there if this file is trusted.
emb_model = torch.load(SAVE_PATH, map_location=device)

# The model stores its dropout as a module object; show the probability it
# was configured with rather than the module repr. Falls back to the raw
# attribute if it has no `p` field.
dropout_value = getattr(emb_model.dropout, "p", emb_model.dropout)

print("Loaded hyperparameters:")
print("---------------------------")
print("Model Dimension:    ", emb_model.d_model)
print("Num Layers:         ", emb_model.num_layers)
print("Num Heads:          ", emb_model.nhead)
print("Max Length:         ", emb_model.max_seq_len)
print("Dropout Per.:       ", dropout_value)

Loaded hyperparameters:
---------------------------
Model Dimension:     300
Num Layers:          2
Num Heads:           4
Max Length:          100
Dropout Per.:        Dropout(p=0.2, inplace=False)


## Test Text Generation
Using the vocabulary mappings and the model we just loaded, we can now have the transformer generate some text.

In [7]:
# Prompt to continue and the maximum length handed to the generator.
start_str = "I went to the"
number_of_words = 10

# Start the running word list from the prompt's space-separated tokens and
# echo the prompt so the generated words follow it on the same line.
gen_words = start_str.split(" ")
print(start_str, end=" ")

word_stream = model_utils.generate_text(
    model=emb_model,
    vocab=emb_vocab_obj,
    start_str=start_str,
    max_length=number_of_words,
)

# Collect and print each word as soon as it is produced.
for word in word_stream:
    gen_words.append(word)
    print(word, end=" ")

I went to the 



saraya bda toomay 2-digit pre-operative catonsville subtractions cock selyem 64.74 

## Train the Model

In [8]:
# Loss function used for training.
criterion = nn.CrossEntropyLoss()

# Adam optimizer over every parameter of the loaded model, with a fixed
# learning rate.
learning_rate = 3e-4
optimizer = optim.Adam(params=emb_model.parameters(), lr=learning_rate)

The data is read in batches of 1000 articles. For each batch, the `Content` field is pulled out; this content is tokenized and used to train the model via the *train_batch* function. Only the first 5 batches are used here.

In [9]:
# Upper bound on how many batches this run trains on.
MAX_TRAIN_BATCHES = 5

article_iter = ArticlesIter(batch_size=1000)
total_loss = 0.0
total_batches = 0

# Pairing the iterator with a bounded range ends the loop once the batch
# budget is used up. `range` comes first so that no extra batch is pulled
# from the iterator after the limit is reached; previously the loop kept
# reading every remaining batch and simply skipped it.
for batch_i, batch in zip(range(MAX_TRAIN_BATCHES), article_iter):
    print("Training batch:", batch_i)
    batch_content = batch["Content"]

    # train_batch's return value is accumulated as this batch's loss.
    total_loss += model_utils.train_batch(
        model=emb_model,
        vocab=emb_vocab_obj,
        optimizer=optimizer,
        criterion=criterion,
        batch=batch_content,
        device=device
    )
    total_batches += 1

# Guard against an empty iterator, which would otherwise divide by zero.
if total_batches:
    print("Average Loss:", total_loss/total_batches)
else:
    print("No batches were trained; average loss is undefined.")


Training batch: 0




KeyboardInterrupt: 

### Save the trained model
Save the model to a .pt file