In [1]:
import importlib
import torch
import torch.nn as nn
from torch import optim
from torchtext.data import get_tokenizer

import sys

In [2]:
# local imports 
sys.path.append("..")

import transformer_model.model as model
importlib.reload(model)

import transformer_model.model_utils as model_utils
importlib.reload(model_utils)

import transformer_model.data_utils as data_utils
importlib.reload(data_utils)

import data.data_loaders as data_loaders
importlib.reload(data_loaders)

<module 'data.data_loaders' from '/home/cstainsby/B-Onion/src/notebooks/../data/data_loaders.py'>

## Setup

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = get_tokenizer("spacy") # as a standard for all training I will be using spacy

print("Current Device:", device)

if device.type == "cuda":
    print(f"Current CUDA memory usage: {torch.cuda.max_memory_allocated(device=device) / 1024 ** 3:.2f} GB")
    print(f"Current CUDA memory avalible: {torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)} GB")



Current Device: cuda
Current CUDA memory usage: 0.00 GB
Current CUDA memory avalible: 15686828032 GB


### Load Vocab

In [4]:
SAVE_PATH = "../transformer_model/vocab_save/" + "embeddings_vocab.pt"

emb_vocab_obj = torch.load(SAVE_PATH)
data_processor = data_utils.DataProcessor(vocab=emb_vocab_obj, tokenizer=tokenizer)

print("Vocab size:", len(emb_vocab_obj))

Vocab size: 400004


### Load Model

In [5]:
SAVE_PATH = "../transformer_model/model_save/" + "glove_emb_model.pt"

emb_model = torch.load(SAVE_PATH)

print("Loaded hyperparameters:")
print("---------------------------")
print("Model Dimension:    ", emb_model.d_model)
print("Num Layers:         ", emb_model.num_layers)
print("Num Heads:          ", emb_model.nhead)
print("Max Length:         ", emb_model.max_seq_len)
print("Dropout Per.:       ", emb_model.dropout)
print("---------------------------")

Loaded hyperparameters:
---------------------------
Model Dimension:     300
Num Layers:          2
Num Heads:           4
Max Length:          100
Dropout Per.:        Dropout(p=0.2, inplace=False)
---------------------------


## Test Text Generation
Given our mappings we just read in we can now have the transformer make some text.

In [6]:
# start_str = "I went to the"
# number_of_words = 10

# gen_words = start_str.split(" ")
        
# print(start_str, end=" ")
        
# for word in model_utils.generate_text(
#     model=emb_model,
#     vocab=emb_vocab_obj,
#     start_str=start_str,
#     max_length=number_of_words):
#     gen_words.append(word)
#     print(word, end=" ")

## Train the Model

In [7]:
learning_rate = 3e-4

optimizer = optim.Adam(emb_model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

The data will be split into 1000 batches. For each batch, the content will be pulled out. This content will be tokenized and fed into the *train_model* function.

In [8]:
total_loss = 0.0
total_batches = 0

for batch_i, batch in enumerate(data_loaders.generate_articles(batch_size=1)):
    print("Training batch:", batch_i)
    batch_content = batch["Content"]

    total_loss += model_utils.train_batch(
        model=emb_model,
        data_processor=data_processor,
        optimizer=optimizer,
        criterion=criterion,
        batch=batch_content,
        device=device
    )
    total_batches += 1

print("Average Loss:", total_loss/total_batches)


Training batch: 0
 - Loss = 0.0230
 - Memory Allocated:Current CUDA memory usage: 6.30 GB
 - Total time: 0.9140408039093018
Training batch: 1
 - Loss = 0.0422
 - Memory Allocated:Current CUDA memory usage: 6.30 GB
 - Total time: 0.23524713516235352
Training batch: 2
 - Loss = 0.0108
 - Memory Allocated:Current CUDA memory usage: 9.96 GB
 - Total time: 0.6453776359558105
Training batch: 3
 - Loss = 0.0192
 - Memory Allocated:Current CUDA memory usage: 9.96 GB
 - Total time: 0.4762561321258545
Training batch: 4
 - Loss = 0.0192
 - Memory Allocated:Current CUDA memory usage: 9.96 GB
 - Total time: 0.4122445583343506
Training batch: 5
 - Loss = 0.0144
 - Memory Allocated:Current CUDA memory usage: 9.96 GB
 - Total time: 0.5013747215270996
Training batch: 6
 - Loss = 0.0186
 - Memory Allocated:Current CUDA memory usage: 9.96 GB
 - Total time: 0.41969871520996094
Training batch: 7
 - Loss = 0.0087
 - Memory Allocated:Current CUDA memory usage: 11.64 GB
 - Total time: 0.8227598667144775
Train

OutOfMemoryError: CUDA out of memory. Tried to allocate 3.54 GiB (GPU 0; 14.61 GiB total capacity; 13.53 GiB already allocated; 151.12 MiB free; 13.78 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

### Save the trained model
Save the model to a .pt file