In [None]:
import torch
import torch.nn as nn
import sys
import os
from torch.utils.data import DataLoader

sys.path.append(os.path.abspath('../src'))
from dataset import TextDataset
from models import LSTMModel

# Hyperparams
# Everything a reader might tune lives in this cell; later cells only read these names.
SEED = 42          # fixed RNG seed so weight init and batch shuffling repeat across runs
SEQ_LEN = 20       # forwarded to TextDataset as seq_len
BATCH_SIZE = 64    # samples per batch drawn by the DataLoader
EMBED_DIM = 128    # embedding width passed to LSTMModel
HIDDEN_DIM = 256   # LSTM hidden size passed to LSTMModel
LR = 0.001         # Adam learning rate
EPOCHS = 2         # number of full passes over the dataloader

# Seed PyTorch's default generator (and CUDA generators, if any) before any model
# or DataLoader is created. Some GPU kernels may still be non-deterministic.
torch.manual_seed(SEED)

In [2]:
# Dataset
# Read the processed corpus through TextDataset and wrap it in a shuffling batch loader.
corpus_path = '../data/processed/corpus.txt'
dataset = TextDataset(corpus_path, seq_len=SEQ_LEN)
dataloader = DataLoader(dataset, shuffle=True, batch_size=BATCH_SIZE)

# One slot more than the number of word2idx entries.
# NOTE(review): presumably the extra slot is index 0 reserved for padding — confirm in TextDataset.
vocab_size = 1 + len(dataset.word2idx)
print(f'Vocab Size: {vocab_size}')

Loading corpus from ../data/processed/corpus.txt...
Total tokens loaded: 12984769
Converting tokens to indices...
Vocab Size: 63727


In [3]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')

# Build the network on the chosen device, then its loss and optimizer.
model = LSTMModel(vocab_size, EMBED_DIM, HIDDEN_DIM)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)

# Show the layer summary as a sanity check of the configured sizes.
print(model)

LSTMModel(
  (embedding): Embedding(63727, 128, padding_idx=0)
  (lstm): LSTM(128, 256, num_layers=2, batch_first=True, dropout=0.3)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=63727, bias=True)
)


In [None]:
# Training Loop
# Each epoch makes one pass over `dataloader`, taking one optimizer step per batch.
model.train()
for epoch in range(EPOCHS):
    total_loss = 0  # running sum of per-batch losses for this epoch
    for step, (inputs, targets) in enumerate(dataloader):
        # Move the batch onto the same device as the model.
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left from the previous batch, then forward / backward / update.
        optimizer.zero_grad()
        outputs, _ = model(inputs)  # the second return value is not used here
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Only every 100th batch (including step 0) is logged.
        if step % 100 != 0:
            continue
        print(f'Epoch {epoch+1}, Step {step}, Loss: {loss.item():.4f}')

    # Mean of the per-batch losses over the whole epoch.
    print(f'Epoch {epoch+1} Average Loss: {total_loss / len(dataloader):.4f}')