In [8]:
!git clone https://github.com/caseyhird/casey-lm.git

Cloning into 'casey-lm'...
remote: Enumerating objects: 17, done.
remote: Counting objects: 100% (17/17), done.
remote: Compressing objects: 100% (12/12), done.
remote: Total 17 (delta 2), reused 9 (delta 0), pack-reused 0 (from 0)
Receiving objects: 100% (17/17), 6.18 KiB | 6.18 MiB/s, done.
Resolving deltas: 100% (2/2), done.


In [19]:
!cd /content/casey-lm && pip install -qr requirements.txt

Collecting appnope==0.1.4 (from -r requirements.txt (line 4))
  Downloading appnope-0.1.4-py2.py3-none-any.whl.metadata (908 bytes)
Collecting asttokens==2.4.1 (from -r requirements.txt (line 5))
  Downloading asttokens-2.4.1-py2.py3-none-any.whl.metadata (5.2 kB)
Collecting comm==0.2.2 (from -r requirements.txt (line 9))
  Downloading comm-0.2.2-py3-none-any.whl.metadata (3.7 kB)
Collecting datasets==3.0.0 (from -r requirements.txt (line 10))
  Downloading datasets-3.0.0-py3-none-any.whl.metadata (19 kB)
Collecting debugpy==1.8.5 (from -r requirements.txt (line 11))
  Downloading debugpy-1.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting decorator==5.1.1 (from -r requirements.txt (line 12))
  Downloading decorator-5.1.1-py3-none-any.whl.metadata (4.0 kB)
Collecting dill==0.3.8 (from -r requirements.txt (line 13))
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting executing==2.1.0 (from -r requirements.txt (line 14))
  Downl

In [25]:
# Allow imports from the project directory.
import sys

PROJECT_DIR = '/content/casey-lm'
# Only add the entry once so that re-running this cell does not pile up
# duplicate sys.path entries.
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)


In [20]:
import torch

# Pick the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Make newly created tensors default to CUDA float tensors, but only when a
# GPU is actually present: requesting the CUDA tensor type on a CPU-only
# runtime raises instead of honouring the CPU fallback chosen above.
if device.type == "cuda":
    torch.set_default_tensor_type(torch.cuda.FloatTensor)

In [1]:
from data.glue_dataloader import gen_dataloaders

# Data settings. max_length is also reused as the model's context_length
# when the model is configured inside do_train.
batch_size, max_length = 32, 128

# gen_dataloaders returns a three-item result: the training loader, the
# validation loader, and the vocabulary size.
_glue_splits = gen_dataloaders(batch_size, max_length)
train_dataloader, val_dataloader, vocab_size = _glue_splits

  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|██████████| 872/872 [00:00<00:00, 22292.72 examples/s]


In [28]:
import tqdm
import torch
from model.torch_impl.language_model import TorchLanguageModel, LanguageModelConfig

def do_train(num_epochs=5, learning_rate=0.001):
    """Train a small TorchLanguageModel on next-token prediction.

    The model is configured from the notebook-level ``vocab_size`` and
    ``max_length``. Each epoch runs one optimisation pass over
    ``train_dataloader`` and then one evaluation pass over ``val_dataloader``,
    printing the mean per-token training loss and the validation next-token
    accuracy.

    Args:
        num_epochs: Number of passes over the training data.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        None. The trained model is local to this function and is not returned.
    """
    model = TorchLanguageModel(LanguageModelConfig(**{
        'vocab_size': vocab_size,
        'context_length': max_length,
        'embedding_dim': 8,
        'num_decoder_layers': 3,
        'num_heads': 2,
        'dim_feedforward': 32,
        'dropout': 0.1,
    }))
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        num_train_examples = 0
        # assumes each batch is a 2-D tensor of token ids (batch, sequence)
        # -- TODO confirm against gen_dataloaders
        for batch in tqdm.tqdm(train_dataloader):
            # Shift by one position: the model sees tokens [0, n-1) and is
            # scored against tokens [1, n).
            inputs = batch[:, :-1]
            labels = batch[:, 1:]
            optimizer.zero_grad()
            output = model(inputs)
            # reshape (not view) so a non-contiguous model output still works.
            loss = criterion(output.reshape(-1, vocab_size), labels.flatten())
            loss.backward()
            optimizer.step()

            # The criterion returns the mean loss over this batch's tokens, so
            # weight it by the token count before accumulating; dividing the
            # total by the token count below then gives a true per-token mean.
            num_batch_tokens = labels.numel()
            train_loss += loss.item() * num_batch_tokens
            num_train_examples += num_batch_tokens

        train_loss /= num_train_examples
        print(f'Epoch {epoch+1}, Training Loss: {train_loss:.4f}')

        model.eval()
        with torch.no_grad():
            total_correct = 0
            num_val_examples = 0
            for batch in val_dataloader:
                inputs = batch[:, :-1]
                labels = batch[:, 1:]
                output = model(inputs)
                # Highest-scoring vocabulary entry at every position.
                _, predicted = torch.max(output, dim=2)
                total_correct += (predicted == labels).sum().item()
                num_val_examples += labels.numel()

            accuracy = total_correct / num_val_examples
            print(f'Epoch {epoch+1}, Validation Accuracy: {accuracy:.4f}')