In [1]:
from avgm.data import Dataset, PadSequence
from torch.utils import data

In [2]:
ds_train, ds_valid, ds_test = Dataset.read_hdf5("../data/reviews/tokenized.h5")

In [3]:
dl_train = data.DataLoader(ds_train, batch_size=64, collate_fn=PadSequence(ds_train.PAD_INDEX))

In [4]:
import torch.nn as nn
import torch

device = "cuda"

class AVGM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, output_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.gru = nn.GRU(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, tokens, lengths):
        embedded = self.embedding(tokens)
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, lengths, batch_first=True)
        packed_output, hidden = self.gru(packed_embedded)
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)
        hidden = hidden.squeeze(0)
        linear = self.fc(hidden)
        return linear
    
model = AVGM(10000, 256, 10, 10).to(device)

In [5]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

epoch_loss = 0
print_every = 50

model.train()
for idx, (tokens, lengths, scores) in enumerate(dl_train):
    tokens = tokens.to(device)
    lengths = lengths.to(device)
    scores = scores.to(device)
    optimizer.zero_grad()
    pred = model(tokens, lengths)
    loss = criterion(pred, scores)
    loss.backward()
    optimizer.step()
    
    epoch_loss += loss.item()
    
    if idx % print_every == 0:
        print(loss.item())

2.3407678604125977
2.0524790287017822
1.7109973430633545
2.038256883621216
2.59321928024292
2.021029233932495
1.475994348526001
2.0800676345825195
2.1745822429656982
1.4523919820785522
1.9321473836898804
1.9558751583099365
1.9596513509750366
2.093621253967285
1.594265341758728
1.9891183376312256
1.6430646181106567
2.026240348815918
1.7544058561325073
1.6521321535110474
2.1556026935577393
2.248049259185791
1.183997392654419
0.9475277662277222
0.3891753554344177
0.23222985863685608
2.3603057861328125
2.2337112426757812
2.0156960487365723
1.469807505607605
2.0749168395996094
1.7028934955596924
2.579878330230713
2.223663806915283
1.5985597372055054
2.1275618076324463
1.0906950235366821
1.9886415004730225
1.9037256240844727
1.9216930866241455
1.9090750217437744
1.3664392232894897
1.0608646869659424
2.284879446029663
2.018115282058716
1.5771710872650146
1.8011329174041748
1.7500807046890259
2.0599160194396973
1.0179951190948486
1.4839439392089844
1.0367655754089355
1.7333407402038574
2.06668

RuntimeError: CUDA out of memory. Tried to allocate 4.66 GiB (GPU 0; 8.00 GiB total capacity; 4.73 GiB already allocated; 1.52 GiB free; 4.75 GiB reserved in total by PyTorch) (malloc at ..\c10\cuda\CUDACachingAllocator.cpp:289)
(no backtrace available)