In [13]:
!source ~/venv-ptmetal/bin/activate

In [27]:
import torch
from torch import nn
import torch.nn.functional as F

device = "cpu"
if torch.backends.mps.is_available():
   device = "mps"
   x = torch.ones(1).to("mps")
   print (x)
else:
   print ("MPS device not found.")

tensor([1.], device='mps:0')


In [30]:
import time

start_time = time.time()

torch.mps.synchronize()
a = torch.ones(32000,32000).to(device)
for _ in range(500):
   a +=a

elapsed_time = time.time() - start_time
print( "GPU Time: ", elapsed_time)

GPU Time:  10.581156015396118


In [31]:
import time

start_time = time.time()

torch.mps.synchronize()

a = torch.ones(32000,32000)
for _ in range(500):
   a +=a

elapsed_time = time.time() - start_time
print( "Non GPU Time: ", elapsed_time)

Non GPU Time:  21.180355072021484


# Deprecated Functions

In [32]:
# new way to tokenize
def build_char_vocab(corpus):
    char_set = set()
    for sentence in corpus:
        char_set.update(sentence)
    # <pad> and <unk>
    char_vocab = {'<pad>': 0, '<unk>': 1}
    char_vocab.update({char: idx + 2 for idx, char in enumerate(sorted(char_set))})
    return char_vocab

corpus = ["Hello world", "This is an example sentence"]
char_vocab = build_char_vocab(corpus)
print(char_vocab)

def build_word_vocab(corpus):
    word_set = set()
    for sentence in corpus:
        word_set.update(sentence.split())
    # Adding <pad> and <unk>
    word_vocab = {'<pad>': 0, '<unk>': 1}  # gonna assume it doesn't need to be sorted, read that we don't need it rn.
    word_vocab.update({word: idx + 2 for idx, word in enumerate(word_set)})
    return word_vocab

corpus = ["Hello world", "This is an example sentence"]
word_vocab = build_word_vocab(corpus)
print(word_vocab)

{'<pad>': 0, '<unk>': 1, ' ': 2, 'H': 3, 'T': 4, 'a': 5, 'c': 6, 'd': 7, 'e': 8, 'h': 9, 'i': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'r': 16, 's': 17, 't': 18, 'w': 19, 'x': 20}
{'<pad>': 0, '<unk>': 1, 'Hello': 2, 'This': 3, 'sentence': 4, 'example': 5, 'is': 6, 'an': 7, 'world': 8}


In [33]:
# def one_hot_encode(index, vocab_size):
#     one_hot = torch.zeros(vocab_size)
#     one_hot[index] = 1
#     return one_hot

# def preprocess_corpus(corpus, char_vocab, max_length):
#     vocab_size = len(char_vocab)
#     processed_corpus = []
#     for sentence in corpus:
#         sentence_indices = [char_vocab.get(char, char_vocab['<unk>']) for char in sentence.lower()]
#         padded_indices = sentence_indices + [char_vocab['<pad>']] * (max_length - len(sentence_indices))
#         # one_hot_sentence = [one_hot_encode(index, vocab_size) for index in padded_indices[:max_length]]
#         processed_corpus.append(torch.stack(one_hot_sentence))
#     return torch.stack(processed_corpus)

'''
 Do not need one hot encoding. 
 will shrink char_embed dims to align with Character-level 
 Convolutional Neural Network section in paper later on. i.e. d < |C|
'''

def preprocess_corpus(corpus, char_vocab, max_length=30):
    # words should be lowercase, then tokenized, map characters to indices, and pad to max_length
    processed_corpus = []
    for sentence in corpus:
        words = sentence.split()
        for word in words:
            word_indices = [char_vocab.get(char, char_vocab['<unk>']) for char in word.lower()]
            # pad to max_length
            padded_indices = word_indices + [char_vocab['<pad>']] * (max_length - len(word_indices))
            processed_corpus.append(padded_indices[:max_length])
    return processed_corpus

corpus = ["Hello world", "This is an example sentence"]

processed_corpus = preprocess_corpus(corpus, char_vocab)

input_tensor = torch.tensor(processed_corpus)
print(input_tensor)

tensor([[ 9,  8, 11, 11, 14,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [19, 14, 16, 11,  7,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [18,  9, 10, 17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [10, 17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 5, 13,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 8, 20,  5, 12, 15, 11,  8,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [17,  8, 13, 18,  8, 13,  6,  8,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])


# Active Code

In [34]:
import torch
from torch import nn
import torch.nn.functional as F

class HighwayBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.project = nn.Linear(input_dim, output_dim)
        self.transform = nn.Linear(input_dim, output_dim)
        self.trans_bias = nn.Parameter(torch.tensor(-2.0))

    def forward(self, x):
        proj_output = torch.relu(self.project(x))
        trans_output = torch.sigmoid(self.transform(x) + self.trans_bias)
        return trans_output * proj_output + (1 - trans_output) * x

class HighwayNetwork(nn.Module):
    def __init__(self, input_size, output_size, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.layers = []
        for i in range(num_layers):
            layer_size = input_size if i == 0 else output_size
            self.layers.append(HighwayBlock(layer_size, output_size))
            self.add_module(f'highway_block_{i}', self.layers[-1])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

class ConvolutionBlock(nn.Module):
    def __init__(self, channels, kernel, features):
        super().__init__()
        self.conv_layer = nn.Conv2d(channels, features, kernel)

    def forward(self, x, size_reduce):
        conv_output = torch.tanh(self.conv_layer(x))
        pooled_output = F.max_pool2d(conv_output, kernel_size=[1, size_reduce])
        return pooled_output.squeeze(3).squeeze(2)

class ConvolutionNetwork(nn.Module):
    def __init__(self, channel_size, kernel_sizes, feature_sizes):
        super().__init__()
        self.conv_blocks = nn.ModuleList()

        # applies the filters of differenent widths over input
        for i, (k_size, f_size) in enumerate(zip(kernel_sizes, feature_sizes)):
            self.conv_blocks.append(ConvolutionBlock(channel_size, (1, k_size), f_size))

    def forward(self, x):
        # squeezes output to accomodate for batches
        x = x.unsqueeze(2).transpose(1, 3)
        conv_outputs = [block(x, x.size(3) - k_size + 1) for block, k_size in zip(self.conv_blocks, kernel_sizes)]
        return torch.cat(conv_outputs, 1)

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout_rate):
        super().__init__()
        self.rnn_layers = nn.LSTM(input_size, hidden_size, num_layers=num_layers, dropout=dropout_rate)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

    # use this instead of hidden state init outside
    def init_hidden_state(self, batch_size):
        weight = next(self.parameters()).data
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size),
                torch.zeros(self.num_layers, batch_size, self.hidden_size))

    def forward(self, x, hidden):
        output, hidden = self.rnn_layers(x, hidden)
        return output, hidden

class CharacterToWordModel(nn.Module):
    def __init__(self, char_vocab_size, char_embed_dim, word_vocab_size, 
                 conv_out_size, hidden_dim, kernel_sizes, features, num_highway_layers, 
                 num_rnn_layers, dropout):
        super().__init__()
        self.char_vocab_size = char_vocab_size
        self.char_embed_dim = char_embed_dim
        self.word_vocab_size = word_vocab_size
        self.conv_out_size = conv_out_size
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout

        self.char_embedding = nn.Embedding(char_vocab_size, char_embed_dim, padding_idx=0)
        # print("charvocab", char_vocab_size, "char_embed_dim", char_embed_dim)
        self.conv_net = ConvolutionNetwork(char_embed_dim, kernel_sizes, features)
        self.highway_net = HighwayNetwork(conv_out_size, conv_out_size, num_highway_layers)
        self.rnn_net = RNN(conv_out_size, hidden_dim, num_rnn_layers, dropout)
        self.output_layer = nn.Linear(hidden_dim, word_vocab_size)
        self.dropout = nn.Dropout(dropout)

        self.initw()

    def initw(self):
        rng = 0.1
        self.char_embedding.weight.data.uniform_(-rng, rng)
        self.output_layer.bias.data.fill_(0)
        self.output_layer.weight.data.uniform_(-rng, rng)

    def forward(self, input_chars, hidden_state):
        # print("input shape", input_chars.shape)
        emb = self.char_embedding(input_chars)
        # print("embedding shape", emb.shape)
        conv_output = self.conv_net(emb)
        highway_output = self.highway_net(conv_output)
        rnn_output, hidden_state = self.rnn_net(highway_output, hidden_state)
        rnn_output = self.dropout(rnn_output)
        final_output = self.output_layer(rnn_output.view(-1, self.hidden_dim))
        top_word_indices = torch.argmax(final_output, dim=-1)
        return final_output, hidden_state

In [54]:

def load_dataset(file_path):
    with open(file_path, 'r') as file:
        data = file.readlines()
    return data

train_data = load_dataset('train.txt')
valid_data = load_dataset('valid.txt')
test_data = load_dataset('test.txt')


def preprocess_data(data):
    corpus = []

    for sentence in data:

        words = sentence.strip().split()
        for i in range(len(words) - 1):

            predictor, target = words[i], words[i + 1]

            corpus.append((predictor, target))
    return corpus
testing = [
    "Hello world",
    "This is an example sentence",
    "To be or not to be, that is the question",
    "I think, therefore I am",]

test = preprocess_data(testing)
print(len(test))
print(test)

18
[('Hello', 'world'), ('This', 'is'), ('is', 'an'), ('an', 'example'), ('example', 'sentence'), ('To', 'be'), ('be', 'or'), ('or', 'not'), ('not', 'to'), ('to', 'be,'), ('be,', 'that'), ('that', 'is'), ('is', 'the'), ('the', 'question'), ('I', 'think,'), ('think,', 'therefore'), ('therefore', 'I'), ('I', 'am')]


In [55]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# def data_to_tensors(corpus, char_vocab, word_vocab):
#     predictors = []
#     targets = []
#     for predictor, target in corpus:
#         predictor_indices = [char_vocab.get(char, char_vocab['<unk>']) for char in predictor]
#         # print(predictors)
#         target_index = word_vocab.get(target, word_vocab['<unk>'])
#         predictors.append(predictor_indices)
#         targets.append(target_index)
#     print(len(predictors))
#     print(len(targets))
#     return torch.tensor(predictors), torch.tensor(targets)

# train_predictors, train_targets = data_to_tensors(train_corpus, char_vocab, word_vocab)
# valid_predictors, valid_targets = data_to_tensors(valid_corpus, char_vocab, word_vocab)
# test_predictors, test_targets = data_to_tensors(test_corpus, char_vocab, word_vocab)

def data_to_tensors(corpus, char_vocab, word_vocab, max_sequence_length):
    predictors = []
    targets = []

    for predictor, target in corpus:
        predictor_indices = [char_vocab.get(char, char_vocab['<unk>']) for char in predictor]
        target_index = word_vocab.get(target, word_vocab['<unk>'])
        
        # Pad the predictor sequence
        if len(predictor_indices) < max_sequence_length:
            predictor_indices += [char_vocab['<pad>']] * (max_sequence_length - len(predictor_indices))
        else:
            predictor_indices = predictor_indices[:max_sequence_length]

        predictors.append(predictor_indices)
        targets.append(target_index)

    return torch.tensor(predictors), torch.tensor(targets)

# train_predictors, train_targets = data_to_tensors(train_corpus, char_vocab, word_vocab, max_sequence_length)
# valid_predictors, valid_targets = data_to_tensors(valid_corpus, char_vocab, word_vocab, max_sequence_length)
# test_predictors, test_targets = data_to_tensors(test_corpus, char_vocab, word_vocab, max_sequence_length)

# batch_size = 20
# train_dataset = TensorDataset(train_predictors, train_targets)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    

In [52]:
pip install tqdm

Note: you may need to restart the kernel to use updated packages.


In [67]:
def build_char_vocab(corpus):
    char_set = set()
    for predictor, target in corpus:
        char_set.update(predictor)
        char_set.update(target)

    char_vocab = {'<pad>': 0, '<unk>': 1}
    char_vocab.update({char: idx + 2 for idx, char in enumerate(sorted(char_set))})
    return char_vocab

def build_word_vocab(corpus):
    word_set = set()
    for predictor, target in corpus:
        word_set.update([predictor, target])

    # do not need to add , '<unk>': 1 since it is in dataset already.
    word_vocab = {'<pad>': 0}

    # update at idx + 1 instead of idx + 2 because we've removed <unk>
    word_vocab.update({word: idx + 1 for idx, word in enumerate(sorted(word_set))})
    return word_vocab

corpus = [
    "Hello world",
    "This is an example sentence",
    "To be or not to be, that is the question",
    "I think, therefore I am",
    "A journey of a thousand miles begins with a single step",
    "All that glitters is not gold",
    "Ask not what your country can do for you, ask what you can do for your country",
    "I have a dream",
    "Elementary, my dear Watson",
    "Houston, we have a problem",
    "Just keep swimming",
    "May the Force be with you",
    "Once upon a time in a land far, far away",
    "Winter is coming",
    "Keep calm and carry on",
    "Why so serious?",
    "There's no place like home",
    # "The cake is a lie",
    # "To infinity and beyond",
    # "Elementary, my dear Watson",
    # "It's a trap!",
    # "Life is like a box of chocolates",
    # "The pen is mightier than the sword",
    # "Knowledge is power",
    # "With great power comes great responsibility",
    # "The only thing we have to fear is fear itself",
    # "I have a dream",
    # "That's one small step for man, one giant leap for mankind",
    # "In the beginning, the universe was created",
    # "I'm just a simple man trying to make my way in the universe",
    # "Do or do not, there is no try",
    # "To boldly go where no one has gone before",
    # "A long time ago in a galaxy far, far away",
    # "Et tu, Brute?",
    # "You can't handle the truth!",
    # "I'm the king of the world!",
    # "They may take our lives, but they'll never take our freedom!",
    # "Frankly, my dear, I don't give a damn",
    # "You talking to me?",
    # "Here's looking at you, kid",
    # "I love the smell of napalm in the morning",
    # "Say hello to my little friend",
    # "Houston, we have a problem",
    # "I'm gonna make him an offer he can't refuse",
    # "Keep your friends close, but your enemies closer",
    # "I feel the need—the need for speed",
    # "Carpe diem. Seize the day, boys",
    # "Elementary, my dear Watson",
    # "Life moves pretty fast. If you don't stop and look around once in a while, you could miss it",
    # "Nobody puts Baby in a corner"
]

train_corpus = preprocess_data(train_data)

char_vocab = build_char_vocab(train_corpus)
word_vocab = build_word_vocab(train_corpus)

char_vocab_size = len(char_vocab)
char_embed_dim = 50
word_vocab_size = len(word_vocab)
print("word vocab size:", word_vocab_size)
conv_out_size = 256
hidden_dim = 512
'''
from figure 1: Note that in the above
example we have twelve filters—three filters of width two
(blue), four filters of width three (yellow), and five filters
of width four (red). Just added one more possibility (5)
'''
kernel_sizes = [2, 3, 4, 5]
features = [64, 64, 64, 64]  # same length as kernel_sizes
num_highway_layers = 2
num_rnn_layers = 2
dropout = 0.1

model = CharacterToWordModel(char_vocab_size, char_embed_dim, word_vocab_size, 
                             conv_out_size, hidden_dim, kernel_sizes, features, 
                             num_highway_layers, num_rnn_layers, dropout)
model.to(device)

max_sequence_length = 50
train_predictors, train_targets = data_to_tensors(train_corpus, char_vocab, word_vocab, max_sequence_length)
print(train_targets.size())


batch_size = 256
train_dataset = TensorDataset(train_predictors, train_targets)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


word vocab size: 9999
torch.Size([845453])


In [71]:
import torch.optim as optim

from tqdm import tqdm

num_epochs = 100
criterion = nn.CrossEntropyLoss().to(device)

                        # Need a small learning rate for the sheer number of params
optimizer = optim.Adam(model.parameters(), lr=0.0001)

print("Using device:", device)
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')
    
    for inputs, targets in progress_bar:
        optimizer.zero_grad()
        inputs = inputs.to(device)
        targets = targets.to(device)

        hidden_state = None  
        predictions, _ = model(inputs, hidden_state)

        loss = criterion(predictions.view(-1, word_vocab_size), targets.view(-1)).to(device)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        _, predicted = torch.max(predictions.data, -1)
        total_correct += (predicted.view(-1) == targets.view(-1)).sum().item()
        total_samples += targets.numel()

        avg_loss = total_loss / total_samples
        accuracy = total_correct / total_samples * 100
        progress_bar.set_postfix(loss=avg_loss, accuracy=f'{accuracy:.2f}%')

    print(f"Epoch {epoch+1} completed. Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

    save_path = f'model_saves/model_epoch_{epoch+1}.pth'
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': avg_loss,
        'accuracy': accuracy,
    }, save_path)

    print(f"Model saved to {save_path}")

Using device: mps


Epoch 1/100: 100%|██████████| 3303/3303 [03:06<00:00, 17.75it/s, accuracy=13.11%, loss=0.0234]


Epoch 1 completed. Loss: 0.0234, Accuracy: 13.11%
Model saved to model_saves/model_epoch_1.pth


Epoch 2/100: 100%|██████████| 3303/3303 [03:06<00:00, 17.70it/s, accuracy=15.53%, loss=0.0218]


Epoch 2 completed. Loss: 0.0218, Accuracy: 15.53%
Model saved to model_saves/model_epoch_2.pth


Epoch 3/100: 100%|██████████| 3303/3303 [03:08<00:00, 17.52it/s, accuracy=16.42%, loss=0.0211]


Epoch 3 completed. Loss: 0.0211, Accuracy: 16.42%
Model saved to model_saves/model_epoch_3.pth


Epoch 4/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.32it/s, accuracy=16.97%, loss=0.0206]


Epoch 4 completed. Loss: 0.0206, Accuracy: 16.97%
Model saved to model_saves/model_epoch_4.pth


Epoch 5/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.33it/s, accuracy=17.23%, loss=0.0202]


Epoch 5 completed. Loss: 0.0202, Accuracy: 17.23%
Model saved to model_saves/model_epoch_5.pth


Epoch 6/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.38it/s, accuracy=17.43%, loss=0.0199]


Epoch 6 completed. Loss: 0.0199, Accuracy: 17.43%
Model saved to model_saves/model_epoch_6.pth


Epoch 7/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.33it/s, accuracy=17.54%, loss=0.0197]


Epoch 7 completed. Loss: 0.0197, Accuracy: 17.54%
Model saved to model_saves/model_epoch_7.pth


Epoch 8/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.38it/s, accuracy=17.61%, loss=0.0196]


Epoch 8 completed. Loss: 0.0196, Accuracy: 17.61%
Model saved to model_saves/model_epoch_8.pth


Epoch 9/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.32it/s, accuracy=17.67%, loss=0.0195]


Epoch 9 completed. Loss: 0.0195, Accuracy: 17.67%
Model saved to model_saves/model_epoch_9.pth


Epoch 10/100: 100%|██████████| 3303/3303 [03:08<00:00, 17.52it/s, accuracy=17.73%, loss=0.0194]


Epoch 10 completed. Loss: 0.0194, Accuracy: 17.73%
Model saved to model_saves/model_epoch_10.pth


Epoch 11/100: 100%|██████████| 3303/3303 [03:07<00:00, 17.60it/s, accuracy=17.76%, loss=0.0193]


Epoch 11 completed. Loss: 0.0193, Accuracy: 17.76%
Model saved to model_saves/model_epoch_11.pth


Epoch 12/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.40it/s, accuracy=17.75%, loss=0.0193]


Epoch 12 completed. Loss: 0.0193, Accuracy: 17.75%
Model saved to model_saves/model_epoch_12.pth


Epoch 13/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.36it/s, accuracy=17.81%, loss=0.0192]


Epoch 13 completed. Loss: 0.0192, Accuracy: 17.81%
Model saved to model_saves/model_epoch_13.pth


Epoch 14/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.40it/s, accuracy=17.83%, loss=0.0192]


Epoch 14 completed. Loss: 0.0192, Accuracy: 17.83%
Model saved to model_saves/model_epoch_14.pth


Epoch 15/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.40it/s, accuracy=17.82%, loss=0.0192]


Epoch 15 completed. Loss: 0.0192, Accuracy: 17.82%
Model saved to model_saves/model_epoch_15.pth


Epoch 16/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.43it/s, accuracy=17.80%, loss=0.0191]


Epoch 16 completed. Loss: 0.0191, Accuracy: 17.80%
Model saved to model_saves/model_epoch_16.pth


Epoch 17/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.45it/s, accuracy=17.80%, loss=0.0191]


Epoch 17 completed. Loss: 0.0191, Accuracy: 17.80%
Model saved to model_saves/model_epoch_17.pth


Epoch 18/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.48it/s, accuracy=17.82%, loss=0.0191]


Epoch 18 completed. Loss: 0.0191, Accuracy: 17.82%
Model saved to model_saves/model_epoch_18.pth


Epoch 19/100: 100%|██████████| 3303/3303 [03:08<00:00, 17.48it/s, accuracy=17.77%, loss=0.0191]


Epoch 19 completed. Loss: 0.0191, Accuracy: 17.77%
Model saved to model_saves/model_epoch_19.pth


Epoch 20/100: 100%|██████████| 3303/3303 [03:08<00:00, 17.54it/s, accuracy=17.80%, loss=0.0191]


Epoch 20 completed. Loss: 0.0191, Accuracy: 17.80%
Model saved to model_saves/model_epoch_20.pth


Epoch 21/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.46it/s, accuracy=17.75%, loss=0.0191]


Epoch 21 completed. Loss: 0.0191, Accuracy: 17.75%
Model saved to model_saves/model_epoch_21.pth


Epoch 22/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.30it/s, accuracy=17.73%, loss=0.0191]


Epoch 22 completed. Loss: 0.0191, Accuracy: 17.73%
Model saved to model_saves/model_epoch_22.pth


Epoch 23/100: 100%|██████████| 3303/3303 [03:11<00:00, 17.25it/s, accuracy=17.72%, loss=0.0191]


Epoch 23 completed. Loss: 0.0191, Accuracy: 17.72%
Model saved to model_saves/model_epoch_23.pth


Epoch 24/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.41it/s, accuracy=17.73%, loss=0.0191]


Epoch 24 completed. Loss: 0.0191, Accuracy: 17.73%
Model saved to model_saves/model_epoch_24.pth


Epoch 25/100: 100%|██████████| 3303/3303 [03:09<00:00, 17.38it/s, accuracy=17.70%, loss=0.0191]


Epoch 25 completed. Loss: 0.0191, Accuracy: 17.70%
Model saved to model_saves/model_epoch_25.pth


Epoch 26/100: 100%|██████████| 3303/3303 [03:10<00:00, 17.32it/s, accuracy=17.70%, loss=0.0191]


Epoch 26 completed. Loss: 0.0191, Accuracy: 17.70%
Model saved to model_saves/model_epoch_26.pth


Epoch 27/100: 100%|██████████| 3303/3303 [03:11<00:00, 17.28it/s, accuracy=17.75%, loss=0.0191]


Epoch 27 completed. Loss: 0.0191, Accuracy: 17.75%
Model saved to model_saves/model_epoch_27.pth


Epoch 28/100: 100%|██████████| 3303/3303 [03:11<00:00, 17.23it/s, accuracy=17.72%, loss=0.0191]


Epoch 28 completed. Loss: 0.0191, Accuracy: 17.72%
Model saved to model_saves/model_epoch_28.pth


Epoch 29/100: 100%|██████████| 3303/3303 [03:11<00:00, 17.27it/s, accuracy=17.67%, loss=0.0191]


Epoch 29 completed. Loss: 0.0191, Accuracy: 17.67%
Model saved to model_saves/model_epoch_29.pth


Epoch 30/100: 100%|██████████| 3303/3303 [03:12<00:00, 17.18it/s, accuracy=17.65%, loss=0.0191]


Epoch 30 completed. Loss: 0.0191, Accuracy: 17.65%
Model saved to model_saves/model_epoch_30.pth


Epoch 31/100: 100%|██████████| 3303/3303 [03:12<00:00, 17.16it/s, accuracy=17.69%, loss=0.0192]


Epoch 31 completed. Loss: 0.0192, Accuracy: 17.69%


RuntimeError: Parent directory model_saves does not exist.

In [None]:
# max_sequence_length = 50


# train_predictors, train_targets = data_to_tensors(train_corpus, char_vocab, word_vocab, max_sequence_length, sequence_count)
# valid_predictors, valid_targets = data_to_tensors(valid_corpus, char_vocab, word_vocab, max_sequence_length, sequence_count)
# test_predictors, test_targets = data_to_tensors(test_corpus, char_vocab, word_vocab, max_sequence_length, sequence_count)

# batch_size = 20
# train_dataset = TensorDataset(train_predictors, train_targets)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# # validation and test datasets
# valid_dataset = TensorDataset(valid_predictors, valid_targets)
# valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# test_dataset = TensorDataset(test_predictors, test_targets)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)