# Environment


In [1]:
from google.colab import drive, runtime
import json
import string
import re
import random
from collections import defaultdict
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim import AdamW
!pip install torchinfo
from torchinfo import summary


import nltk
nltk.download('punkt')
from nltk.tokenize import WhitespaceTokenizer
from nltk.stem.snowball import SnowballStemmer
!pip install contractions
import contractions

seed = 27
random.seed(seed)

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Collecting contractions
  Downloading contractions-0.1.73-py2.py3-none-any.whl (8.7 kB)
Collecting textsearch>=0.0.21 (from contractions)
  Downloading textsearch-0.0.24-py2.py3-none-any.whl (7.6 kB)
Collecting anyascii (from textsearch>=0.0.21->contractions)
  Downloading anyascii-0.3.2-py3-none-any.whl (289 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.9/289.9 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyahocorasick (from textsearch>=0.0.21->contractions)
  Downloading pyahocorasick-2.0.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.8/110.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyahocorasick, anyascii, textsearch, contractions
Successfully installed anyascii-0.3.2 contractions-0.1.73 pyahocorasick-2.0.0 textsearch-0.0.24


In [2]:
!python --version

Python 3.10.12


# GPU Check

In [3]:
# From
# https://colab.research.google.com/notebooks/pro.ipynb
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

/bin/bash: line 1: nvidia-smi: command not found


# Google Drive

## Mount Drive

In [4]:
## Mount the drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data Paths

In [5]:
# Paths
drive_path = "/content/drive/MyDrive" # Do not change
internal_path = "Datasets"
file_name = "ahk_dataset_v2.json"

data_folder = "/content/data"
!mkdir -p $data_folder
!cp $drive_path/$internal_path/$file_name $data_folder/$file_name

## Load Data

In [6]:
with open(f"{data_folder}/{file_name}", 'r') as file:
    data = json.load(file)

# Raw Data

## Statistics

In [7]:
help = 0
rule_five = 0
normal = 0

for dp in data:
    if dp["help"] == True and dp["rule5"] == False:
        help += 1
    elif dp["rule5"] == True:
        rule_five += 1
    else:
        normal += 1

total = help + rule_five + normal

print(f"There are \t{help}\t help samples \t\t({(help/total)*100:.2f} %)")
print(f"\t\t{rule_five}\t rule five samples \t({(rule_five/total)*100:.2f} %)")
print(f"\t\t{normal}\t normal samples \t({(normal/total)*100:.2f} %)")

print(f"\nTotal: {total}")

There are 	4504	 help samples 		(1.86 %)
		375	 rule five samples 	(0.15 %)
		237416	 normal samples 	(97.99 %)

Total: 242295


In [8]:
print(data[7110]["text"])

walls don't mean shit anymore lmfao but you can also fall through walls so you need to stand "on top" of an object or the top of a portal so if you're watching the inbounds speedrun, and they're shooting through walls and shit, they're also juggling where they're standing stand in the wrong spot and they fall through the ground


## Standarize

In [9]:
def remove_discord_emojis(text):
    emoji_pattern = re.compile("["
                                u"\U0001F600-\U0001F64F"  # emoticons
                                u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                                u"\U0001F680-\U0001F6FF"  # transport & map symbols
                                u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                                u"\U00002702-\U000027B0"  # other emoticons
                                u"\U000024C2-\U0001F251"  # emojis
                                "]+", flags=re.UNICODE)

    return emoji_pattern.sub(" ", text)

def remove_urls(text):
    return re.sub(r"https?://\S+", " ", text)

def remove_mentions(text):
    return re.sub(r"@\S+", "", text)

def remove_custom_emojis(text):
    txt = re.sub(r":\b(?!\d\d)\w+\b:", " ", text)
    return txt

def special_to_space(text): # Excludes ' because we need it for contractions
    return re.sub(r"[^a-zA-Z0-9'\s]+", " ", text)

def compress_whitespace(text):
    return re.sub(r"\s+", " ", text)


def standardize_text(text, stemmer, contractions):
    txt = text.lower()

    txt = remove_urls(txt)
    txt = remove_mentions(txt)
    txt = remove_discord_emojis(txt)
    txt = remove_custom_emojis(txt)

    txt = special_to_space(txt)

    words = txt.split()
    txt = ""
    for word in words:
        word = contractions.fix(word)
        word = re.sub(r"\'", "", word)
        txt += f"{word} " # space for separation

    words = txt.split()
    txt = ""
    for word in words:
        word = stemmer(word)
        txt += f"{word} " # space for separation

    txt = compress_whitespace(txt)
    txt = txt.strip()

    return txt

In [10]:
stemmer = SnowballStemmer("english")

standardizer = lambda x: standardize_text(x, stemmer.stem, contractions)

In [11]:
print(data[7110]["text"])

walls don't mean shit anymore lmfao but you can also fall through walls so you need to stand "on top" of an object or the top of a portal so if you're watching the inbounds speedrun, and they're shooting through walls and shit, they're also juggling where they're standing stand in the wrong spot and they fall through the ground


In [12]:
for dp in data:
    dp["text"] = standardizer(dp["text"])

In [13]:
print(data[7110]["text"])

wall do not mean shit anymor lmfao but you can also fall through wall so you need to stand on top of an object or the top of a portal so if you are watch the inbound speedrun and they are shoot through wall and shit they are also juggl where they are stand stand in the wrong spot and they fall through the ground


## Separate Data

In [14]:
texts = []
labels = []

for dp in data:
    texts.append(dp["text"])

    help = dp["help"]
    rule5 = dp["rule5"]

    if rule5 or help:
        labels.append([0, int(help), int(rule5)])
    else:
        labels.append([1, 0, 0])


In [15]:
print(texts[7110])

wall do not mean shit anymor lmfao but you can also fall through wall so you need to stand on top of an object or the top of a portal so if you are watch the inbound speedrun and they are shoot through wall and shit they are also juggl where they are stand stand in the wrong spot and they fall through the ground


In [16]:
print(labels[7110])

[1, 0, 0]


# Build Dataset

## Custom Dataset

In [17]:
class CustomDataset(Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, index):
        input_data = self.inputs[index]
        label = self.labels[index]
        return input_data, label

In [18]:
ahk_dataset = CustomDataset(texts, labels)

## Random Split

In [19]:
validation_split = 0.1
test_split  = 0.1

train_split = 1 - validation_split - test_split

In [20]:
generator = torch.Generator().manual_seed(seed)

train_dataset, val_dataset, test_dataset = random_split(
    ahk_dataset,
    [train_split, validation_split, test_split],
    generator=generator
)

In [21]:
print(f"Training: \t{len(train_dataset)}")
print(f"Validation: \t{len(val_dataset)}")
print(f"Test: \t\t{len(test_dataset)}")

Training: 	193837
Validation: 	24229
Test: 		24229


## Data Loaders

In [22]:
batch_size = 400

In [23]:
num_workers = 2
prefetch = 2 # batches
pin_memory = True # https://pytorch.org/docs/stable/data.html#memory-pinning

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    prefetch_factor=prefetch,
    pin_memory=pin_memory
)

valid_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    prefetch_factor=prefetch,
    pin_memory=pin_memory
)

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    prefetch_factor=prefetch,
    pin_memory=pin_memory
)

# Text Vectorization

## Class

In [24]:
class TextVectorization(nn.Module):
    def __init__(self, max_vocabulary, max_tokens):
        super(TextVectorization, self).__init__()
        self.max_tokens = max_tokens
        self.max_vocabulary = max_vocabulary
        self.word_dictionary = dict()
        self.dictionary_size = 0

    def adapt(self, dataset):
        # Calculate word frequencies
        word_frequencies = defaultdict(int)

        for text in dataset:
            for word in text[0].split():
                word_frequencies[word] += 1

        # Sort the dictionary by word frequencies in descending order
        sorted_word_frequencies = dict(sorted(word_frequencies.items(),
                                              key=lambda item: item[1],
                                              reverse=True)
        )

        # Take the top (max_vocabulary - 2) most frequent words
        # since indices 0 and 1 are reserved for padding and missing words respectively
        most_frequent = list(sorted_word_frequencies.items())[:self.max_vocabulary - 2]
        # Note that len(most_frequent) does not necessarily equal
        # (max_vocabulary - 2), since there could be less words overall
        # than the max_vocabulary limit
        self.dictionary_size = len(most_frequent) + 2

        # Note starting at 2 since 0 (padding) and 1 (missing) are reserved
        for word_value, (word, frequency) in enumerate(most_frequent, 2):
            self.word_dictionary[word] = word_value

        # if len(self.word_dictionary) < self.max_vocabulary:
        #     raise ValueError(
        #         f"Current size of the dictionary ({len(self.word_dictionary)}) "
        #         f"exceeds the defined limit ({self.max_vocabulary})"
        #     )

    def vocabulary_size(self):
        return self.dictionary_size

    def dictionary(self):
        return self.word_dictionary

    def forward(self, batch_x):
        try:
            batch_text_vectors = torch.zeros((len(batch_x), self.max_tokens), dtype=torch.int32)

            for i, text in enumerate(batch_x):

                # Split the text and tokenize it
                words = text.split()[:self.max_tokens]

                for pos, word in enumerate(words):
                    batch_text_vectors[i, pos] = self.word_dictionary.get(word, 1)

            return batch_text_vectors

        except IndexError:
            print("Looks like you are out of indicies")

    # def forward(self, x):
    #     text_vector = torch.zeros(self.max_tokens, dtype=torch.int32)

    #         # Split the text and tokenize it
    #     words = x.split()[:self.max_tokens]

    #     for pos, word in enumerate(words):
    #         text_vector[pos] = self.word_dictionary.get(word, 1)

    #    return text_vector

## Adapt

In [25]:
vectorize_layer = TextVectorization(
    max_vocabulary=60000,
    max_tokens=150
)

In [26]:
vectorize_layer.adapt(train_dataset)

In [27]:
vectorize_layer.vocabulary_size()

49913

In [28]:
# vectorize_layer.dictionary()

## Samples Tests

In [29]:
samples = [
    "trvger7 implements the russian style of trap metal perfectly, where he's enjoyable mnogoznaal's music is literally magical triplesixdelete is the best trap metal producer",
    "That's why I'm going 3090, 24 GB of vram, but the 4070 ti is equal or greater than by 1-5% depending on the title to the 3090 ti so some performance loss but fuck 12 GB of vram utter scam",
    "ya I always hear about that, have yet to have it happen then again I don't uaually do wild stuff fortnite, I kinda did sketchy stuff, everything else was just keybind stuff",
    "I went from a full desk as a mouse pad to a giant one, now to one that fits in my laptop bag But I am not a pro gamer so ya",
    "I've played cs2 yesterday for the first time, and cs overall after like an year or two.  I didn't follow too much info cs2 related, " +\
    "I can't say I went in blind but I only knew the most big changes (like smoke).  It's nice, but I wasn't very impressed. That said I don't know what would make me impressed." +\
    "I had some trouble adjusting to the gameplay style. Idk, peeker's advantage feels way bigger than it was before. Also I used to play at 128 tickrate, and shots in the new system " +\
    "doesn't seem to be as accurate as they were. Could be my aim being crap after such a long period without playing, but I swear too many eagle shots missed when they were spot on in the model's head."
]

In [30]:
for idx, sample in enumerate(samples):
    samples[idx] = standardizer(sample)

In [31]:
vectorize_layer(samples)

tensor([[49911,  1044,     3,  1358,   790,    12,  1841,  1422,   543,   125,
            80,     7,   706, 25197,   613,     7,   345,   988, 49912,     7,
             3,   245,  1841,  1422,  1977,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,  

# Transformer

## Embeddings

In [32]:
class TokenAndPositionEmbedding(nn.Module):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()

        self.token_emb = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
            padding_idx=0
        )

        self.pos_emb = nn.Embedding(
            num_embeddings=maxlen,
            embedding_dim=embed_dim
        )

    def forward(self, x):
        maxlen = x.size(-1)
        positions = torch.arange(0, maxlen, dtype=torch.int32, device=x.device)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

### Test

In [33]:
random_tokens = torch.randint(
    low=0,
    high=vectorize_layer.vocabulary_size(),
    size=(batch_size, 150),
    dtype=torch.int32
)

test_emb = TokenAndPositionEmbedding(150, vectorize_layer.vocabulary_size(), 16)
test_emb_out = test_emb(random_tokens)

## Transformer Block

In [34]:
class TransformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.attention = nn.MultiheadAttention(
            embed_dim,
            num_heads,
            dropout=dropout,
            batch_first=True
        )
        self.norm_1 = nn.LayerNorm(embed_dim)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.norm_2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # Multihead self-attention
        attn_output, _ = self.attention(x, x, x)

        # Residual connection and normalization
        x = x + self.dropout(attn_output)
        x = self.norm_1(x)

        # Feed-forward network
        ffn_output = self.ffn(x)

        # Residual connection and normalization
        x = x + self.dropout(ffn_output)
        x = self.norm_2(x)

        return x


### Test

In [35]:
test_tblock = TransformerBlock(
    embed_dim=16,
    num_heads=2,
    ff_dim=32,
    dropout=0.1
)

test_tblock_out = test_tblock(test_emb_out)

print(test_tblock_out.shape)
print(test_tblock_out.dtype)

torch.Size([400, 150, 16])
torch.float32


# Transformer Model

## Parameters

In [36]:
max_tokens = 150
vocab_size = vectorize_layer.vocabulary_size()
embed_dim = 16  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer

## Artchitecture

In [37]:
class TransformerModel(nn.Module):
    def __init__(self, max_tokens, vocab_size, embed_dim, num_heads, ff_dim):
        super(TransformerModel, self).__init__()
        # self.vectorize_layer = vectorize_layer
        self.embedding_layer = TokenAndPositionEmbedding(
            max_tokens,
            vocab_size,
            embed_dim
        )
        self.transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.global_avg_pooling = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(embed_dim, 20)
        self.fc2 = nn.Linear(20, 3)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # x = self.vectorize_layer(x)
        x = self.embedding_layer(x)
        x = self.transformer_block(x)
        x = self.global_avg_pooling(x.permute(0, 2, 1)).squeeze(2)
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

## Init

In [38]:
model = TransformerModel(
    max_tokens=max_tokens,
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_heads=num_heads,
    ff_dim=ff_dim
)

## Test

In [39]:
random_tokens = torch.randint(
    low=0,
    high=vectorize_layer.vocabulary_size(),
    size=(batch_size, 150),
    dtype=torch.int32
)


model.forward(random_tokens)[:5]

tensor([[0.2857, 0.3291, 0.3853],
        [0.3191, 0.3226, 0.3583],
        [0.2873, 0.3286, 0.3842],
        [0.2829, 0.3112, 0.4059],
        [0.2910, 0.3431, 0.3659]], grad_fn=<SliceBackward0>)

## Summary

In [40]:
random_tokens = torch.randint(
    low=0,
    high=vectorize_layer.vocabulary_size(),
    size=(batch_size, 150),
    dtype=torch.int32
)

summary(model, input_data=random_tokens)

Layer (type:depth-idx)                   Output Shape              Param #
TransformerModel                         [400, 3]                  --
├─TokenAndPositionEmbedding: 1-1         [400, 150, 16]            --
│    └─Embedding: 2-1                    [150, 16]                 2,400
│    └─Embedding: 2-2                    [400, 150, 16]            798,608
├─TransformerBlock: 1-2                  [400, 150, 16]            --
│    └─MultiheadAttention: 2-3           [400, 150, 16]            1,088
│    └─Dropout: 2-4                      [400, 150, 16]            --
│    └─LayerNorm: 2-5                    [400, 150, 16]            32
│    └─Sequential: 2-6                   [400, 150, 16]            --
│    │    └─Linear: 3-1                  [400, 150, 32]            544
│    │    └─ReLU: 3-2                    [400, 150, 32]            --
│    │    └─Linear: 3-3                  [400, 150, 16]            528
│    └─Dropout: 2-7                      [400, 150, 16]            --
│ 

## Append Pre-processing

In [41]:
class TransformerModel(nn.Module):
    def __init__(self, max_tokens, vocab_size, embed_dim, num_heads, ff_dim, vectorize_layer):
        super(TransformerModel, self).__init__()
        self.vectorize_layer = vectorize_layer
        self.embedding_layer = TokenAndPositionEmbedding(
            max_tokens,
            vocab_size,
            embed_dim
        )
        self.transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.global_avg_pooling = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(embed_dim, 20)
        self.fc2 = nn.Linear(20, 3)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.vectorize_layer(x)
        x = self.embedding_layer(x)
        x = self.transformer_block(x)
        x = self.global_avg_pooling(x.permute(0, 2, 1)).squeeze(2)
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

## Init

In [42]:
model = TransformerModel(
    max_tokens=max_tokens,
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_heads=num_heads,
    ff_dim=ff_dim,
    vectorize_layer=vectorize_layer
)

## Test

In [43]:
model.forward([
        "This is a test",
        "trvger7 implement the russian style of trap metal perfect where he is enjoy mnogozna music is liter magic triplesixdelet is the best trap metal produc"
    ]
)

tensor([[0.3751, 0.3198, 0.3051],
        [0.3635, 0.3183, 0.3182]], grad_fn=<SoftmaxBackward0>)

# Train

## Training Pipeline

In [44]:
import torch
from tqdm import tqdm

class ModelTrainer:
    def __init__(self, epochs, loss, optimizer, patience):
        self.num_epochs = epochs
        self.criterion = loss
        self.optimizer = optimizer
        self.patience = patience
        self.best_validation_loss = float('inf')
        self.patience_counter = 0

    def calculate_accuracy(self, outputs, labels, threshold=0.5):
        # Convert outputs to binary predictions
        preds = outputs > threshold
        # Calculate accuracy
        accuracy = (preds == labels.byte()).float().mean()
        return accuracy.item()

    def train(self, model, train_loader, valid_loader):
        for epoch in range(self.num_epochs):
            model.train()
            total_loss = 0.0
            total_accuracy = 0.0

            train_progress = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}/{self.num_epochs}")

            for batch_idx, batch in train_progress:
                inputs, labels = batch[0], batch[1]

                # Forward pass
                outputs = model(inputs)
                loss = self.criterion(outputs, labels)

                # Backward pass and optimization
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()
                accuracy = self.calculate_accuracy(outputs, labels)
                total_accuracy += accuracy
                train_progress.set_postfix(train_loss=total_loss / (batch_idx + 1), train_accuracy=total_accuracy / (batch_idx + 1))

            # Evaluate the model on the validation dataset
            model.eval()
            validation_loss = 0.0
            validation_accuracy = 0.0

            valid_progress = tqdm(enumerate(valid_loader), total=len(valid_loader), desc=f"Epoch {epoch + 1}/{self.num_epochs}")

            for batch_idx, batch in valid_progress:
                inputs, labels = batch[0], batch[1]
                outputs = model(inputs)
                loss = self.criterion(outputs, labels)
                validation_loss += loss.item()
                accuracy = self.calculate_accuracy(outputs, labels)
                validation_accuracy += accuracy
                valid_progress.set_postfix(validation_loss=validation_loss / (batch_idx + 1), validation_accuracy=validation_accuracy / (batch_idx + 1))

            # Check for early stopping
            if validation_loss < self.best_validation_loss:
                self.best_validation_loss = validation_loss
                self.patience_counter = 0
            else:
                self.patience_counter += 1

            if self.patience_counter >= self.patience:
                print(f"Early stopping after {self.patience} epochs without improvement.")
                break

            print("\n")

        if self.patience_counter < self.patience:
            print("Training completed within patience. No early stopping applied.")



## Training

In [46]:
learning_rate = 0.00003
weight_decay = 0.001
patience = 8
epochs = 50

pos_weights = torch.tensor([0, 50, 500])
loss = nn.BCEWithLogitsLoss(pos_weight=pos_weights)
# loss = nn.BCELoss()

optimizer = AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay
)

model_trainer = ModelTrainer(
    epochs=epochs,
    loss=loss,
    optimizer=optimizer,
    patience=patience
)

model_trainer.train(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader
)

Epoch 1/50: 100%|██████████| 485/485 [06:55<00:00,  1.17it/s, train_accuracy=0.666, train_loss=0.896]
Epoch 1/50: 100%|██████████| 61/61 [00:20<00:00,  3.04it/s, validation_accuracy=0.666, validation_loss=0.906]






Epoch 2/50: 100%|██████████| 485/485 [06:53<00:00,  1.17it/s, train_accuracy=0.751, train_loss=0.891]
Epoch 2/50: 100%|██████████| 61/61 [00:20<00:00,  3.01it/s, validation_accuracy=0.986, validation_loss=0.901]






Epoch 3/50: 100%|██████████| 485/485 [06:56<00:00,  1.16it/s, train_accuracy=0.958, train_loss=0.884]
Epoch 3/50: 100%|██████████| 61/61 [00:21<00:00,  2.81it/s, validation_accuracy=0.986, validation_loss=0.896]






Epoch 4/50: 100%|██████████| 485/485 [06:53<00:00,  1.17it/s, train_accuracy=0.984, train_loss=0.88]
Epoch 4/50: 100%|██████████| 61/61 [00:21<00:00,  2.83it/s, validation_accuracy=0.986, validation_loss=0.892]






Epoch 5/50: 100%|██████████| 485/485 [06:52<00:00,  1.18it/s, train_accuracy=0.986, train_loss=0.876]
Epoch 5/50: 100%|██████████| 61/61 [00:19<00:00,  3.05it/s, validation_accuracy=0.986, validation_loss=0.89]






Epoch 6/50: 100%|██████████| 485/485 [06:56<00:00,  1.16it/s, train_accuracy=0.986, train_loss=0.874]
Epoch 6/50: 100%|██████████| 61/61 [00:21<00:00,  2.89it/s, validation_accuracy=0.986, validation_loss=0.888]






Epoch 7/50: 100%|██████████| 485/485 [07:02<00:00,  1.15it/s, train_accuracy=0.986, train_loss=0.872]
Epoch 7/50: 100%|██████████| 61/61 [00:20<00:00,  3.01it/s, validation_accuracy=0.986, validation_loss=0.886]






Epoch 8/50: 100%|██████████| 485/485 [06:59<00:00,  1.15it/s, train_accuracy=0.986, train_loss=0.869]
Epoch 8/50: 100%|██████████| 61/61 [00:35<00:00,  1.70it/s, validation_accuracy=0.986, validation_loss=0.882]






Epoch 9/50: 100%|██████████| 485/485 [07:05<00:00,  1.14it/s, train_accuracy=0.986, train_loss=0.865]
Epoch 9/50: 100%|██████████| 61/61 [00:21<00:00,  2.79it/s, validation_accuracy=0.986, validation_loss=0.877]






Epoch 10/50: 100%|██████████| 485/485 [07:22<00:00,  1.10it/s, train_accuracy=0.986, train_loss=0.861]
Epoch 10/50: 100%|██████████| 61/61 [00:21<00:00,  2.77it/s, validation_accuracy=0.986, validation_loss=0.87]






Epoch 11/50: 100%|██████████| 485/485 [07:15<00:00,  1.11it/s, train_accuracy=0.986, train_loss=0.854]
Epoch 11/50: 100%|██████████| 61/61 [00:20<00:00,  2.95it/s, validation_accuracy=0.986, validation_loss=0.861]






Epoch 12/50: 100%|██████████| 485/485 [07:03<00:00,  1.14it/s, train_accuracy=0.985, train_loss=0.847]
Epoch 12/50: 100%|██████████| 61/61 [00:21<00:00,  2.80it/s, validation_accuracy=0.985, validation_loss=0.853]






Epoch 13/50: 100%|██████████| 485/485 [07:16<00:00,  1.11it/s, train_accuracy=0.984, train_loss=0.839]
Epoch 13/50: 100%|██████████| 61/61 [00:21<00:00,  2.84it/s, validation_accuracy=0.983, validation_loss=0.845]






Epoch 14/50: 100%|██████████| 485/485 [07:08<00:00,  1.13it/s, train_accuracy=0.981, train_loss=0.831]
Epoch 14/50: 100%|██████████| 61/61 [00:22<00:00,  2.69it/s, validation_accuracy=0.98, validation_loss=0.838]






Epoch 15/50: 100%|██████████| 485/485 [07:11<00:00,  1.12it/s, train_accuracy=0.978, train_loss=0.824]
Epoch 15/50: 100%|██████████| 61/61 [00:21<00:00,  2.86it/s, validation_accuracy=0.976, validation_loss=0.831]






Epoch 16/50: 100%|██████████| 485/485 [07:11<00:00,  1.12it/s, train_accuracy=0.974, train_loss=0.818]
Epoch 16/50: 100%|██████████| 61/61 [00:20<00:00,  2.91it/s, validation_accuracy=0.975, validation_loss=0.826]






Epoch 17/50: 100%|██████████| 485/485 [07:06<00:00,  1.14it/s, train_accuracy=0.97, train_loss=0.813]
Epoch 17/50: 100%|██████████| 61/61 [00:20<00:00,  2.91it/s, validation_accuracy=0.968, validation_loss=0.821]






Epoch 18/50: 100%|██████████| 485/485 [06:54<00:00,  1.17it/s, train_accuracy=0.964, train_loss=0.808]
Epoch 18/50: 100%|██████████| 61/61 [00:22<00:00,  2.70it/s, validation_accuracy=0.963, validation_loss=0.818]






Epoch 19/50: 100%|██████████| 485/485 [06:58<00:00,  1.16it/s, train_accuracy=0.959, train_loss=0.804]
Epoch 19/50: 100%|██████████| 61/61 [00:20<00:00,  2.94it/s, validation_accuracy=0.96, validation_loss=0.814]






Epoch 20/50: 100%|██████████| 485/485 [07:04<00:00,  1.14it/s, train_accuracy=0.957, train_loss=0.8]
Epoch 20/50: 100%|██████████| 61/61 [00:20<00:00,  2.96it/s, validation_accuracy=0.952, validation_loss=0.811]






Epoch 21/50: 100%|██████████| 485/485 [07:04<00:00,  1.14it/s, train_accuracy=0.952, train_loss=0.797]
Epoch 21/50: 100%|██████████| 61/61 [00:20<00:00,  2.97it/s, validation_accuracy=0.948, validation_loss=0.809]






Epoch 22/50: 100%|██████████| 485/485 [07:06<00:00,  1.14it/s, train_accuracy=0.95, train_loss=0.795]
Epoch 22/50: 100%|██████████| 61/61 [00:21<00:00,  2.89it/s, validation_accuracy=0.948, validation_loss=0.807]






Epoch 23/50: 100%|██████████| 485/485 [07:04<00:00,  1.14it/s, train_accuracy=0.949, train_loss=0.793]
Epoch 23/50: 100%|██████████| 61/61 [00:21<00:00,  2.88it/s, validation_accuracy=0.946, validation_loss=0.805]






Epoch 24/50:  27%|██▋       | 132/485 [01:56<05:11,  1.13it/s, train_accuracy=0.945, train_loss=0.8]


KeyboardInterrupt: 

In [53]:
sample = "Can someone help with an ahk script?"
std_sample = standardizer(sample)
print(std_sample)

model([std_sample])

can someon help with an ahk script


tensor([[0.8151, 0.1315, 0.0534]], grad_fn=<SoftmaxBackward0>)

# Unassign

In [None]:
# runtime.unassign()