<center><b>Model to categorize words based on their fisrt character - one hot encoding</b></center>

In [4]:
# Import dependencies
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import numpy as np
import string

# Use device agnostic code
device= "cuda" if torch.cuda.is_available() else "cpu"

In [33]:
# Set the hyperparameters for data creation
STARTING_CHAR = 'c'
ALPHABET = string.ascii_lowercase
NUM_CLASSES = 2 # <- begins with STARTING_CHAR or not (two classes)
NUM_FEATURES = 3 # <- lets have 3 characters in each word
RANDOM_SEED = 42


# Function to get one-hot encoding of a character
def char_to_vec(character):
    character = character.lower()
    char_vec = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
    alph_index = ALPHABET.index(character)
    char_vec[alph_index] = 1
    return char_vec

# Create the input data
raw_inputs = ["Cat", "put", "Rat", "cut", "Car", "Tuc", "mat", "cot", "key", "Cup", "bit", "lab",
                  "cow", "Ten", "cap", "one", "run", "Can", "Cab", "cub"]

# Convert the inputs to their corrsponding one-hot encoding format
total_words = len(raw_inputs)

char_vectors = [] # <- create a list to store the vectors
word_labels = [] # <- a label is in the format [0,1] for words starting with STARTING_CHAR and [1,0] for those that start with other characters

for w in range(total_words):
    word_char_vecs = []
    for c in range(NUM_FEATURES):        
        word_char_vecs.append(char_to_vec(raw_inputs[w][c]))
    
    char_vectors.append(word_char_vecs)
    if raw_inputs[w][0].lower() == STARTING_CHAR:
        word_labels.append([0,1])
    else:
        word_labels.append([1,0])

# Convert the lists to numpy arrays
char_vectors_array = np.array(char_vectors)
word_labels_array = np.array(word_labels)

# Turn them into tensors
X = torch.from_numpy(char_vectors_array).type(torch.float)
y = torch.from_numpy(word_labels_array).type(torch.FloatTensor)


In [35]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)

len(X_train), len(X_test), len(y_train), len(y_test)

(16, 4, 16, 4)

In [45]:
# Build the model
class WordCFModel(nn.Module):
    def __init__(self, input_features, output_features, hidden_units=8):
        super().__init__()
        self.layer_stack = nn.Sequential(
                nn.Linear(in_features=input_features, out_features=hidden_units),
                nn.ReLU(),
                nn.Linear(in_features=hidden_units, out_features=hidden_units),
                nn.ReLU(),
                nn.Linear(in_features=hidden_units, out_features=output_features)            
            )
    def forward(self, x):
        return self.layer_stack(x)

# Create an instance of the model
model_cf = WordCFModel(input_features=26, output_features=NUM_CLASSES, hidden_units=50).to(device)

model_cf

WordCFModel(
  (layer_stack): Sequential(
    (0): Linear(in_features=26, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=2, bias=True)
  )
)

In [43]:
# Create the loss function
loss_fn = nn.CrossEntropyLoss()
# Create optimizer function
optimizer = torch.optim.SGD(model_cf.parameters(), lr=0.1) # Stochastic Gradient Descent optimizer

In [40]:
# Create accuracy function
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(torch.softmax(y_true, dim=1).argmax(dim=1), y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100
    return acc

In [44]:
# Test if the model is working by performing a forward pass
model_cf(X_test.to(device))[:5]

tensor([[[0.0717, 0.1061],
         [0.0711, 0.1034],
         [0.0667, 0.0980]],

        [[0.0717, 0.1061],
         [0.0711, 0.1034],
         [0.0571, 0.0805]],

        [[0.0718, 0.1070],
         [0.0571, 0.0805],
         [0.0658, 0.0935]],

        [[0.0513, 0.0784],
         [0.0327, 0.0568],
         [0.0667, 0.0980]]], grad_fn=<SliceBackward0>)

In [None]:
# Create the training and testing loop
torch.manual_seed(RANDOM_SEED)

epochs = 100

# Put data to target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    ### Training
    model_cf.train()

    # 1. Forward pass
    train_logits = model_cf(X_train) # model outputs raw logits
    train_preds = torch.soft_max(train_logits, dim=1).argmax(dim=1) # go from logits -> prediction probabilities -> prediction labels

    
