<center><b>Model to categorize words based on their first character - one-hot encoding</b></center>

In [17]:
# Import dependencies
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import numpy as np
import string

# Pick the compute device once so the rest of the notebook stays device agnostic
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [18]:
# Set the hyperparameters for data creation
STARTING_CHAR = 'c' # <- first letter the classifier looks for
ALPHABET = string.ascii_lowercase
NUM_CLASSES = 2 # <- begins with STARTING_CHAR or not (two classes)
NUM_FEATURES = len(ALPHABET) # <- size of a character vector; derived so it always matches ALPHABET
RANDOM_SEED = 42


# Function to get one-hot encoding of a character
def char_to_vec(character, alphabet=None):
    """Return the one-hot encoding of a single character as a list of ints.

    The character is lower-cased first, so "C" and "c" give the same vector.

    Args:
        character: A one-character string that occurs in `alphabet`
            (case-insensitive).
        alphabet: String of allowed characters; position in this string is the
            position of the 1 in the result. Defaults to the module-level
            ALPHABET.

    Returns:
        list[int]: A list of len(alphabet) zeros with a single 1 at the
        character's index in `alphabet`.

    Raises:
        ValueError: If `character` is not exactly one character after
            lower-casing, or does not occur in `alphabet`.
    """
    if alphabet is None:
        alphabet = ALPHABET

    character = character.lower()
    # str.index() performs substring search: "" matches at position 0 and a run
    # such as "ab" matches where it starts, so both would be encoded silently.
    if len(character) != 1:
        raise ValueError(f"expected a single character, got {character!r}")

    alph_index = alphabet.index(character)  # raises ValueError when not found
    char_vec = [0] * len(alphabet)  # sized from the alphabet instead of a literal 26
    char_vec[alph_index] = 1
    return char_vec

# Create the input data
raw_inputs = [
    "Cat", "put", "Rat", "cut", "Car", "Tuc", "mat", "cot", "key", "Cup",
    "bit", "lab", "cow", "Ten", "cap", "one", "run", "Can", "Cab", "cub",
]

# Convert the inputs to their corresponding one-hot encoding format
total_words = len(raw_inputs)

# Only the first character of each word is encoded; that is all the classifier needs
char_vectors = [char_to_vec(word[0]) for word in raw_inputs]

# A label is [0,1] for words starting with STARTING_CHAR and [1,0] for every other word
word_labels = [
    [0, 1] if word[0].lower() == STARTING_CHAR else [1, 0]
    for word in raw_inputs
]

# Convert the lists to numpy arrays
char_vectors_array = np.array(char_vectors)
word_labels_array = np.array(word_labels)

# Turn them into tensors (float features, integer labels)
X = torch.from_numpy(char_vectors_array).float()
y = torch.from_numpy(word_labels_array).long()


In [19]:
# Hold out 20% of the samples for testing; random_state is fixed by RANDOM_SEED
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

# Show the number of samples in each of the four splits
tuple(len(split) for split in (X_train, X_test, y_train, y_test))

(16, 4, 16, 4)

In [20]:
# Build the model
class WordCFModel(nn.Module):
    """Fully connected classifier: two ReLU-activated hidden layers plus a linear output layer.

    Args:
        input_features: Width of each input vector.
        output_features: Number of values produced per input sample.
        hidden_units: Width of both hidden layers (default 50).
    """

    def __init__(self, input_features, output_features, hidden_units=50):
        super().__init__()
        # Listed in forward order; nn.Sequential indexes them 0..4
        layers = [
            nn.Linear(input_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_features),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Pass `x` through the layer stack and return its raw output (no softmax applied)."""
        return self.layer_stack(x)

# Create an instance of the model
# Seed first: the layer weights are drawn when the model is constructed, so
# seeding only in the training cell leaves the starting weights (and therefore
# every later result) different on each fresh run.
torch.manual_seed(RANDOM_SEED)
model_cf = WordCFModel(input_features=NUM_FEATURES, output_features=NUM_CLASSES).to(device)

model_cf

WordCFModel(
  (layer_stack): Sequential(
    (0): Linear(in_features=26, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=2, bias=True)
  )
)

In [21]:
# Optimizer: stochastic gradient descent over every parameter of model_cf
optimizer = torch.optim.SGD(params=model_cf.parameters(), lr=0.1)
# Loss function: cross entropy, applied to the model's raw outputs
loss_fn = nn.CrossEntropyLoss()

In [22]:
# Create accuracy function
def accuracy_fn(y_true, y_pred):
    """Return the percentage of predictions that match the true labels.

    Args:
        y_true: True labels as a tensor. Either one row of per-class scores per
            sample (for example one-hot rows, shape [N, C]) or already-decoded
            class indices (shape [N]). Integer and float dtypes both work.
        y_pred: Predicted class indices, shape [N].

    Returns:
        float: Accuracy between 0 and 100. Returns 0.0 when `y_pred` is empty.
    """
    total = len(y_pred)
    if total == 0:
        return 0.0  # nothing to score; avoids dividing by zero

    # argmax alone recovers the class index. The softmax that used to precede it
    # never changes which entry is largest, and it raised on integer label
    # tensors and on 1-D class-index labels.
    true_labels = y_true.argmax(dim=1) if y_true.dim() > 1 else y_true
    correct = torch.eq(true_labels, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    return (correct / total) * 100

In [23]:
# Sanity check: run a forward pass on the test inputs and show at most the first five output rows
model_cf(X_test.to(device))[0:5]

tensor([[0.1291, 0.0110],
        [0.1291, 0.0110],
        [0.1342, 0.0230],
        [0.1337, 0.0217]], grad_fn=<SliceBackward0>)

In [24]:
# Compare the shape of the first sample's output with the number of classes
model_cf(X_train.to(device))[0].size(), NUM_CLASSES

(torch.Size([2]), 2)

In [40]:
# Create the training and testing loop
torch.manual_seed(RANDOM_SEED)

epochs = 100

# Put data to target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# The one-hot labels are passed to the loss as float class probabilities.
# `.float()` keeps each tensor on the device it already lives on, whereas
# `.type(torch.FloatTensor)` copies it to the CPU and then clashes with logits
# that sit on the GPU. The cast is loop-invariant, so it is done once here.
y_train_target = y_train.float()
y_test_target = y_test.float()

for epoch in range(epochs):
    ### Training
    model_cf.train()

    # 1. Forward pass
    # No .squeeze(): the output is already [batch, classes], and squeezing would
    # drop the batch dimension for a single-sample batch and break softmax(dim=1).
    train_logits = model_cf(X_train) # model outputs raw logits
    train_preds = torch.softmax(train_logits, dim=1).argmax(dim=1) # go from logits -> prediction probabilities -> prediction labels

    # 2. Calculate loss and accuracy
    loss = loss_fn(train_logits, y_train_target)
    acc = accuracy_fn(y_true=y_train_target, y_pred=train_preds)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_cf.eval()

    with torch.inference_mode():
        # 1. Forward pass
        test_logits = model_cf(X_test)
        test_preds = torch.softmax(test_logits, dim=1).argmax(dim=1)

        # 2. Calculate loss and accuracy
        test_loss = loss_fn(test_logits, y_test_target)
        test_acc = accuracy_fn(y_true=y_test_target, y_pred=test_preds)

        # Print what's happening every 10 epochs
        if epoch % 10 == 0:
            print(f"Train loss: {loss} | Train accuracy: {acc}% | Test loss: {test_loss} | Test accuracy: {test_acc}%")
    
    


Train loss: 0.0016468979883939028 | Train accuracy: 100.0% | Test loss: 0.006987008266150951 | Test accuracy: 100.0%
Train loss: 0.0015970219392329454 | Train accuracy: 100.0% | Test loss: 0.006834934465587139 | Test accuracy: 100.0%
Train loss: 0.0015497896820306778 | Train accuracy: 100.0% | Test loss: 0.006688508205115795 | Test accuracy: 100.0%
Train loss: 0.001505016814917326 | Train accuracy: 100.0% | Test loss: 0.006550604477524757 | Test accuracy: 100.0%
Train loss: 0.0014625245239585638 | Train accuracy: 100.0% | Test loss: 0.006415564566850662 | Test accuracy: 100.0%
Train loss: 0.0014220901066437364 | Train accuracy: 100.0% | Test loss: 0.006287767086178064 | Test accuracy: 100.0%
Train loss: 0.0013835870195180178 | Train accuracy: 100.0% | Test loss: 0.00616812240332365 | Test accuracy: 100.0%
Train loss: 0.001346956705674529 | Train accuracy: 100.0% | Test loss: 0.006053464487195015 | Test accuracy: 100.0%
Train loss: 0.0013120354851707816 | Train accuracy: 100.0% | Test l

In [26]:
# Save the model
from pathlib import Path

# 1. Models directory
MODEL_PATH = Path("models")
# Create it when missing: torch.save does not create parent directories, so the
# save below fails on a fresh checkout without this.
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Create model save path
MODEL_NAME = "word_classifier_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# 3. Save the model state dict
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model_cf.state_dict(), # saving only the state_dict() stores just the model's learned parameters
           f=MODEL_SAVE_PATH)

Saving model to: models/word_classifier_model_0.pth


In [27]:
# Load and use model
# Create a new instance of our model class
loaded_model_cf = WordCFModel(input_features=NUM_FEATURES,
                              output_features=NUM_CLASSES
                             ).to(device)

# Load saved model state_dict
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
saved_state_dict = torch.load(f=MODEL_SAVE_PATH, map_location=device)
# Strict loading (the default) raises if any key is missing or unexpected;
# strict=False would silently leave those layers at their random initial values.
loaded_model_cf.load_state_dict(saved_state_dict)

loaded_model_cf

WordCFModel(
  (layer_stack): Sequential(
    (0): Linear(in_features=26, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=2, bias=True)
  )
)

In [28]:
# Evaluate the reloaded model on the held-out test set
loaded_model_cf.eval()

with torch.inference_mode():
    # 1. Forward pass: raw outputs -> softmax -> index of the largest probability
    y_logits = loaded_model_cf(X_test)
    y_preds = y_logits.softmax(dim=1).argmax(dim=1)

    # 2. Accuracy of the predicted labels against the one-hot targets
    acc = accuracy_fn(
        y_true=y_test.type(torch.FloatTensor),
        y_pred=y_preds.type(torch.FloatTensor),
    )

# Print the result, then display the test inputs next to their predictions
print(f"Accuracy: {acc}% ")
X_test, y_preds

Accuracy: 100.0% 


(tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.]]),
 tensor([1, 1, 0, 0]))

In [38]:
# Test on new data

# Create the input data
# Stored under its own names so the training-set variables (raw_inputs,
# total_words, char_vectors, char_vectors_array, X) stay intact and the earlier
# cells can still be re-run after this one.
new_raw_inputs = ["Cop", "nut", "cun", "xed", "cak", "cit"]

# Convert the inputs to their corresponding one-hot encoding format
# (only the first character of each word is encoded)
new_char_vectors = [char_to_vec(word[0]) for word in new_raw_inputs]

# Convert the list to a numpy array
new_char_vectors_array = np.array(new_char_vectors)

# Turn it into a float tensor on the target device
X_inf = torch.from_numpy(new_char_vectors_array).type(torch.float).to(device)


In [39]:
# Predict labels for the new words with the reloaded model
loaded_model_cf.eval()

with torch.inference_mode():
    # 1. Forward pass: raw outputs -> softmax -> index of the largest probability
    y_logits = loaded_model_cf(X_inf)
    y_preds = y_logits.softmax(dim=1).argmax(dim=1)

y_preds

tensor([1, 0, 1, 0, 1, 1])