# Segment 3 Lab

## Let's apply a Neural Network to the product price example from Segment 2

In [None]:
# imports - now including pytorch

import os
import random
from dotenv import load_dotenv
from huggingface_hub import login
from items import Item
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import pickle
import json
import csv
from sklearn.feature_extraction.text import CountVectorizer
from testing import Tester
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchviz import make_dot
from IPython.display import display, SVG
from sklearn.model_selection import train_test_split

In [None]:
# Load the pickled training and test items
# Sidenote: this is actually a larger dataset than before (about twice as large)
# NOTE(review): pickle.load can execute arbitrary code - only load files you trust

with open('../train.pkl', 'rb') as train_file:
    train = pickle.load(train_file)

with open('../test.pkl', 'rb') as test_file:
    test = pickle.load(test_file)

In [None]:
# Show how many training items were loaded
len(train)

# Wait - just before we make our Neural Network

There is a different kind of Neural Network that I'd like to put to the test.

In [None]:
# Read in results from a Neural Network of the non-artificial kind!!
# The second column of each CSV row holds the human's price estimate.

human_predictions = []
# newline='' is what the csv module expects, so that newlines embedded in
# quoted fields are parsed correctly
with open('human_output.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if not row:
            # Skip blank lines (e.g. a trailing newline) instead of failing on row[1]
            continue
        human_predictions.append(float(row[1]))

In [None]:
def human(item):
    """Return the human price estimate recorded for `item`.

    The estimate is looked up by position: the index of `item` within `test`
    selects the matching entry of `human_predictions`.
    """
    return human_predictions[test.index(item)]

In [None]:
# Run the project's Tester on the human predictor over the test items
Tester.test(human, test)

# OK now let's go Artificial!

In [None]:
# Build the model inputs (item texts) and the regression targets (prices as floats)

documents = [product.text for product in train]
y = np.array([float(product.price) for product in train])

In [None]:
# Use the CountVectorizer for a Bag of Words model
# Using binary=True with the CountVectorizer makes "one-hot vectors"

# Seed the random number generators for reproducibility. The NumPy seed alone
# does not cover PyTorch, which draws the initial network weights and the
# DataLoader shuffle order from its own generator.
np.random.seed(42)
torch.manual_seed(42)
vectorizer = CountVectorizer(max_features=2000, stop_words='english', binary=True)
X = vectorizer.fit_transform(documents)

In [None]:
# Define the neural network - here is PyTorch code to create a 6-layer neural network


class NeuralNetwork(nn.Module):
    """Fully connected regression network with six linear layers.

    Layer widths are input_size -> 128 -> 64 -> 64 -> 64 -> 64 -> 1, with a
    ReLU after every layer except the last, which emits one raw value.
    """

    def __init__(self, input_size):
        """Create the six linear layers; `input_size` is the feature count per sample."""
        super().__init__()
        self.layer1 = nn.Linear(input_size, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 64)
        self.layer4 = nn.Linear(64, 64)
        self.layer5 = nn.Linear(64, 64)
        self.layer6 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Pass `x` through the five ReLU-activated layers, then the linear output layer."""
        hidden = x
        for layer in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            hidden = self.relu(layer(hidden))
        # No activation on the final layer: the output is an unbounded real number
        return self.layer6(hidden)

# Count the trainable parameters. Each Linear layer has (inputs * outputs) weights
# plus one bias per output, so the biases must be included in the total.
layer_sizes = [2000, 128, 64, 64, 64, 64, 1]  # 2000 = vocabulary size given to the CountVectorizer
params = sum(n_in * n_out + n_out for n_in, n_out in zip(layer_sizes, layer_sizes[1:]))
print(f"There are {params:,} parameters in our neural network")

In [None]:
# Turn the bag-of-words matrix (made dense) and the prices into float32 tensors
X_train_tensor = torch.tensor(X.toarray(), dtype=torch.float32)
y_train_tensor = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # one price per row

# Hold back 5% of the rows for validation; random_state fixes which rows are chosen
X_train, X_val, y_train, y_val = train_test_split(
    X_train_tensor,
    y_train_tensor,
    test_size=0.05,
    random_state=42,
)

# Serve the training rows in shuffled batches of 32
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)

# Build the network with one input per feature column
input_size = X_train_tensor.size(1)
model = NeuralNetwork(input_size)

# Run a random dummy input through the model and render the resulting graph as SVG
x = torch.randn(1, input_size)
dot = make_dot(model(x), params=dict(model.named_parameters()))
display(SVG(dot.pipe(format='svg')))

# Time for the 4 steps of training

We now carry out the 4 steps of training on one chunk of our data at a time (each chunk is called a 'batch'):
1. Forward pass
2. Loss calculation
3. Backward pass (calculate gradients)
4. Optimization (shift the weights a step in the right direction to do better next time)

### The Learning Rate

When Optimizing, we need to decide how big a step to take each time. This is called the "Learning Rate" and it's an important hyperparameter. We will try 0.001.

### Epochs

We will run through the entire dataset several times; each complete run is called an "Epoch"

In [None]:
# Define loss function and optimizer

loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# We will do 2 complete runs through the data

EPOCHS = 2

for epoch in range(EPOCHS):
    model.train()

    # Accumulate the loss over the whole epoch so the reported training loss is the
    # mean over all samples, not just the loss of whichever batch happened to be last
    running_loss = 0.0
    samples_seen = 0

    for batch_X, batch_y in tqdm(train_loader):
        optimizer.zero_grad()

        # The next 4 lines are the 4 stages of training: forward pass, loss calculation, backward pass, optimize

        outputs = model(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # MSELoss averages within the batch, so weight by batch size
        # (the final batch may be smaller than the others)
        batch_count = batch_X.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count

    # Guard against an empty loader so the division cannot fail
    train_loss = running_loss / max(samples_seen, 1)

    # Evaluate on the held-out validation rows without tracking gradients
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = loss_function(val_outputs, y_val)

    print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {train_loss:.3f}, Val Loss: {val_loss.item():.3f}')

In [None]:
def neural_network(item):
    """Predict a price for `item` with the trained network.

    The item's text is vectorized with the fitted `vectorizer`, passed through
    `model` in evaluation mode without gradient tracking, and the single output
    is clamped at zero.

    Returns:
        float: the predicted price, never negative.
    """
    model.eval()
    with torch.no_grad():
        features = vectorizer.transform([item.text])
        features = torch.FloatTensor(features.toarray())
        prediction = model(features)[0].item()
    # Clamp with a float literal so the return type is always float
    # (max(0, ...) would return the int 0 for negative predictions)
    return max(0.0, prediction)

In [None]:
# Text of the first test item
test[0].text

In [None]:
# Price recorded for the first test item
test[0].price

In [None]:
# The network's prediction for the first test item
neural_network(test[0])

In [None]:
# Run the project's Tester on the neural network predictor over the test items
Tester.test(neural_network, test)

# Exercise: Make a Deeper Neural Network!

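See how many layers you can add. You may need to look up some techniques to avoid overfitting (for example, dropout or weight decay). Also try increasing the vocabulary size (the `max_features` setting of the `CountVectorizer`).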