# 02. Neural Network classification with PyTorch

Classification predicts which class something belongs to, chosen from a fixed list of options.

In [None]:
## 1. Classification data
from sklearn.datasets import make_circles

# Number of points to generate.
n_samples = 1000

# Build the circles dataset: X holds the point coordinates, y the label per point.
# A fixed random_state keeps the generated data reproducible between runs.
X, y = make_circles(
    n_samples=n_samples,
    noise=0.03,
    random_state=42,
)

In [None]:
len(X), len(y)

In [None]:
print(f"first 5 samples of X:\n {X[:5]}")
print(f"first 5 samples of y:\n {y[:5]}")

In [None]:
# Make dataframe
import pandas as pd

# One row per sample: the two feature columns plus the label.
circles = pd.DataFrame(dict(X1=X[:, 0], X2=X[:, 1], label=y))
circles.head(10)

In [None]:
import matplotlib.pyplot as plt

# Plot the first feature against the second, colouring each point by its label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu)

# Checking input and output shapes


In [None]:
X.shape, y.shape

In [None]:
# first example of features and labels
X_sample, y_sample = X[0], y[0]

# Show the values and then the shapes of that single (features, label) pair.
print(f"Values for one sample of X: {X_sample} and y {y_sample}")
print(f"Shapes for one sample of X: {X_sample.shape} and y {y_sample.shape}")

### Convert data into tensors

In [None]:
import torch
torch.__version__

In [None]:
X = torch.from_numpy(X).type(torch.float)
y = torch.from_numpy(y).type(torch.float)

In [None]:
X[:5], y[:5]

In [None]:
# Python type of X, followed by the element dtype of each tensor.
# `X.type` without parentheses only displays the bound method, so read `.dtype` instead.
type(X), X.dtype, y.dtype

In [None]:
# Split data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

In [None]:
len(X_train), len(X_test), len(y_train), len(y_test)

# Building model

1. Set up device-agnostic code
2. Construct model from nn.Module
3. Define loss func and optimizers
4. Create training and test loops

In [None]:
# 1. setup device agnostic code
import torch
from torch import nn

device = "cuda" if torch.cuda.is_available() else "cpu"
device

# 2. Construct model

In [None]:
# 1. construct a model
class CircleModelV0(nn.Module):
    """Two stacked linear layers mapping 2 input features to 1 output value."""

    def __init__(self):
        super().__init__()
        # 2 input features -> 5 hidden features
        self.layer_1 = nn.Linear(in_features=2, out_features=5)
        # 5 hidden features -> 1 output (same shape as y)
        self.layer_2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Send x through layer_1, then layer_2, and return the result."""
        hidden = self.layer_1(x)
        return self.layer_2(hidden)
    
# 4.instantiate an instance of model class and send it to the target device
model_0 = CircleModelV0().to(device)
model_0

In [None]:
# Report which device the model's parameters live on.
# `.to(device)` would just return the parameter tensor; `.device` is the attribute that shows placement.
next(model_0.parameters()).device

In [None]:
# nn.Sequential() can be used for similar models
# model_0 = nn.Sequential(
#     nn.Linear(in_features=2, out_features=5),
#     nn.Linear(in_features=5, out_features=1)
# ).to(device)
# model_0

In [None]:
model_0.state_dict()

In [None]:
# Predictions from the untrained model (the model outputs raw logits)
with torch.inference_mode():
    untrained_preds = model_0(X_test.to(device))
print(f"Len of prediction: {len(untrained_preds)}, Shape: {untrained_preds.shape}")
print(f"Len of test samples: {len(X_test)}, Shape: {X_test.shape}")
# Logits are unbounded, so squash them with sigmoid before rounding;
# only then are the values 0/1 and comparable with the labels printed below.
print(f"\n First 10 predictions:\n{torch.round(torch.sigmoid(untrained_preds[:10]))}")
print(f"\n First 10 labels:\n{y_test[:10]}")

# loss function and optimizer
Loss function
* Regression: MAE or MSE (mean absolute error or mean squared error)  
* Classification: binary cross entropy or categorical cross entropy  

optimizers
* SGD
* Adam


In [None]:
# loss
loss_fn = torch.nn.BCEWithLogitsLoss() # sigmoid activation function included
# optimizer
optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr=0.1)


In [None]:
# accuracy
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of entries in y_pred that equal y_true.

    Both arguments are compared element-wise with torch.eq; the number of
    matches is divided by len(y_pred).
    """
    matches = torch.eq(y_true, y_pred)
    n_correct = matches.sum().item()
    return (n_correct / len(y_pred)) * 100

# Train model

1. forward pass
2. calc loss
3. optimizer zero grad
4. loss backward
5. optimizer step

In [None]:
# 
model_0.eval()
with torch.inference_mode():
  y_logits = model_0(X_test.to(device))[:5]
y_logits

In [None]:
# applying the sigmoid activation function to the model's logits turns them into prediction probabilities
# rounding those probabilities then gives the predicted class labels (0 or 1)
y_pred_probs = torch.sigmoid(y_logits)
y_pred_probs

In [None]:
# find predicted labels
y_preds = torch.round(y_pred_probs)

# logits -> pred probs -> pred labels
y_pred_labels = torch.round(torch.sigmoid(model_0(X_test.to(device))[:5]))

# check equal
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))

# get rid of the extra dimension
y_preds.squeeze()

# building a train and test loop

In [None]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 100

# Move every data split to the target device.
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    # --- training step ---
    model_0.train()

    # Forward pass: logits -> probabilities -> rounded labels.
    y_logits = model_0(X_train).squeeze()
    y_preds = torch.sigmoid(y_logits).round()

    # loss_fn is given the raw logits; accuracy uses the rounded labels.
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_preds)

    # Clear old gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # --- evaluation step (no gradient tracking) ---
    model_0.eval()
    with torch.inference_mode():
        test_logits = model_0(X_test).squeeze()
        test_pred = torch.sigmoid(test_logits).round()
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Report progress every 10 epochs.
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Acc: {acc:.2f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}")
        
        

## make predictions and eval the model

Based on the metrics above, the model is not learning.

In [None]:
from helper_functions import plot_predictions, plot_decision_boundary


In [None]:
plt.figure(figsize=(12, 6))
plt.subplot(1,2,1)
plt.title("Train")
plot_decision_boundary(model_0, X_train, y_train)
plt.subplot(1,2,2)
plt.title("Test")
plot_decision_boundary(model_0, X_test, y_test)

# Improving a model

Change the hyperparameters
* add more layers - gives model more chances to learn patterns in the data
* add more hidden units - go from 5 hidden to 10 hidden
* Fit for longer - increase epochs
* Change activation function
* change learning rate
* change loss function

Attempt 1
* add more hidden units: 5 -> 10
* increase number of layers: 2 -> 3
* increase number of epochs: 100 -> 1000

In [None]:
class CircleModelV1(nn.Module):
    """Three stacked linear layers (2 -> 10 -> 10 -> 1) with no activation in between."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Input features -> hidden units
        self.layer_1 = nn.Linear(in_features=2, out_features=10)
        # Hidden units -> hidden units
        self.layer_2 = nn.Linear(in_features=10, out_features=10)
        # Hidden units -> single output value
        self.layer_3 = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Apply layer_1, layer_2 and layer_3 in sequence and return the output."""
        z = self.layer_1(x)
        z = self.layer_2(z)
        z = self.layer_3(z)
        return z
    
model_1 = CircleModelV1().to(device)
model_1

In [None]:
# loss
loss_fn = torch.nn.BCEWithLogitsLoss() # sigmoid activation function included
# optimizer
optimizer = torch.optim.SGD(params=model_1.parameters(),
                            lr=0.01)

In [None]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 1000

# Move every data split to the target device.
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    # --- training step ---
    model_1.train()

    # Forward pass: logits -> probabilities -> rounded labels.
    y_logits = model_1(X_train).squeeze()
    y_preds = torch.sigmoid(y_logits).round()

    # loss_fn is given the raw logits; accuracy uses the rounded labels.
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_preds)

    # Clear old gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # --- evaluation step (no gradient tracking) ---
    model_1.eval()
    with torch.inference_mode():
        test_logits = model_1(X_test).squeeze()
        test_pred = torch.sigmoid(test_logits).round()
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Acc: {acc:.2f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}")
        
       

In [None]:
plt.figure(figsize=(12, 6))
plt.subplot(1,2,1)
plt.title("Train")
plot_decision_boundary(model_1, X_train, y_train)
plt.subplot(1,2,2)
plt.title("Test")
plot_decision_boundary(model_1, X_test, y_test)

In [None]:
# create linear data following y = weight * X + bias
weight, bias = 0.7, 0.3
start, end, step = 0, 1, 0.01

# Inputs as a column (one value per row), and the matching targets.
X_regression = torch.arange(start, end, step).unsqueeze(1)
y_regression = X_regression * weight + bias

print(len(X_regression))
X_regression[:5], y_regression[:5]

In [None]:
# train test split: the first 80% of samples train the model, the rest test it
train_split = int(0.8 * len(X_regression))
X_train_regression, X_test_regression = X_regression[:train_split], X_regression[train_split:]
y_train_regression, y_test_regression = y_regression[:train_split], y_regression[train_split:]

# lengths of each split
len(X_train_regression), len(X_test_regression), len(y_train_regression), len(y_test_regression)

In [None]:
plot_predictions(train_data=X_train_regression, train_labels=y_train_regression, 
                 test_data=X_test_regression, test_labels=y_test_regression)


In [None]:
model_1

In [None]:
model_2 = nn.Sequential(
    nn.Linear(in_features=1, out_features=10),
    nn.Linear(in_features=10, out_features=10),
    nn.Linear(in_features=10, out_features=1)
).to(device)
model_2

In [None]:
# loss: L1 (mean absolute error) for the regression task; no sigmoid is involved
loss_fn = nn.L1Loss()
# optimizer: SGD over model_2's parameters
optimizer = torch.optim.SGD(model_2.parameters(), lr=0.01)

In [None]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 1000

# Move the regression splits to the target device.
X_train_regression = X_train_regression.to(device)
y_train_regression = y_train_regression.to(device)
X_test_regression = X_test_regression.to(device)
y_test_regression = y_test_regression.to(device)

for epoch in range(epochs):
    # --- training step ---
    model_2.train()
    y_pred = model_2(X_train_regression)
    loss = loss_fn(y_pred, y_train_regression)

    # Clear old gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # --- evaluation step (no gradient tracking) ---
    model_2.eval()
    with torch.inference_mode():
        test_pred = model_2(X_test_regression)
        test_loss = loss_fn(test_pred, y_test_regression)

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}| Test loss: {test_loss:.5f}")
        
       

In [None]:
# Evaluate model
model_2.eval()

# predictions
with torch.inference_mode():
    y_pred = model_2(X_test_regression)

# plots
plot_predictions(train_data=X_train_regression.cpu(), 
                 train_labels=y_train_regression.cpu(),
                 test_data=X_test_regression.cpu(),
                 test_labels=y_test_regression.cpu(),
                 predictions=y_pred.cpu())

# non-linearity
* patterns drawn with straight and non-straight lines
* linear and non-linear functions

In [None]:
# make a plot new data

import matplotlib.pyplot as plt
from sklearn.datasets import make_circles

n_samples = 1000

X, y = make_circles(n_samples=n_samples,
                    noise=0.03,
                    random_state=42)

plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu)

In [None]:
# train test
import torch
from sklearn.model_selection import train_test_split

# data to tensor (torch.float is 32-bit floating point)
X = torch.from_numpy(X).type(torch.float)
y = torch.from_numpy(y).type(torch.float)

# split train test: 20% of the samples are held out for testing,
# and the fixed random_state makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# Preview features and labels from the SAME split so the displayed rows correspond
# (showing X_test next to y_train would pair unrelated samples).
X_train[:5], y_train[:5]


# model with non-linearity

* Linear = straight lines
* Non-linear = non-straight (curves)


In [None]:
# model with non-linear activations 
from torch import nn

class CircleModelV2(nn.Module):
    """Three linear layers (2 -> 10 -> 10 -> 1) with ReLU after the first two."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Input features -> hidden units
        self.layer_1 = nn.Linear(in_features=2, out_features=10)
        # Hidden units -> hidden units
        self.layer_2 = nn.Linear(in_features=10, out_features=10)
        # Hidden units -> single output value
        self.layer_3 = nn.Linear(in_features=10, out_features=1)
        # Non-linear activation function shared by both hidden layers
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return layer_3(relu(layer_2(relu(layer_1(x)))))."""
        z = self.relu(self.layer_1(x))
        z = self.relu(self.layer_2(z))
        return self.layer_3(z)

model_3 = CircleModelV2().to(device)
model_3

In [None]:
# loss
loss_fn = torch.nn.BCEWithLogitsLoss() # sigmoid activation function included
# optimizer
optimizer = torch.optim.SGD(params=model_3.parameters(),
                            lr=0.1)

In [None]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [None]:
epochs = 1000

# Move every data split to the target device.
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    # --- training step ---
    model_3.train()

    # Forward pass: logits -> probabilities -> rounded labels.
    y_logits = model_3(X_train).squeeze()
    y_preds = torch.sigmoid(y_logits).round()

    # loss_fn is given the raw logits; accuracy uses the rounded labels.
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_preds)

    # Clear old gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # --- evaluation step (no gradient tracking) ---
    model_3.eval()
    with torch.inference_mode():
        test_logits = model_3(X_test).squeeze()
        test_pred = torch.sigmoid(test_logits).round()
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Acc: {acc:.2f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}")

In [None]:
model_3.eval()
with torch.inference_mode():
    y_preds = torch.round(torch.sigmoid(model_3(X_test))).squeeze()
y_preds[:10], y_test[:10]

In [None]:
# visual evaluation
plt.figure(figsize=(12, 6))
plt.subplot(1,2,1)
plt.title("Train")
plot_decision_boundary(model_3, X_train, y_train)
plt.subplot(1,2,2)
plt.title("Test")
plot_decision_boundary(model_3, X_test, y_test)