In [252]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split

#Total number of points to generate
n_samples = 1000

#create circles (fixed random_state so the same points come back on every run)
X, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)
# print(f'First five samples of X: {X[:5]}')
# print(f'First five samples of y: {y[:5]}')

In [253]:
#Tabular view of the points (two coordinates plus label), handy for circles.head()
circles = pd.DataFrame({"X1": X[:, 0],
                        "X2": X[:, 1],
                        "label": y})
#circles.head(10)
#plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap=plt.cm.RdYlBu)

#check input and output shapes: one label per sample, two features per sample
assert X.shape[0] == y.shape[0], "X and y must contain the same number of samples"
assert X.shape[1] == 2, "expected two input features per sample"
X_sample = X[0]
y_sample = y[0]
#print(f'First sample of X: {X_sample} and first sample of y: {y_sample}')
#print(f'Shape for one sample of X: {X_sample.shape} and shape of one sample of y: {y_sample.shape}')

#Turn data into float32 tensors
#torch.as_tensor accepts NumPy arrays as well as tensors, so this step does not raise
#the TypeError that torch.from_numpy gives once X and y have already been converted
#(X and y are rebound here, which happens whenever the cell is executed a second time).
X = torch.as_tensor(X, dtype=torch.float32)
y = torch.as_tensor(y, dtype=torch.float32)

#Split data into training and test sets (80% train / 20% test, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [254]:
#Make device agnostic code: use the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

#Construct a model
class CircleModelV0(nn.Module):
  """Small fully connected binary classifier: 2 inputs -> 10 -> 10 -> 1 output.

  ReLU is applied after each of the two hidden layers. The last layer is left
  linear, so forward() returns raw logits rather than probabilities.
  """

  def __init__(self):
    super().__init__()
    # Two input features (X[:, 0] and X[:, 1]) feed the first hidden layer.
    self.layer_1 = nn.Linear(2, 10)
    self.layer_2 = nn.Linear(10, 10)
    # A single output unit, because there is one label y per sample.
    self.layer_3 = nn.Linear(10, 1)
    self.relu = nn.ReLU()  # non-linear activation reused after both hidden layers

  def forward(self, x):
    """Map a batch of 2-feature rows to one raw logit per row."""
    # x -> layer 1 -> ReLU -> layer 2 -> ReLU -> layer 3 -> logit
    hidden = self.relu(self.layer_1(x))
    hidden = self.relu(self.layer_2(hidden))
    return self.layer_3(hidden)

#Seed the global RNG before building the model: the nn.Linear layers draw their initial
#weights when they are constructed, so without a seed here every fresh kernel starts
#training from different weights and the reported losses/accuracies cannot be reproduced.
torch.manual_seed(42)
model_0 = CircleModelV0().to(device)
model_0


CircleModelV0(
  (layer_1): Linear(in_features=2, out_features=10, bias=True)
  (layer_2): Linear(in_features=10, out_features=10, bias=True)
  (layer_3): Linear(in_features=10, out_features=1, bias=True)
  (relu): ReLU()
)

In [255]:
#Another way of constructing the model: NOT NEEDED
#Kept for reference only. The triple-quoted string below is never executed; because it is
#the last expression in the cell, the notebook simply echoes it back as the cell output.
#Note that this sketch is a smaller stack (2 -> 5 -> 1) with no activation between the layers,
#so it is not equivalent to CircleModelV0.
'''
model_0=nn.Sequential(
   nn.Linear(in_features=2, out_features=5),
   nn.Linear(in_features=5, out_features=1)
).to(device)
'''

'\nmodel_0=nn.Sequential(\n   nn.Linear(in_features=2, out_features=5),\n   nn.Linear(in_features=5, out_features=1)\n).to(device)\n'

In [256]:
#Inspect where the test data and the model parameters live.
#Only the final expression of a cell is echoed, so the parameter device is what gets displayed.
#model_0.state_dict()

#X_test=X_test.to(device)
X_test.device

next(iter(model_0.parameters())).device

device(type='cpu')

In [257]:
#Make some predictions with the untrained model as a sanity check on output shapes.
#The values are raw model outputs (no sigmoid applied), so they are not yet comparable to the 0/1 labels.
with torch.inference_mode():  # plain forward pass, no gradient tracking needed
  untrained_preds = model_0(X_test.to(device))
print(f'Length of predictions: {len(untrained_preds)}, Shape: {untrained_preds.shape}')
print(f'Length of test samples: {len(X_test)}, Shape: {X_test.shape}')
print(f'\nFirst 10 predictions: \n{untrained_preds[:10]}')
print(f'\nFirst 10 labels \n {y_test[:10]}')

Length of predictions: 200, Shape: torch.Size([200, 1])
Length of test samples: 200, Shape: torch.Size([200, 2])

First 10 predictions: 
tensor([[0.0167],
        [0.0207],
        [0.0126],
        [0.0171],
        [0.0420],
        [0.0239],
        [0.0315],
        [0.0330],
        [0.0116],
        [0.0208]])

First 10 labels 
 tensor([1., 0., 1., 0., 1., 1., 0., 0., 1., 0.])


In [258]:
#Setup the loss function and the optimizer
#Binary classification pairs a sigmoid with binary cross-entropy; BCEWithLogitsLoss takes the
#model's raw logits directly. (For multiclass classification the counterpart is softmax.)
loss_fn = nn.BCEWithLogitsLoss()

#Most common optimizers: SGD and Adam
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

def accuracy_fn(y_true, y_pred):
  """Return the percentage (0-100) of predictions that equal their labels.

  Args:
    y_true: tensor of ground-truth labels.
    y_pred: tensor of predicted labels, normally with the same number of elements.

  Returns:
    Accuracy as a Python float percentage; 0.0 when there is nothing to compare.
  """
  # Flatten when both tensors hold the same number of elements, so that e.g. (N,) labels
  # against (N, 1) predictions are compared element-wise. Without this, torch.eq broadcasts
  # the pair into an N x N grid and the match count (and the accuracy) is inflated.
  if y_true.numel() == y_pred.numel():
    y_true, y_pred = y_true.reshape(-1), y_pred.reshape(-1)
  matches = torch.eq(y_true, y_pred)
  total = matches.numel()
  if total == 0:  # empty input: avoid a ZeroDivisionError
    return 0.0
  correct = matches.sum().item()
  return (correct / total) * 100

In [259]:
#View the first 5 outputs of the forward pass on the test data
#(reference snippet only: the triple-quoted string below is not executed)
'''
model_0.eval()
with torch.inference_mode():
  y_logits=model_0(X_test.to(device))[:5]
  y_pred_probs = torch.sigmoid (y_logits)
  y_pred_labels=torch.round(y_pred_probs)
'''
#y_pred_labels
#y_test[:5]
#Raw logits = the model output before it is passed through any activation function.
#Pipeline: raw logits -> prediction probabilities -> prediction labels
#Use the sigmoid activation function (for binary classification) to turn logits into prediction probabilities,
#then round the probabilities to obtain 0/1 prediction labels.


'\nmodel_0.eval()\nwith torch.inference_mode():\n  y_logits=model_0(X_test.to(device))[:5]\n  y_pred_probs = torch.sigmoid (y_logits)\n  y_pred_labels=torch.round(y_pred_probs)\n'

In [262]:
#Building a training and test loop
#NOTE: the model's weights were already initialized when model_0 was constructed,
#so these seeds only affect random operations from this point on.
torch.manual_seed(42)
torch.cuda.manual_seed(42)
epochs=1000
#Put the data on the same device as the model
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
  #Train
  model_0.train()
  y_logits = model_0(X_train).squeeze()          #forward pass; squeeze drops the size-1 output dim to match y_train
  y_pred = torch.round(torch.sigmoid(y_logits))  #logits -> probabilities -> 0/1 labels (used for accuracy only)

  loss= loss_fn(y_logits, y_train)               #calculate the loss; nn.BCEWithLogitsLoss expects raw logits as input
  acc=accuracy_fn(y_true=y_train, y_pred=y_pred) #training accuracy in percent

  optimizer.zero_grad()                          #reset gradients left over from the previous step
  loss.backward()                                #backpropagate the loss
  optimizer.step()                               #update the parameters

  #Test
  model_0.eval()
  with torch.inference_mode():
    test_logits=model_0(X_test).squeeze()          #forward pass on the held-out data
    test_pred = torch.round(torch.sigmoid(test_logits))

    test_loss=loss_fn(test_logits, y_test)
    test_acc=accuracy_fn(y_true=y_test, y_pred=test_pred)


  #Report progress every 100 epochs
  if epoch % 100 ==0:
    print(f'Epoch: {epoch} | Training loss: {loss: 0.5f}, Training acc: {acc:.2f}% | Test loss: {test_loss: 0.5f}, Test acc: {test_acc:.2f}%')


Epoch: 0 | Training loss:  0.68102, Training acc: 52.75% | Test loss:  0.67941, Test acc: 56.50%
Epoch: 100 | Training loss:  0.67515, Training acc: 54.50% | Test loss:  0.67285, Test acc: 56.00%
Epoch: 200 | Training loss:  0.66659, Training acc: 58.38% | Test loss:  0.66322, Test acc: 59.00%
Epoch: 300 | Training loss:  0.65160, Training acc: 64.00% | Test loss:  0.64757, Test acc: 67.50%
Epoch: 400 | Training loss:  0.62362, Training acc: 74.00% | Test loss:  0.62145, Test acc: 79.00%
Epoch: 500 | Training loss:  0.56818, Training acc: 87.75% | Test loss:  0.57378, Test acc: 86.50%
Epoch: 600 | Training loss:  0.48153, Training acc: 93.50% | Test loss:  0.49935, Test acc: 90.50%
Epoch: 700 | Training loss:  0.37056, Training acc: 97.75% | Test loss:  0.40595, Test acc: 92.00%
Epoch: 800 | Training loss:  0.25458, Training acc: 99.00% | Test loss:  0.30333, Test acc: 96.50%
Epoch: 900 | Training loss:  0.17180, Training acc: 99.50% | Test loss:  0.22108, Test acc: 97.50%


In [263]:
#Evaluating the model: logits -> probabilities -> rounded 0/1 labels, compared with the true labels
model_0.eval()
with torch.inference_mode():
  y_preds = model_0(X_test).sigmoid().round().squeeze()
y_preds[:10], y_test[:10]


(tensor([1., 0., 1., 0., 1., 1., 0., 0., 1., 0.]),
 tensor([1., 0., 1., 0., 1., 1., 0., 0., 1., 0.]))