1. make classification data ready

In [None]:
import  sklearn
from sklearn.datasets import make_circles

In [None]:
# Binary classification toy data: 1000 noisy 2-D points with a 0/1 label each.
# random_state pins the draw so the notebook is repeatable.
x, y = make_circles(n_samples=1000, noise=0.03, random_state=42)

In [None]:
# Sanity check: features and labels should have the same number of samples.
tuple(len(arr) for arr in (x, y))

In [None]:
# Peek at the first five feature rows.
print(x[:5])

In [None]:
# Peek at the first five labels.
print(y[:5])

In [None]:
import pandas as pd

# Tabular view of the dataset: one column per coordinate plus the class label.
circles = pd.DataFrame(
    {
        "X1": x[:, 0],
        "X2": x[:, 1],
        "label": y,
    }
)
circles.head(10)

In [None]:
# Visualize: scatter the two coordinates against each other, colored by label.
import matplotlib.pyplot as plt

plt.scatter(
    x=circles["X1"],
    y=circles["X2"],
    c=circles["label"],
    cmap=plt.cm.RdYlBu,
)
plt.show()

# classify data as red or blue

In [None]:
# Next step: turn the data into tensors and create train/test splits.
# First confirm the array shapes we are starting from.
(x.shape, y.shape)

In [None]:
import torch

In [None]:
# Convert features and labels to float32 tensors.
# torch.as_tensor accepts both NumPy arrays and tensors, so re-running this cell
# is harmless; torch.from_numpy would raise TypeError on the second run because
# x and y are already tensors by then.
x = torch.as_tensor(x, dtype=torch.float)
y = torch.as_tensor(y, dtype=torch.float)

In [None]:
# Split: hold out 20% of the samples for testing (seeded for repeatability).
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)


2. model

In [None]:
import torch
from torch import nn

# Device-agnostic setup: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
#construct model:
class CircleModelV0(nn.Module):
    """Two stacked linear layers (2 -> 5 -> 1) with no activation in between."""

    def __init__(self):
        super().__init__()
        # First layer: takes the 2 input features and produces 5 hidden units.
        self.layer_1 = nn.Linear(in_features=2, out_features=5)
        # Second layer: takes those 5 hidden units and produces 1 output value.
        self.layer_2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Pass x through layer_1, then layer_2, and return the result."""
        hidden = self.layer_1(x)
        return self.layer_2(hidden)


model_0 = CircleModelV0().to(device)
model_0



Number of layers controls how deep the network is — how many levels of abstraction it can learn.

Fewer layers → simpler patterns (like straight lines).

More layers → more complex patterns (like curves, shapes, or images).

out_features (neurons in a layer) control how wide each layer is — how much learning capacity it has at that level.

Fewer neurons → simpler relationships, faster training.

More neurons → can capture richer patterns but may overfit if too many.

In [None]:
# Same architecture as CircleModelV0, built with nn.Sequential: the layers are
# applied in the order listed, so no custom class or forward() is needed.
model_0 = nn.Sequential(
    nn.Linear(in_features=2, out_features=5),
    nn.Linear(in_features=5, out_features=1),
)
model_0 = model_0.to(device)
model_0

In [None]:
# Inspect the learnable parameters.
# First layer: 0.weight holds 2*5 = 10 values and 0.bias holds 5 (one per output unit).
model_0.state_dict()

In [None]:
# Make predictions with the still-untrained model (inference mode: no gradient tracking).
with torch.inference_mode():
    untrained_preds = model_0(x_test.to(device))

# Show the first ten raw outputs, the output shape, and the first ten true labels.
(untrained_preds[:10], untrained_preds.shape, y_test[:10])

## Loss function and optimizer
- The loss for classification is not the same as for regression.
- Use binary cross-entropy for two classes and categorical cross-entropy for more than two.

In [None]:
# Uses torch.optim and torch.nn.BCEWithLogitsLoss.

# Loss: BCEWithLogitsLoss has the sigmoid built in, so it is fed raw logits.
loss_fn = nn.BCEWithLogitsLoss()

# Optimizer: stochastic gradient descent over model_0's parameters.
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

In [None]:
#calculate accuracy
def acc_fn(y_true,y_pred):
  """Return the percentage (0-100) of predictions that equal their labels.

  Args:
    y_true: Tensor of ground-truth labels.
    y_pred: Tensor of predicted labels, compared element-wise with y_true.

  Returns:
    float: 100 * (number of matching elements) / len(y_pred), or 0.0 when
    y_pred is empty because there is nothing to score.
  """
  total=len(y_pred)
  if total==0:
    # An empty batch would otherwise raise ZeroDivisionError below.
    return 0.0
  correct=torch.eq(y_true, y_pred).sum().item()
  return correct/total*100


# train model

### steps:
raw logits -> prediction probabilities -> prediction labels
- Convert logits to probabilities by passing them through an activation function (sigmoid for binary classification, softmax for multiclass classification).
- Then convert the model's prediction probabilities to prediction labels by either rounding them (binary) or taking the argmax() (multiclass).

In [None]:
# Train and evaluate model_0; every epoch uses the whole training set at once.
torch.manual_seed(42)
torch.cuda.manual_seed(42)  # same seed for operations on a CUDA device
epochs = 1000

# Put the data on the same device as the model.
x_train, y_train = x_train.to(device), y_train.to(device)
x_test, y_test = x_test.to(device), y_test.to(device)

for epoch in range(epochs):
  # ---- training ----
  model_0.train()

  # Forward pass: raw logits -> probabilities (sigmoid) -> 0/1 labels (round).
  y_logits = model_0(x_train).squeeze()
  y_pred = torch.sigmoid(y_logits).round()

  # The loss takes the raw logits; accuracy takes the rounded labels.
  loss = loss_fn(y_logits, y_train)
  acc = acc_fn(y_train, y_pred)

  # Clear old gradients, backpropagate, then take one gradient-descent step.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  # ---- evaluation ----
  model_0.eval()
  with torch.inference_mode():
    test_logits = model_0(x_test).squeeze()
    test_pred = torch.sigmoid(test_logits).round()
    test_loss = loss_fn(test_logits, y_test)
    test_acc = acc_fn(y_test, test_pred)

  # Report progress every 10th epoch.
  if not epoch % 10:
    print(f"Epoch: {epoch} | Loss: {loss:.5f} | Acc: {acc:.2f}% | Test Loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}%")







In [None]:
# Visualize the predictions to see what needs fixing.
import requests
from pathlib import Path

# Download helper_functions.py once; later runs reuse the local copy.
helper_path = Path("helper_functions.py")
if helper_path.is_file():
  print("helper_functions.py already exists")
else:
  print("downloading helper_functions.py")
  # A timeout keeps the cell from hanging forever on a stalled connection.
  request = requests.get(
      "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/refs/heads/main/helper_functions.py",
      timeout=30,
  )
  # Fail loudly on an HTTP error. Otherwise the error page would be saved as
  # helper_functions.py, the import below would break, and every later run
  # would skip the download because the file already exists.
  request.raise_for_status()
  helper_path.write_bytes(request.content)

from helper_functions import plot_predictions, plot_decision_boundary

In [None]:
# Decision boundary of model_0: training split on the left, test split on the right.
plt.figure(figsize=(12, 6))
for panel, (features, labels) in enumerate([(x_train, y_train), (x_test, y_test)], start=1):
  plt.subplot(1, 2, panel)
  plot_decision_boundary(model_0, features, labels)
plt.show()

### Improving the model (through experimentation). Options:
1. add more layers
2. add more hidden units- from 5 to 10
3. fit for longer
4. change the activation functions
5. change learning rate
6. change the loss function



In [None]:
# model_0 only stacks linear layers; this model adds a non-linearity between them.
class CircleModelV1(nn.Module):
  """Three linear layers (2 -> 32 -> 32 -> 1) with ReLU after the first two."""

  def __init__(self):
    super().__init__()
    self.layer_1 = nn.Linear(in_features=2, out_features=32)
    self.layer_2 = nn.Linear(in_features=32, out_features=32)
    self.layer_3 = nn.Linear(in_features=32, out_features=1)
    # Non-linear activation function, reused after layer_1 and layer_2.
    self.relu = nn.ReLU()

  def forward(self, x):
    """Return layer_3(relu(layer_2(relu(layer_1(x)))))."""
    x = self.relu(self.layer_1(x))
    x = self.relu(self.layer_2(x))
    return self.layer_3(x)


model_1 = CircleModelV1().to(device)
model_1



- ReLU helps the model learn features.
- Sigmoid (or Softmax) helps interpret the result as a probability.

In [None]:
# Loss and optimizer for model_1: same settings as before, now bound to model_1's parameters.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.1)

In [None]:
# Train and evaluate model_1.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 1000

for epoch in range(epochs):
  # ---- training ----
  model_1.train()

  # Forward pass: raw logits -> probabilities (sigmoid) -> 0/1 labels (round).
  y_logits = model_1(x_train).squeeze()
  y_pred = torch.sigmoid(y_logits).round()

  # The loss takes the raw logits; accuracy takes the rounded labels.
  loss = loss_fn(y_logits, y_train)
  acc = acc_fn(y_train, y_pred)

  # Clear old gradients, backpropagate, then take one optimizer step.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  # ---- evaluation ----
  model_1.eval()
  with torch.inference_mode():
    test_logits = model_1(x_test).squeeze()
    test_pred = torch.sigmoid(test_logits).round()
    test_loss = loss_fn(test_logits, y_test)
    test_acc = acc_fn(y_test, test_pred)

  # Report progress every 100th epoch.
  if not epoch % 100:
    print(f"Epoch: {epoch} | Loss: {loss:.5f} | Acc: {acc:.2f}% | Test Loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}%")




In [None]:
# Predict 0/1 labels on the test split with model_1.
model_1.eval()
with torch.inference_mode():
  # logits -> sigmoid -> round, then drop the size-1 dimension.
  y_preds = model_1(x_test.to(device)).sigmoid().round().squeeze()

# First ten predictions next to the first ten true labels.
(y_preds[:10], y_test[:10])

In [None]:
# Compare decision boundaries. The two panels show DIFFERENT models on DIFFERENT
# splits, so each panel is titled to keep the comparison from being misread.
# NOTE(review): if the intent was model_1 on both splits, change the left panel.
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plot_decision_boundary(model_0, x_train, y_train)
plt.title("model_0 (linear only) - train data")
plt.subplot(1,2,2)
plot_decision_boundary(model_1, x_test, y_test)
plt.title("model_1 (with ReLU) - test data")
plt.show()

# Multiclass classification problem

In [None]:
#create toy multiclass dataset
import torch
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# Dataset hyperparameters.
Num_classes=4
Num_features=2

# Create 1000 samples with Num_features coordinates each, drawn around
# Num_classes cluster centers; the seed makes the draw repeatable.
X_blob, y_blob=make_blobs(n_samples=1000,
                          n_features=Num_features,
                          centers=Num_classes,
                          cluster_std=1.5,
                          random_state=42)

# Turn to tensors: float features, integer (long) class labels.
X_blob=torch.from_numpy(X_blob).type(torch.float)
y_blob=torch.from_numpy(y_blob).type(torch.LongTensor)

# Split 80/20.
# NOTE: y_train / y_test are rebound here and no longer hold the circle labels
# from the binary section; re-running earlier cells after this one would mix
# the two datasets.
X_train, X_test, y_train, y_test=train_test_split(X_blob,
                                                  y_blob,
                                                  test_size=0.2,
                                                  random_state=42)

# Visualize the clusters, colored by class.
plt.figure(figsize=(10,7))
plt.scatter(X_blob[:,0], X_blob[:,1], c=y_blob, cmap=plt.cm.RdYlBu)
# End with plt.show() like the other plotting cells, so the PathCollection repr
# is not echoed as cell output.
plt.show()

In [None]:
#build model
#output features=number of classes
class Multiclass(nn.Module):
  """Small fully connected classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.

  Args:
    input_features: Number of input features per sample.
    output_features: Number of outputs per sample (one per class).
    hidden_units: Width of the two hidden layers. Defaults to 8.
  """

  def __init__(self, input_features, output_features, hidden_units=8):
    super().__init__()
    self.linear_layer_stack=nn.Sequential(
        nn.Linear(in_features=input_features, out_features=hidden_units),
        nn.ReLU(),
        nn.Linear(in_features=hidden_units, out_features=hidden_units),
        nn.ReLU(),
        nn.Linear(in_features=hidden_units, out_features=output_features)
    )

  def forward(self,x):
    """Return the output of the layer stack for x (no softmax is applied here)."""
    return self.linear_layer_stack(x)

# Size the model from the dataset constants (Num_features=2, Num_classes=4)
# instead of repeating the literals, so the two cannot drift apart.
model_2=Multiclass(input_features=Num_features,
                   output_features=Num_classes,
                   hidden_units=8).to(device)
model_2





In [None]:
# Loss and optimizer for the multiclass model.
# CrossEntropyLoss is given the raw logits and the class labels in the training loop.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_2.parameters(), lr=0.1)

If your dataset has an imbalanced number of samples per class, pass per-class weights to `CrossEntropyLoss` (its `weight` argument).

In [None]:
# Train and evaluate the multiclass model_2.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs=100

# Put the data on the same device as the model.
X_train, X_test, y_train, y_test= X_train.to(device), X_test.to(device), y_train.to(device), y_test.to(device)


for epoch in range(epochs):
  model_2.train()
  # Forward pass: logits -> class probabilities (softmax) -> predicted class (argmax).
  y_logits=model_2(X_train)
  y_pred=torch.softmax(y_logits, dim=1).argmax(dim=1)
  # The loss takes the raw logits; accuracy takes the predicted class indices.
  loss=loss_fn(y_logits,y_train)
  acc=acc_fn(y_train,y_pred)

  # Clear old gradients, backpropagate, then take one optimizer step.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  model_2.eval()
  with torch.inference_mode():
    test_logits=model_2(X_test)
    test_pred=torch.softmax(test_logits, dim=1).argmax(dim=1)
    test_loss=loss_fn(test_logits,y_test)
    test_acc=acc_fn(y_test,test_pred)
    if epoch%10==0:
      # Format the test metrics like the train metrics; unformatted, test_loss
      # printed as a raw tensor repr and test_acc as an unrounded float.
      print(f"Epoch: {epoch} | Loss: {loss:.5f} | Acc: {acc:.2f}% | Test Loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}%")



In [None]:
# Decision boundaries of model_2: training split on the left, test split on the right.
plt.figure(figsize=(12, 6))
for panel, (features, labels) in enumerate([(X_train, y_train), (X_test, y_test)], start=1):
  plt.subplot(1, 2, panel)
  plot_decision_boundary(model_2, features, labels)
plt.show()