# SVM Implementation PyTorch

In [28]:
# Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [29]:
import pandas as pd
import numpy as np

df = pd.read_csv("./data/Admission_Predict.csv")
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [30]:
df['Chance of Admit ']  = df['Chance of Admit '].apply(lambda x: 1 if x >= 0.5 else 0)

In [31]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,1
1,2,324,107,4,4.0,4.5,8.87,1,1
2,3,316,104,3,3.0,3.5,8.0,1,1
3,4,322,110,3,3.5,2.5,8.67,1,1
4,5,314,103,2,2.0,3.0,8.21,0,1


In [32]:
df.iloc[:,-1].value_counts()

1    367
0     33
Name: Chance of Admit , dtype: int64

In [33]:
df.columns

Index(['Serial No.', 'GRE Score', 'TOEFL Score', 'University Rating', 'SOP',
       'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
      dtype='object')

In [34]:
X, y = df.drop('Chance of Admit ', axis = 1), df['Chance of Admit ']
X.head(), y.head()

(   Serial No.  GRE Score  TOEFL Score  University Rating  SOP  LOR   CGPA  \
 0           1        337          118                  4  4.5   4.5  9.65   
 1           2        324          107                  4  4.0   4.5  8.87   
 2           3        316          104                  3  3.0   3.5  8.00   
 3           4        322          110                  3  3.5   2.5  8.67   
 4           5        314          103                  2  2.0   3.0  8.21   
 
    Research  
 0         1  
 1         1  
 2         1  
 3         1  
 4         0  ,
 0    1
 1    1
 2    1
 3    1
 4    1
 Name: Chance of Admit , dtype: int64)

In [35]:
X.shape, y.shape

((400, 8), (400,))

In [36]:
vars = X.columns
for var in vars:
    X[f"{var}"] = X[f"{var}"]/ X[f"{var}"].max()

In [37]:
X.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,0.0025,0.991176,0.983333,0.8,0.9,0.9,0.972782,1.0
1,0.005,0.952941,0.891667,0.8,0.8,0.9,0.894153,1.0
2,0.0075,0.929412,0.866667,0.6,0.6,0.7,0.806452,1.0
3,0.01,0.947059,0.916667,0.6,0.7,0.5,0.873992,1.0
4,0.0125,0.923529,0.858333,0.4,0.4,0.6,0.827621,0.0


## Prepare DataLoader

In [38]:
# Data Preprocessing
# Generate a synthetic dataset for demonstration
# X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Convert data to PyTorch tensors
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.long)

In [39]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32
train_data_loader = DataLoader(dataset = X_train,
                              batch_size = BATCH_SIZE,
                              shuffle = True)
test_data_loader = DataLoader(dataset = X_test,
                              batch_size = BATCH_SIZE,
                              shuffle = True)

In [40]:
# Let's check out what we've create
print(f"DataLoader: {train_data_loader, test_data_loader}")
print(f"Length of train dataloader: {len(train_data_loader)} of BATCH_SIZE: {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_data_loader)} of BATCH_SIZE: {BATCH_SIZE}")

DataLoader: (<torch.utils.data.dataloader.DataLoader object at 0x0000029D691D4DC0>, <torch.utils.data.dataloader.DataLoader object at 0x0000029D6CF72DD0>)
Length of train dataloader: 10 of BATCH_SIZE: 32
Length of test dataloader: 3 of BATCH_SIZE: 32


In [41]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [42]:
# SVM Classifier Model
class SVM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SVM, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x

# Create an SVM model instance
model = SVM(input_size=8, hidden_size=10, output_size=1)

In [55]:
def accuracy_fn(y_test, y_test_pred):
    return (y_test_pred.round() == y_test).float().mean()


def train_step(model: torch.nn.Module,
               data_loader : torch.utils.data.DataLoader,
               y: torch.Tensor,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
  """Calculates the training computation of a PyTorch model
  Arge:
    model: A pytorch class for model
    data_loader: The dataloader of pytorch
    loss_fn: Loss function for the model
    optimzer: optimizer of the loss
    accuracy_fn: Accuracy function for calculation
    devic: Device agnostic code cuda or cpu
  """
  train_loss, train_acc = 0, 0
  model.to(device)
  for batch, X in enumerate(data_loader):
    X, y = X.to(device), y.to(device)
    # 1. Forward pass
    y_pred = model(X)
    # 2. Calculate the loss
    loss = loss_fn(y_pred, y)
    train_loss += loss
    train_acc = accuracy_fn(y_true = y, y_pred =y_pred.argmax(dim = 1))
    # 3. Optimizer zero grad
    optimizer.zero_grad()
    # 4. Loss backward
    loss.backward()
    # 5. Optimizer step
    optimizer.step()
  # Calculate the loss and accuracy per epoch and print out what's happening
  train_loss /= len(data_loader)
  train_acc /= len(data_loader)
  print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

def test_step(model:torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              y: torch.Tensor,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
  """Calculates the training computation of a PyTorch model
  Arge:
    model: A pytorch class for model
    data_loader: The dataloader of pytorch
    loss_fn: Loss function for the model
    optimzer: optimizer of the loss
    accuracy_fn: Accuracy function for calculation
    devic: Device agnostic code cuda or cpu
  """
  test_loss, test_acc = 0, 0
  model.to(device)
  model.eval()
  with torch.inference_mode():
    for X in data_loader:
      X, y = X.to(device), y.to(device)
      # 1. forward pass
      test_pred = model(X)
      # 2. Calculate loss and accuracy
      test_loss = loss_fn(test_pred, y)
      test_acc += accuracy_fn(y_true = y, y_pred = test_pred.argmax(dim = 1)) # Go from logitrs -> pred labels
    # Adjust metrics and print out what's happening
    test_loss /= len(data_loader)
    test_acc /= len(data_loader)
    print(f"Test loss: {test_loss:.5f} | Test Accuracy: {test_acc:.2f}%")

In [47]:
# Define the loss function and optimizer
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [56]:
# Training loop
torch.manual_seed(42)

# tqdm
from tqdm.auto import tqdm

num_epochs = 100
for epoch in tqdm(range(num_epochs)):
    print(f"Epoch: {epoch}\n...")
    train_step(data_loader = train_data_loader,
             model = model,
             y = y_train,
             loss_fn = loss_fn,
             optimizer = optimizer,
             accuracy_fn = accuracy_fn)
    test_step(data_loader = test_data_loader,
                y = y_test,
                model = model,
                loss_fn = loss_fn,
                accuracy_fn = accuracy_fn)

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0
...


ValueError: Target size (torch.Size([320])) must be the same as input size (torch.Size([32, 1]))

^C
