In [None]:
# Install missing packages
%pip install torch

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import accuracy_score

# Input data files are available in the read-only "../input/" directory.
# NOTE: no file-listing snippet follows here; this notebook downloads MNIST with fetch_openml instead of reading "../input/".



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Collecting torch
  Downloading torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
[0mCollecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
[0m

In [31]:
# Set device: use the GPU when one is visible to PyTorch, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load MNIST dataset
# as_frame=False asks fetch_openml for plain NumPy arrays. With the default
# (as_frame="auto") the data may come back as pandas objects, and the arithmetic
# and boolean indexing below would then run on a DataFrame/Series instead.
print("Loading MNIST dataset...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist["data"], mnist["target"]

# Cast to compact numeric dtypes and divide the features by 255
X = X.astype(np.float32) / 255.0
y = y.astype(np.int32)

# Binary classification: classify digit 0 vs digit 1
# The selection mask is computed once and applied to both arrays so they stay aligned.
binary_mask = (y == 0) | (y == 1)
X = X[binary_mask]
y = y[binary_mask]
y = np.where(y == 0, -1, 1)  # Convert labels to -1 and 1 for SVM
assert set(np.unique(y)) <= {-1, 1}, "labels must be encoded as -1/+1 for the hinge loss"

# Split into training and testing sets (fixed random_state keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data: statistics are fitted on the training split only and then
# reused for the test split, so no test information leaks into the scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Using device: cuda
Loading MNIST dataset...


  warn(


In [32]:
# Build float32 tensors for every split, allocated straight on the selected device
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32, device=device)
    for array in (X_train, y_train, X_test, y_test)
)

# Pair features with labels and serve them in shuffled mini-batches of 1000 samples
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    train_dataset,
    batch_size=1000,
    shuffle=True,
)

In [33]:
# Define SVM model
class LinearSVM(nn.Module):
    """Linear SVM scorer: a single affine layer producing one raw decision score per sample."""

    def __init__(self, input_dim):
        """Create the linear layer.

        Args:
            input_dim: number of input features per sample.
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)  # Single output for binary classification

    def forward(self, x):
        """Compute decision scores.

        Args:
            x: tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor of scores with the trailing size-1 dimension removed, e.g.
            shape ``(batch,)`` for a ``(batch, input_dim)`` input.
        """
        # Squeeze only the last dimension: a bare .squeeze() would also drop the batch
        # dimension when the batch holds a single sample, returning a 0-d scalar.
        return self.linear(x).squeeze(-1)

# Define hinge loss
def hinge_loss(output, target):
    """Mean hinge loss, i.e. the average of max(0, 1 - output * target).

    Args:
        output: raw decision scores.
        target: labels multiplied element-wise with ``output`` (this notebook uses -1/+1).

    Returns:
        Scalar tensor with the averaged loss.
    """
    margin_violation = 1 - output * target
    return margin_violation.clamp(min=0).mean()

# Initialize model, optimizer, and hyperparameters
# Seed PyTorch's global RNG first so the nn.Linear weight initialisation and the shuffled
# batch order drawn from train_loader start from the same state every time this cell runs
# (42 mirrors the random_state used for the train/test split).
torch.manual_seed(42)
input_dim = X_train.shape[1]
model = LinearSVM(input_dim).to(device)
# weight_decay adds an L2 penalty on every parameter handed to the optimizer.
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=0.01)  # L2 regularization
num_epochs = 20

# Training loop: one pass over train_loader per epoch, one SGD step per mini-batch
print("Training SVM...")
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        outputs = model(batch_X)
        loss = hinge_loss(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Reported value is the mean of the per-batch mean losses for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(train_loader):.4f}")

Training SVM...
Epoch [1/20], Loss: 0.1048
Epoch [2/20], Loss: 0.0268
Epoch [3/20], Loss: 0.0198
Epoch [4/20], Loss: 0.0158
Epoch [5/20], Loss: 0.0134
Epoch [6/20], Loss: 0.0117
Epoch [7/20], Loss: 0.0103
Epoch [8/20], Loss: 0.0094
Epoch [9/20], Loss: 0.0087
Epoch [10/20], Loss: 0.0079
Epoch [11/20], Loss: 0.0075
Epoch [12/20], Loss: 0.0069
Epoch [13/20], Loss: 0.0066
Epoch [14/20], Loss: 0.0061
Epoch [15/20], Loss: 0.0059
Epoch [16/20], Loss: 0.0056
Epoch [17/20], Loss: 0.0054
Epoch [18/20], Loss: 0.0051
Epoch [19/20], Loss: 0.0048
Epoch [20/20], Loss: 0.0047


In [34]:
# Evaluation: score the held-out split without tracking gradients
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    # Threshold the raw scores into -1/+1 labels. torch.sign would emit 0 for a score of
    # exactly 0, which is not a valid class label; such ties are assigned to -1 here.
    predictions = (test_outputs > 0).float() * 2 - 1
    # Fraction of test samples whose predicted label equals the true -1/+1 label
    accuracy = (predictions == y_test_tensor).float().mean()
    print(f"Test Accuracy: {accuracy.item():.4f}")

Test Accuracy: 0.9973


In [35]:
# Reference baseline: fit scikit-learn's linear-kernel SVC on the same standardized split
print("Training SVM using scikit-learn...")
svm_model = SVC(kernel='linear', C=1.0).fit(X_train, y_train)

# Score the baseline on the held-out test split
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

Training SVM using scikit-learn...
Test Accuracy: 0.9993
