In [None]:
# Install the third-party packages this notebook uses (the %pip magic installs into the running kernel's environment).
%pip install torch torchvision scikit-learn matplotlib tqdm tensorboard

In [None]:
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.multiclass import OneVsRestClassifier
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve
from torch.utils.tensorboard import SummaryWriter
import datetime
from sklearn.model_selection import train_test_split

# TensorBoard setup: every run writes to its own timestamped directory under logs/fit/
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f"logs/fit/{run_stamp}"
writer = SummaryWriter(log_dir)

In [None]:
# Data loading and preprocessing
# Each image is converted to a tensor and normalized with mean 0.1307 / std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Both loaders use batch_size=len(dataset), so a single batch holds the whole split.
# The training shuffle is driven by an explicitly seeded generator: with an
# unseeded shuffle the sample order changes on every run, which makes the later
# random_state=42 train/validation split (and every prefix subset taken from it)
# irreproducible. The seed matches that random_state.
train_loader = DataLoader(
    train_dataset,
    batch_size=len(train_dataset),
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

In [None]:
# Convert data to numpy arrays
def _flatten_full_batch(loader):
    """Take the first batch from `loader` and return (features, labels) as numpy arrays.

    The image batch is reshaped to 2-D: one row per sample, all remaining
    dimensions flattened into a single feature axis.
    """
    images, labels = next(iter(loader))
    return images.numpy().reshape(len(images), -1), labels.numpy()


X_train, y_train = _flatten_full_batch(train_loader)
X_test, y_test = _flatten_full_batch(test_loader)

In [None]:
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Split training data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
# SVM classifier: one RBF-kernel SVC per class, combined one-vs-rest.
# probability=True is intentionally not set: it makes every SVC fit run an
# additional internal cross-validation to calibrate probabilities, and this
# notebook only calls predict(), which OneVsRestClassifier derives from the
# estimators' decision_function. Re-enable it only if predict_proba is needed.
svm_classifier = OneVsRestClassifier(svm.SVC(kernel='rbf', C=1, gamma='scale'))

In [None]:
# Function to train and evaluate the model
def train_and_evaluate(X_train, y_train, X_val, y_val, X_test, y_test, classifier=None):
    """Fit a classifier on the training data and score it on all three splits.

    Args:
        X_train, y_train: Features and labels used to fit the classifier; the
            fitted model is also scored on them.
        X_val, y_val: Validation features and labels (scoring only).
        X_test, y_test: Test features and labels (scoring only).
        classifier: Estimator exposing ``fit(X, y)`` and ``predict(X)``. When
            omitted, the module-level ``svm_classifier`` is used, which keeps
            existing six-argument calls working unchanged.

    Returns:
        A ``(train_accuracy, val_accuracy, test_accuracy)`` tuple, each value
        computed with ``accuracy_score``.
    """
    if classifier is None:
        # Resolved at call time so the notebook-level estimator stays the default.
        classifier = svm_classifier

    print("Training SVM classifier...")
    classifier.fit(X_train, y_train)

    def _accuracy(features, labels):
        # Accuracy of the freshly fitted classifier on one split.
        return accuracy_score(labels, classifier.predict(features))

    # Evaluated left to right: training, validation, then test set.
    return _accuracy(X_train, y_train), _accuracy(X_val, y_val), _accuracy(X_test, y_test)

In [None]:
# Train and evaluate the model with different training set sizes
# (10 evenly spaced fractions of the training split, from 10% to 100%).
train_sizes = np.linspace(0.1, 1.0, 10)
try:
    for train_size in train_sizes:
        # Round before converting: the fractions are floats, and truncating a
        # product such as 0.7000000000000001 * N or 0.6 * N can land one
        # sample off the intended size.
        n_samples = int(round(len(X_train) * train_size))
        X_train_subset = X_train[:n_samples]
        y_train_subset = y_train[:n_samples]

        train_accuracy, val_accuracy, test_accuracy = train_and_evaluate(
            X_train_subset, y_train_subset, X_val, y_val, X_test, y_test
        )

        print(f"Training size: {n_samples}")
        print(f"Train accuracy: {train_accuracy:.4f}")
        print(f"Validation accuracy: {val_accuracy:.4f}")
        print(f"Test accuracy: {test_accuracy:.4f}")
        print()

        # Log to TensorBoard, using the subset size as the x-axis step
        writer.add_scalar('Accuracy/train', train_accuracy, n_samples)
        writer.add_scalar('Accuracy/validation', val_accuracy, n_samples)
        writer.add_scalar('Accuracy/test', test_accuracy, n_samples)
finally:
    # Close the TensorBoard writer even if a fit/predict call raises, so the
    # scalars logged so far are written out.
    writer.close()

print("Training and evaluation complete. TensorBoard logs have been written.")

In [None]:
# Load the TensorBoard notebook extension, then launch TensorBoard on logs/fit,
# the parent directory of the timestamped run directory the writer logs to.
%load_ext tensorboard
%tensorboard --logdir logs/fit