In [1]:
import kagglehub

# Kaggle dataset handle passed to kagglehub; no version is pinned in the call.
KAGGLE_DATASET = "zalando-research/fashionmnist"

# dataset_download returns the location of the downloaded files, which is
# printed so the files can be found on disk.
# NOTE(review): the later visible cells load the data through Keras and never
# read `path` -- confirm whether this download is still needed.
path = kagglehub.dataset_download(KAGGLE_DATASET)
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/zalando-research/fashionmnist?dataset_version_number=4...


100%|██████████| 68.8M/68.8M [00:03<00:00, 23.6MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/zalando-research/fashionmnist/versions/4


In [2]:
from tensorflow.keras.datasets import fashion_mnist
# Load Fashion-MNIST through Keras. The call returns a (train, test) pair, each
# of which is an (inputs, labels) tuple; they are unpacked into the four
# notebook-wide names used by the cells below.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [3]:
import numpy as np
from sklearn.preprocessing import StandardScaler

def _flatten_and_rescale(images):
    """Return `images` with one flattened row per sample, as float32 divided by 255."""
    n_samples = images.shape[0]
    flat = images.reshape(n_samples, -1)
    return flat.astype('float32') / 255.0


# Same preprocessing for both splits: one row per image, values divided by 255.
x_train = _flatten_and_rescale(x_train)
x_test = _flatten_and_rescale(x_test)

# Standardize each feature. The scaler statistics are learned from the training
# split only and then applied unchanged to the test split.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


In [4]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Tune the regularization strength C of a linear-kernel SVM with 3-fold
# cross-validation.
linear_params = {'C': [0.1, 1, 10]}

# n_jobs=-1 lets GridSearchCV run the independent candidate/fold fits in
# parallel on all available cores. The search was previously serial, which is
# why it was slow; only wall-clock time is expected to change, not the
# selected parameters.
linear_svm = GridSearchCV(
    SVC(kernel='linear'),
    param_grid=linear_params,
    cv=3,
    n_jobs=-1,
)
linear_svm.fit(x_train[:5000], y_train[:5000])  # Training on a subset to reduce computation

print("Best C for linear kernel:", linear_svm.best_params_)


Best C for linear kernel: {'C': 0.1}


The grid search is fitted on only the first 5,000 training images (`[:5000]`), because running it on 21,000 images was taking too long.



In [5]:
# Candidate values for the polynomial-kernel SVM: regularization strength C
# and polynomial degree (3 x 3 = 9 combinations).
poly_params = {'C': [0.1, 1, 10], 'degree': [2, 3, 4]}

# Perform Grid Search with 3-fold cross-validation.
# n_jobs=-1 runs the independent candidate/fold fits in parallel on all
# available cores. The search was previously serial; only wall-clock time is
# expected to change, not the selected parameters.
poly_svm = GridSearchCV(
    SVC(kernel='poly'),
    param_grid=poly_params,
    cv=3,
    n_jobs=-1,
)
# Fit on the first 5000 training samples only, to keep the search tractable.
poly_svm.fit(x_train[:5000], y_train[:5000])

print("Best parameters for polynomial kernel:", poly_svm.best_params_)


Best parameters for polynomial kernel: {'C': 10, 'degree': 2}


In [6]:
# Candidate values for the RBF-kernel SVM: regularization strength C and the
# kernel coefficient gamma (scikit-learn's 'scale' setting plus two fixed values).
rbf_params = {'C': [0.1, 1, 10], 'gamma': ['scale', 0.01, 0.1]}

# 3-fold cross-validated grid search. n_jobs=-1 runs the independent
# candidate/fold fits in parallel on all available cores. The search was
# previously serial; only wall-clock time is expected to change, not the
# selected parameters.
rbf_svm = GridSearchCV(
    SVC(kernel='rbf'),
    param_grid=rbf_params,
    cv=3,
    n_jobs=-1,
)
# Fit on the first 5000 training samples only, to keep the search tractable.
rbf_svm.fit(x_train[:5000], y_train[:5000])

print("Best parameters for RBF kernel:", rbf_svm.best_params_)


Best parameters for RBF kernel: {'C': 10, 'gamma': 'scale'}


In [7]:
from sklearn.metrics import accuracy_score

# Predict the test split with the best estimator found by each grid search
# (evaluated in the order linear, polynomial, RBF).
linear_preds, poly_preds, rbf_preds = (
    search.best_estimator_.predict(x_test)
    for search in (linear_svm, poly_svm, rbf_svm)
)

# Fraction of test labels each model predicted correctly.
linear_acc, poly_acc, rbf_acc = (
    accuracy_score(y_test, predicted)
    for predicted in (linear_preds, poly_preds, rbf_preds)
)

# Report the three accuracies to four decimal places.
print(f"Linear SVM Accuracy: {linear_acc:.4f}")
print(f"Polynomial SVM Accuracy: {poly_acc:.4f}")
print(f"RBF SVM Accuracy: {rbf_acc:.4f}")


Linear SVM Accuracy: 0.8079
Polynomial SVM Accuracy: 0.8447
RBF SVM Accuracy: 0.8510
