In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the dataset
train_data = pd.read_csv('kaggle-mnist/sign_mnist_train/sign_mnist_train.csv')
test_data = pd.read_csv('kaggle-mnist/sign_mnist_test/sign_mnist_test.csv')

# Separate features (X) and labels (y)
X_train = train_data.iloc[:, 1:].values  # Pixel values
y_train = train_data.iloc[:, 0].values   # Labels
X_test = test_data.iloc[:, 1:].values    # Pixel values
y_test = test_data.iloc[:, 0].values     # Labels

# Normalize the pixel values to [0, 1]
X_train = X_train / 255.0
X_test = X_test / 255.0

In [2]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [3]:
from sklearn.svm import SVC

# Initialise the SVM model
model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)

# Train the model
model.fit(X_train, y_train)

In [4]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict on the validation set
y_val_pred = model.predict(X_val)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {accuracy:.4f}")

# Classification report
print("Classification Report:")
print(classification_report(y_val, y_val_pred))

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_val, y_val_pred))

Validation Accuracy: 0.9998
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       221
           1       1.00      1.00      1.00       188
           2       1.00      1.00      1.00       220
           3       1.00      1.00      1.00       256
           4       1.00      1.00      1.00       202
           5       1.00      1.00      1.00       242
           6       1.00      1.00      1.00       221
           7       1.00      1.00      1.00       205
           8       1.00      1.00      1.00       219
          10       1.00      1.00      1.00       219
          11       1.00      1.00      1.00       268
          12       1.00      1.00      1.00       232
          13       1.00      1.00      1.00       209
          14       1.00      1.00      1.00       219
          15       1.00      1.00      1.00       202
          16       1.00      1.00      1.00       291
          17       1.00      1

In [5]:
# Predict on the test set
y_test_pred = model.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Classification report
print("Test Classification Report:")
print(classification_report(y_test, y_test_pred))

# Confusion matrix
print("Test Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))

Test Accuracy: 0.8397
Test Classification Report:
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       331
           1       1.00      0.97      0.98       432
           2       0.88      0.99      0.93       310
           3       0.93      1.00      0.96       245
           4       0.93      0.99      0.96       498
           5       0.78      0.83      0.80       247
           6       0.93      0.92      0.93       348
           7       0.98      0.95      0.96       436
           8       0.79      0.89      0.84       288
          10       0.76      0.61      0.68       331
          11       0.87      1.00      0.93       209
          12       0.86      0.73      0.79       394
          13       0.90      0.68      0.78       291
          14       0.99      0.82      0.90       246
          15       1.00      1.00      1.00       347
          16       1.00      0.99      0.99       164
          17       0.27      0.

In [6]:
# Hyperparameter Tuning

from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'C' : [0.1, 1, 10],
    'gamma' : ['scale', 'auto'],
    'kernel' : ['rbf', 'linear']
}

# Perform grid search
grid_search = GridSearchCV(SVC(), param_grid, cv=3, verbose=2)
grid_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", grid_search.best_params_)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time= 2.6min
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time= 2.6min
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time= 2.7min
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=  32.4s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=  33.5s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=  33.0s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time= 3.9min
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time= 4.2min
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time= 3.9min
[CV] END ...................C=0.1, gamma=auto, kernel=linear; total time=  32.3s
[CV] END ...................C=0.1, gamma=auto, kernel=linear; total time=  32.8s
[CV] END ...................C=0.1, gamma=auto, k

In [14]:
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

Best Parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}


In [16]:
# Initialize the SVM model with the best parameters
final_model = SVC(
    C=best_params['C'],
    gamma=best_params['gamma'],
    kernel=best_params['kernel'],
    random_state=42
)

# Train the model on the full training set
final_model.fit(X_train, y_train)

In [18]:
# Predict on the test set
y_test_pred = final_model.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Classification report
print("Test Classification Report:")
print(classification_report(y_test, y_test_pred))

# Confusion matrix
print("Test Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))

Test Accuracy: 0.7864
Test Classification Report:
              precision    recall  f1-score   support

           0       0.83      1.00      0.91       331
           1       1.00      0.95      0.97       432
           2       0.78      0.98      0.87       310
           3       0.96      1.00      0.98       245
           4       0.92      1.00      0.96       498
           5       0.72      0.83      0.77       247
           6       0.84      0.89      0.86       348
           7       0.95      0.91      0.93       436
           8       0.70      0.76      0.73       288
          10       0.66      0.41      0.51       331
          11       0.67      0.65      0.66       209
          12       0.87      0.68      0.76       394
          13       0.87      0.61      0.72       291
          14       0.88      0.65      0.75       246
          15       0.96      1.00      0.98       347
          16       0.76      0.95      0.84       164
          17       0.31      0.

In [20]:
# Initialize the SVM model with the best parameters
final_model = SVC(
    C=best_params['C'],
    gamma=best_params['gamma'],
    kernel=best_params['kernel'],
    random_state=42
)

# Train the model on the full training set
final_model.fit(X_train, y_train)


In [22]:
# Predict on the test set
y_test_pred = final_model.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Classification report
print("Test Classification Report:")
print(classification_report(y_test, y_test_pred))

# Confusion matrix
print("Test Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))

Test Accuracy: 0.7864
Test Classification Report:
              precision    recall  f1-score   support

           0       0.83      1.00      0.91       331
           1       1.00      0.95      0.97       432
           2       0.78      0.98      0.87       310
           3       0.96      1.00      0.98       245
           4       0.92      1.00      0.96       498
           5       0.72      0.83      0.77       247
           6       0.84      0.89      0.86       348
           7       0.95      0.91      0.93       436
           8       0.70      0.76      0.73       288
          10       0.66      0.41      0.51       331
          11       0.67      0.65      0.66       209
          12       0.87      0.68      0.76       394
          13       0.87      0.61      0.72       291
          14       0.88      0.65      0.75       246
          15       0.96      1.00      0.98       347
          16       0.76      0.95      0.84       164
          17       0.31      0.

In [24]:
# Initialise the SVM model
model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)

# Train the model
model.fit(X_train, y_train)

In [26]:
# Predict on the test set
y_test_pred = final_model.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Classification report
print("Test Classification Report:")
print(classification_report(y_test, y_test_pred))

# Confusion matrix
print("Test Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))

Test Accuracy: 0.7864
Test Classification Report:
              precision    recall  f1-score   support

           0       0.83      1.00      0.91       331
           1       1.00      0.95      0.97       432
           2       0.78      0.98      0.87       310
           3       0.96      1.00      0.98       245
           4       0.92      1.00      0.96       498
           5       0.72      0.83      0.77       247
           6       0.84      0.89      0.86       348
           7       0.95      0.91      0.93       436
           8       0.70      0.76      0.73       288
          10       0.66      0.41      0.51       331
          11       0.67      0.65      0.66       209
          12       0.87      0.68      0.76       394
          13       0.87      0.61      0.72       291
          14       0.88      0.65      0.75       246
          15       0.96      1.00      0.98       347
          16       0.76      0.95      0.84       164
          17       0.31      0.

In [42]:
# Initialise the SVM model
model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_test_pred = model.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Classification report
print("Test Classification Report:")
print(classification_report(y_test, y_test_pred))

# Confusion matrix
print("Test Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))

Test Accuracy: 0.8397
Test Classification Report:
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       331
           1       1.00      0.97      0.98       432
           2       0.88      0.99      0.93       310
           3       0.93      1.00      0.96       245
           4       0.93      0.99      0.96       498
           5       0.78      0.83      0.80       247
           6       0.93      0.92      0.93       348
           7       0.98      0.95      0.96       436
           8       0.79      0.89      0.84       288
          10       0.76      0.61      0.68       331
          11       0.87      1.00      0.93       209
          12       0.86      0.73      0.79       394
          13       0.90      0.68      0.78       291
          14       0.99      0.82      0.90       246
          15       1.00      1.00      1.00       347
          16       1.00      0.99      0.99       164
          17       0.27      0.