In [None]:
!pip install datasets

import numpy as np
from datasets import load_dataset
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA



In [None]:
# Load the dataset by its repository id.
dataset = load_dataset("akinsanyaayomide/skin_cancer_dataset_balanced_labels")

train_split, test_split = dataset['train'], dataset['test']

# Class labels of each split as NumPy arrays.
train_labels = np.array(train_split['label'])
test_labels = np.array(test_split['label'])

# Stack the images of each split into a single array ...
train_images = np.array(train_split['image'])
test_images = np.array(test_split['image'])

# ... and collapse every image into one feature row (first axis = sample).
train_images_flat = train_images.reshape(len(train_images), -1)
test_images_flat = test_images.reshape(len(test_images), -1)

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
# NOTE: train_images / test_images are rebound here to the scaled, flattened
# arrays; the un-flattened image arrays are no longer reachable afterwards.
scaler = StandardScaler()
train_images = scaler.fit_transform(train_images_flat)
test_images = scaler.transform(test_images_flat)

In [None]:
# Reduce the standardized pixel features with PCA.
# An integer n_components keeps exactly that many components (100 here); it
# does NOT target a share of explained variance. To select components by
# variance ratio instead, pass a float in (0, 1), e.g. PCA(n_components=0.95).
# random_state pins the randomized SVD solver that scikit-learn may choose for
# inputs of this size, so reruns produce the same projection.
pca = PCA(n_components=100, random_state=42)
train_images_pca = pca.fit_transform(train_images)  # fit on the training data only
test_images_pca = pca.transform(test_images)        # reuse the fitted projection

In [None]:
# Sanity check: dimensions of the PCA-reduced training matrix.
print(np.shape(train_images_pca))

(16800, 100)


In [None]:
# Fit scikit-learn's SVC (not LinearSVC) with C=10 and otherwise default
# settings on the full PCA-reduced training set.
svm = SVC(C=10)
svm.fit(train_images_pca, train_labels)

# Predict labels for the PCA-reduced test set.
predictions = svm.predict(test_images_pca)

# Per-class precision/recall/F1 and overall accuracy on the test set.
test_report = classification_report(test_labels, predictions)
test_accuracy = accuracy_score(test_labels, predictions)
print("Classification Report:\n", test_report)
print("Accuracy Score:\n", test_accuracy)

Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.47      0.49       524
           1       0.50      0.68      0.58       524
           2       0.39      0.45      0.42       524
           3       0.69      0.72      0.70       524
           4       0.54      0.34      0.42       524
           5       0.56      0.59      0.57       524
           6       0.53      0.44      0.49       524
           7       0.87      0.87      0.87       524

    accuracy                           0.57      4192
   macro avg       0.57      0.57      0.57      4192
weighted avg       0.57      0.57      0.57      4192

Accuracy Score:
 0.5696564885496184


In [None]:
# Hyper-parameter candidates: 2 values of C x 2 values of gamma x 1 kernel.
param_grid = dict(
    C=[10, 100],            # regularization parameter
    gamma=['scale', 0.01],  # kernel coefficient
    kernel=['rbf'],         # kernel type
)

# Exhaustive search over the grid with 2-fold cross-validation, using all
# available cores (n_jobs=-1) and verbose progress output.
svm = SVC(random_state=42)
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=2,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(train_images_pca, train_labels)

print("Best Parameters from GridSearchCV:", grid_search.best_params_)
print("Best Score from GridSearchCV:", grid_search.best_score_)

# Take the best estimator found by the search and score it on the test set.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(test_images_pca)

accuracy = accuracy_score(test_labels, y_pred)
print("Accuracy of the Best Model:", accuracy)

# Per-class metrics followed by the confusion matrix.
print("\nClassification Report:", classification_report(test_labels, y_pred), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(test_labels, y_pred), sep="\n")



Fitting 2 folds for each of 4 candidates, totalling 8 fits
Best Parameters from GridSearchCV: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best Score from GridSearchCV: 0.5372619047619047
Accuracy of the Best Model: 0.5713263358778626

Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.47      0.48       524
           1       0.50      0.68      0.57       524
           2       0.40      0.45      0.42       524
           3       0.69      0.72      0.70       524
           4       0.54      0.35      0.43       524
           5       0.56      0.58      0.57       524
           6       0.54      0.45      0.49       524
           7       0.87      0.87      0.87       524

    accuracy                           0.57      4192
   macro avg       0.57      0.57      0.57      4192
weighted avg       0.57      0.57      0.57      4192


Confusion Matrix:
[[245  75  50  39  21  11  74   9]
 [ 20 356  69  17  19  17  20   6]
 