In [7]:
import torch
import torchvision.transforms as transforms
import numpy as np
import torchvision
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Subset


In [8]:

# Define CIFAR-10 mean and standard deviation
mean = [0.4914, 0.4822, 0.4465]
std = [0.2023, 0.1994, 0.2010]

# Define transformations for training and validation data
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])



In [9]:
# Define number of classes and samples per class
num_classes = 2
num_samples_per_class = 25

# Load CIFAR-10 dataset
cifar_data = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)

# Select random classes for training
random_classes = np.random.choice(range(10), num_classes, replace=False)
selected_indices_train = []
selected_indices_val = []

# Select samples for training and validation
for i in random_classes:
    class_indices = np.where(np.array(cifar_data.targets) == i)[0]
    selected_indices_train.extend(np.random.choice(class_indices, num_samples_per_class, replace=False))
    remaining_indices = list(set(class_indices) - set(selected_indices_train))
    selected_indices_val.extend(remaining_indices)



Files already downloaded and verified


In [10]:
# Create Subset datasets for training and validation
train_data = Subset(cifar_data, selected_indices_train)
val_data = Subset(cifar_data, selected_indices_val)

# Create DataLoader for training and validation datasets
train_loader = torch.utils.data.DataLoader(train_data, batch_size=len(train_data), shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=len(val_data), shuffle=False)

# Extract features and labels from training and validation datasets
train_features, train_labels = next(iter(train_loader))
val_features, val_labels = next(iter(val_loader))

train_features = train_features.view(train_features.size(0), -1).numpy()
val_features = val_features.view(val_features.size(0), -1).numpy()

# Normalize features using StandardScaler
scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
val_features_scaled = scaler.transform(val_features)


In [11]:

# Define SVM model hyperparameters
hyperparameters = [
    {'C': 0.5, 'gamma': 'scale', 'random_seed': 2},
    {'C': 1.0, 'gamma': 'scale', 'random_seed': 1},
    {'C': 2.0, 'gamma': 'scale', 'random_seed': 2},
    {'C': 3.0, 'gamma': 'scale', 'random_seed': 1},
    {'C': 0.5, 'gamma': 'scale', 'random_seed': 1},
    {'C': 1.0, 'gamma': 'auto', 'random_seed': 2},
    {'C': 2.0, 'gamma': 'auto', 'random_seed': 1},
    {'C': 3.0, 'gamma': 'auto', 'random_seed': 2},
]

# Train SVM models with different hyperparameters
svm_models = []
svm_accuracies = []
for params in hyperparameters:
    # Initialize SVM model with specified hyperparameters
    model = SVC(kernel='rbf', C=params['C'], gamma=params['gamma'], random_state=params['random_seed'])

    # Train model
    model.fit(train_features_scaled, train_labels)

    # Add trained model to list
    svm_models.append(model)

    # Predict on validation set
    val_predictions = model.predict(val_features_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(val_labels, val_predictions)
    print("Taken Parameters: {}".format(params))
    print("Accuracy: {}".format(accuracy))
    svm_accuracies.append(accuracy)

# Find the best hyperparameter combination and its accuracy
best_idx = np.argmax(svm_accuracies)
best_hyperparams = hyperparameters[best_idx]
best_accuracy = svm_accuracies[best_idx]

print("Best Hyperparameters:")
print(best_hyperparams)
print(f"Best Accuracy: {best_accuracy * 100:.2f}%")



Taken Parameters: {'C': 0.5, 'gamma': 'scale', 'random_seed': 2}
Accuracy: 0.6212060301507538
Taken Parameters: {'C': 1.0, 'gamma': 'scale', 'random_seed': 1}
Accuracy: 0.6203015075376884
Taken Parameters: {'C': 2.0, 'gamma': 'scale', 'random_seed': 2}
Accuracy: 0.6236180904522614
Taken Parameters: {'C': 3.0, 'gamma': 'scale', 'random_seed': 1}
Accuracy: 0.624321608040201
Taken Parameters: {'C': 0.5, 'gamma': 'scale', 'random_seed': 1}
Accuracy: 0.6212060301507538
Taken Parameters: {'C': 1.0, 'gamma': 'auto', 'random_seed': 2}
Accuracy: 0.6203015075376884
Taken Parameters: {'C': 2.0, 'gamma': 'auto', 'random_seed': 1}
Accuracy: 0.6236180904522614
Taken Parameters: {'C': 3.0, 'gamma': 'auto', 'random_seed': 2}
Accuracy: 0.624321608040201
Best Hyperparameters:
{'C': 3.0, 'gamma': 'scale', 'random_seed': 1}
Best Accuracy: 62.43%


In [12]:
# Predictions from individual SVM models
individual_predictions = [model.predict(val_features_scaled) for model in svm_models]

# Ensemble predictions by averaging
ensemble_predictions_avg = np.mean(individual_predictions, axis=0)

# Ensemble predictions by voting
ensemble_predictions_vote = np.round(np.mean(individual_predictions, axis=0))

# Convert ensemble predictions to discrete values (0 or 1)
ensemble_predictions_avg_discrete = np.round(ensemble_predictions_avg)
ensemble_predictions_vote_discrete = np.round(ensemble_predictions_vote)

# Calculate accuracy for ensemble predictions
accuracy_avg_discrete = accuracy_score(val_labels, ensemble_predictions_avg_discrete)
accuracy_vote_discrete = accuracy_score(val_labels, ensemble_predictions_vote_discrete)

print("Ensemble Accuracy (Averaging) after conversion: {:.2f}%".format(accuracy_avg_discrete * 100))
print("Ensemble Accuracy (Voting) after conversion: {:.2f}%".format(accuracy_vote_discrete * 100))

Ensemble Accuracy (Averaging) after conversion: 62.38%
Ensemble Accuracy (Voting) after conversion: 62.38%
