In [1]:
import numpy as np
import torchvision
import torchvision.transforms as transforms
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score,GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import torch
import torchvision

In [2]:
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10), 
    transforms.RandomCrop(32, padding=4),  
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  
    transforms.ToTensor(),  
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

In [3]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)



Files already downloaded and verified
Files already downloaded and verified


In [4]:
selected_classes = np.random.choice(range(10), 2, replace=False)

In [5]:
train_indices = []
test_indices = []

for idx, (image, label) in enumerate(trainset):
    if label in selected_classes:  # Include all samples from selected classes for training
        train_indices.append(idx)

for idx, (image, label) in enumerate(testset):
    if label in selected_classes:  # Include all samples from selected classes for testing
        test_indices.append(idx)

# Shuffle the indices
np.random.shuffle(train_indices)
np.random.shuffle(test_indices)

# Check if enough samples are found for training and testing
if len(train_indices) < 25 or len(test_indices) < 2000:
    raise ValueError("Insufficient samples found for training or testing.")

In [6]:
train_indices = train_indices[:25]
test_indices = test_indices[:2000]

trainloader = torch.utils.data.DataLoader(trainset, batch_size=25, sampler=torch.utils.data.SubsetRandomSampler(train_indices))
testloader = torch.utils.data.DataLoader(testset, batch_size=2000, sampler=torch.utils.data.SubsetRandomSampler(test_indices))

print("Selected classes:", selected_classes)

# Print the number of samples selected for training and testing
print("Number of training samples per class:", len(train_indices) // len(selected_classes))
print("Number of testing samples per class:", len(test_indices) // len(selected_classes))




Selected classes: [5 9]
Number of training samples per class: 12
Number of testing samples per class: 1000


In [7]:
train_data = []
train_labels = []
test_data = []
test_labels = []

In [8]:
for images, labels in trainloader:
    for image, label in zip(images, labels):
        train_data.append(image.numpy().flatten())  # Flatten the image tensor
        train_labels.append(label.item())

for images, labels in testloader:
    for image, label in zip(images, labels):
        test_data.append(image.numpy().flatten())  # Flatten the image tensor
        test_labels.append(label.item())

In [9]:
train_data = np.array(train_data)
train_labels = np.array(train_labels)
test_data = np.array(test_data)
test_labels = np.array(test_labels)

In [10]:
pca = PCA(n_components=0.70)  # Retain 95% of variance
X_train_reduced = pca.fit_transform(train_data)
X_test_reduced = pca.transform(test_data)

In [11]:
models = {
    'SVM': SVC(probability=True),
    'Random Forest': RandomForestClassifier(),
    'KNN': KNeighborsClassifier(),
}

In [12]:
param_grids = {
    'SVM': {'C': [0.1, 0.5, 1, 5,7, 10], 'gamma': ['scale', 'auto'], 'kernel': ['rbf', 'poly']},
    'Random Forest': {'n_estimators': [50, 100, 150], 'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]},
    'KNN': {'n_neighbors': [3, 5, 7], 'weights': ['uniform', 'distance'], 'algorithm': ['auto', 'ball_tree', 'kd_tree']}
}

In [13]:
for name, model in models.items():
    grid_search = GridSearchCV(model, param_grids[name], cv=5, scoring='accuracy')
    grid_search.fit(X_train_reduced, train_labels)
    
    print(f'Best hyperparameters for {name}: {grid_search.best_params_}')
    
    best_model = grid_search.best_estimator_
    
    # Train on the entire training set and evaluate on the unseen test set
    best_model.fit(X_train_reduced, train_labels)
    predictions = best_model.predict(X_test_reduced)
    accuracy = accuracy_score(test_labels, predictions)
    print(f'{name} Test Accuracy: {accuracy*100:.2f}%')

Best hyperparameters for SVM: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
SVM Test Accuracy: 73.25%
Best hyperparameters for Random Forest: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 150}
Random Forest Test Accuracy: 78.50%
Best hyperparameters for KNN: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'uniform'}
KNN Test Accuracy: 70.90%
