In [1]:
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from scipy.stats import randint
from sklearn.model_selection import GridSearchCV

In [2]:
# CIFAR-10
CIFAR_transform_train = transforms.Compose([transforms.ToTensor()])
CIFAR_transform_test =transforms.Compose([transforms.ToTensor()])
trainset_CIFAR = datasets.CIFAR10(root='./data', train=True, download=True, transform=
CIFAR_transform_train)
testset_CIFAR = datasets.CIFAR10(root='./data', train=False, download=True,
transform=CIFAR_transform_test)
CIFAR_train = DataLoader(trainset_CIFAR, batch_size=32, shuffle=True, num_workers=2)
CIFAR_test = DataLoader(testset_CIFAR, batch_size=32, shuffle=False, num_workers=2)
CIFAR_train_images = []
CIFAR_train_labels = []
for batch in CIFAR_train:
    images, labels = batch
    images_flat = images.view(images.shape[0], -1)
    CIFAR_train_images.append(images_flat.numpy())
    CIFAR_train_labels.append(labels.numpy())
CIFAR_train_images = np.vstack(CIFAR_train_images)
CIFAR_train_labels = np.concatenate(CIFAR_train_labels)
CIFAR_test_images = []
CIFAR_test_labels = []
for batch in CIFAR_test:
    images, labels = batch
    images_flat = images.view(images.shape[0], -1)
    CIFAR_test_images.append(images_flat.numpy())
    CIFAR_test_labels.append(labels.numpy())
CIFAR_test_images = np.vstack(CIFAR_test_images)
CIFAR_test_labels = np.concatenate(CIFAR_test_labels)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# MNIST
mnist_train_transform = transforms.Compose([transforms.ToTensor()])
mnist_test_transform = transforms.Compose([transforms.ToTensor()])
trainset_mnist = datasets.MNIST(root='./data', train=True, download=True,
transform=mnist_train_transform)
testset_mnist = datasets.MNIST(root='./data', train=False, download=True,
transform=mnist_test_transform)
MNIST_train = DataLoader(trainset_mnist, batch_size=32, shuffle=True, num_workers=2)
MNIST_test = DataLoader(testset_mnist, batch_size=32, shuffle=False, num_workers=2)
MNIST_train_images = []
MNIST_train_labels = []
for batch in MNIST_train:
    images, labels = batch
    images_flat = images.view(images.shape[0], -1)
    MNIST_train_images.append(images_flat.numpy())
    MNIST_train_labels.append(labels.numpy())
MNIST_train_images = np.vstack(MNIST_train_images)
MNIST_train_labels = np.concatenate(MNIST_train_labels)
MNIST_test_images = []
MNIST_test_labels = []
for batch in MNIST_test:
    images, labels = batch
    images_flat = images.view(images.shape[0], -1)
    MNIST_test_images.append(images_flat.numpy())
    MNIST_test_labels.append(labels.numpy())
MNIST_test_images = np.vstack(MNIST_test_images)
MNIST_test_labels = np.concatenate(MNIST_test_labels)

Decision Tree for MNIST

In [5]:
params_grid = { 'min_samples_split': [2, 5, 10],
               'min_samples_leaf': [1, 2, 4],
               'max_leaf_nodes': [5, 10, None]}

for depth in range (1,5):
    tree = DecisionTreeClassifier(max_depth=3*depth)
    GR=GridSearchCV(tree,param_grid=params_grid,cv=5,scoring='accuracy',refit=True)
    GR.fit(MNIST_train_images,MNIST_train_labels)
    print("Best param :",GR.best_params_)
    print('Training Accuracy:', GR.score(MNIST_train_images,MNIST_train_labels))
    print('Test Accuracy:', GR.score(MNIST_test_images,MNIST_test_labels))

Best param : {'max_leaf_nodes': 10, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training Accuracy: 0.49151666666666666
Test Accuracy: 0.4953
Best param : {'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training Accuracy: 0.73825
Test Accuracy: 0.7415
Best param : {'max_leaf_nodes': None, 'min_samples_leaf': 4, 'min_samples_split': 2}
Training Accuracy: 0.8654833333333334
Test Accuracy: 0.8494
Best param : {'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training Accuracy: 0.94915
Test Accuracy: 0.8772


|Depth|Training Accuracy|Test Accuracy|Best param|
|-----|-----------------|-------------|-----------------------------------------------|
|3|0.49151666666|0.4953|'max_leaf_nodes': 10, 'min_samples_leaf': 1, 'min_samples_split': 2|
|6|0.73825|0.7415|'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2|
|9|0.86548333333|0.8494|'max_leaf_nodes': None, 'min_samples_leaf': 4, 'min_samples_split': 2|
|12|0.94915|0.8772|'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2|

Decision Tree for CIFAR

In [4]:
params_grid = { 'min_samples_split': [2, 5, 10],
               'min_samples_leaf': [1, 2, 4],
               'max_leaf_nodes': [5, 10, None]}

for depth in range (1,5):
    tree = DecisionTreeClassifier(max_depth=3*depth)
    GR=GridSearchCV(tree,param_grid=params_grid,cv=5,scoring='accuracy',refit=True)
    GR.fit(CIFAR_train_images,CIFAR_train_labels)
    print("Best param :",GR.best_params_)
    print('Training Accuracy:', GR.score(CIFAR_train_images,CIFAR_train_labels))
    print('Test Accuracy:', GR.score(CIFAR_test_images,CIFAR_test_labels))

Best param : {'max_leaf_nodes': 10, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training Accuracy: 0.23762
Test Accuracy: 0.2394
Best param : {'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training Accuracy: 0.29588
Test Accuracy: 0.2812
Best param : {'max_leaf_nodes': None, 'min_samples_leaf': 4, 'min_samples_split': 10}
Training Accuracy: 0.38212
Test Accuracy: 0.3042
Best param : {'max_leaf_nodes': None, 'min_samples_leaf': 4, 'min_samples_split': 5}
Training Accuracy: 0.521
Test Accuracy: 0.3044


|Depth|Training Accuracy|Test Accuracy|Best param|
|-----|-----------------|-------------|-----------------------------------------------|
|3|0.23762|0.2394|'max_leaf_nodes': 10, 'min_samples_leaf': 1, 'min_samples_split': 2|
|6|0.29588|0.2812|'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2|
|9|0.38212|0.3042|'max_leaf_nodes': None, 'min_samples_leaf': 4, 'min_samples_split': 10|
|12|0.521|0.3044|'max_leaf_nodes': None, 'min_samples_leaf': 4, 'min_samples_split': 5|

SVM for MNIST

In [5]:
svm_clf=svm.SVC(kernel='linear')
svm_clf.fit(MNIST_train_images,MNIST_train_labels)
print('Linear Training Accuracy: ',svm_clf.score(MNIST_train_images,MNIST_train_labels))
print('Linear Test Accuracy: ',svm_clf.score(MNIST_test_images,MNIST_test_labels))
svm_clf=svm.SVC(kernel='rbf')
svm_clf.fit(MNIST_train_images,MNIST_train_labels)
print('RBF Training Accuracy: ',svm_clf.score(MNIST_train_images,MNIST_train_labels))
print('RBF Test Accuracy: ',svm_clf.score(MNIST_test_images,MNIST_test_labels))

Linear Training Accuracy:  0.9707333333333333
Linear Test Accuracy:  0.9403
RBF Training Accuracy:  0.9899166666666667
RBF Test Accuracy:  0.9792


|Kernel|Training Accuracy|Test Accuracy|
|-----|-----------------|-------------|
|Linear|0.97073333333|0.9403|
|RBF|0.98991666666|0.9792|

SVM for CIFAR

In [6]:
svm_clf=svm.SVC(kernel='linear')
svm_clf.fit(CIFAR_train_images,CIFAR_train_labels)
print('Linear Training Accuracy: ',svm_clf.score(CIFAR_train_images,CIFAR_train_labels))
print('Linear Test Accuracy: ',svm_clf.score(CIFAR_test_images,CIFAR_test_labels))
svm_clf=svm.SVC(kernel='rbf')
svm_clf.fit(CIFAR_train_images,CIFAR_train_labels)
print('RBF Training Accuracy: ',svm_clf.score(CIFAR_train_images,CIFAR_train_labels))
print('RBF Test Accuracy: ',svm_clf.score(CIFAR_test_images,CIFAR_test_labels))

Linear Training Accuracy:  0.5749
Linear Test Accuracy:  0.3755
RBF Training Accuracy:  0.70286
RBF Test Accuracy:  0.5436


|Kernel|Training Accuracy|Test Accuracy|
|-----|-----------------|-------------|
|Linear|0.5749|0.3755|
|RBF|0.70286|0.5436|