In [1]:
# Mount Google Drive inside the Colab runtime; the project's data and model
# directories used by this notebook are located under this mount.
from google.colab import drive
drive.mount('/content/gdrive')
import torch
import torchvision
import torchvision.transforms as transforms
from matplotlib import  pyplot as plt
import _pickle as cPickle
import pickle
import os.path
import warnings
from sklearn.tree import  DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm
# Root of the homework project on the mounted Drive. It is anchored at the
# absolute mount point ('/content/gdrive') instead of './gdrive', so the paths
# keep resolving even if the notebook's working directory is not /content.
projPath = '/content/gdrive/MyDrive/CSE498-homework/hw1'
# Dataset root handed to the torchvision dataset constructors.
dataDir = f'{projPath}/db/'
# Directory holding the pickled models. The trailing slash matters: file names
# are appended to this string by plain concatenation.
modelDir = f'{projPath}/model/'

Mounted at /content/gdrive


# MNIST Dataset


In [4]:
# Preprocessing pipeline attached to the dataset. MNIST images have a single
# channel, so Normalize takes one mean and one std value; three-channel
# statistics would fail as soon as the transform is applied to a 1-channel image.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

# Load the MNIST test split from dataDir (download=False: it must already be there).
testset = torchvision.datasets.MNIST(dataDir, train=False,  download=False, transform=transform)
nTestSamples, width, height = testset.data.shape
print(f'# test samples:{nTestSamples}')
print(f'per image size: {width}*{height}')
# testset.data / testset.targets are read directly, so `transform` is NOT
# applied here: the features are the stored pixel values, flattened to one row
# per image. NOTE(review): this assumes the pickled models were fitted on the
# same un-normalized representation - confirm against the training notebook.
Xtest = testset.data.view([nTestSamples, width*height])
Ytest = testset.targets.view([nTestSamples,1])

# test samples:10000
per image size: 28*28


## Decision Tree

In [5]:
# Load the pickled decision-tree model from modelDir and score it on the test set.
modelPath = modelDir + '{}.pkl'.format('decisionTree')
# NOTE: pickle.load can execute arbitrary code - only load model files you trust.
with open(modelPath, "rb") as f:
  dtreeBest = pickle.load(f)
Ypred = dtreeBest.predict(Xtest)
# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions
# second. Accuracy and macro-F1 happen to be unaffected by the order, but other
# averaging modes (e.g. "weighted") are not, so pass them the documented way.
print(f'Classification accuracy:{accuracy_score(Ytest, Ypred):6.5f} | Macro-F1 score:{f1_score(Ytest, Ypred, average="macro"):6.5f}')

Classification accuracy:0.88120 | Macro-F1 score:0.87962


## Gradient Boosting

In [6]:
# Load the pickled gradient-boosting model from modelDir and score it on the test set.
modelPath = modelDir + '{}.pkl'.format('gradientBoosting')
# NOTE: pickle.load can execute arbitrary code - only load model files you trust.
with open(modelPath, "rb") as f:
  gbBest = pickle.load(f)
Ypred = gbBest.predict(Xtest)
# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions
# second. Accuracy and macro-F1 happen to be unaffected by the order, but other
# averaging modes (e.g. "weighted") are not, so pass them the documented way.
print(f'Classification accuracy:{accuracy_score(Ytest, Ypred):6.5f} | Macro-F1 score:{f1_score(Ytest, Ypred, average="macro"):6.5f}')

Classification accuracy:0.94580 | Macro-F1 score:0.94528


## Naive Bayes - Gaussian

In [7]:
# Load the pickled Gaussian naive Bayes model from modelDir and score it on the test set.
modelPath = modelDir + '{}.pkl'.format('GaussianNB')
# NOTE: pickle.load can execute arbitrary code - only load model files you trust.
with open(modelPath, "rb") as f:
  nbBest = pickle.load(f)
Ypred = nbBest.predict(Xtest)
# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions
# second. Accuracy and macro-F1 happen to be unaffected by the order, but other
# averaging modes (e.g. "weighted") are not, so pass them the documented way.
print(f'Classification accuracy:{accuracy_score(Ytest, Ypred):6.5f} | Macro-F1 score:{f1_score(Ytest, Ypred, average="macro"):6.5f}')

Classification accuracy:0.81400 | Macro-F1 score:0.81235


# CIFAR-10

In [8]:
# Preprocessing pipeline attached to the dataset: tensor conversion followed by
# per-channel normalization with mean 0.5 / std 0.5 for each of the 3 channels.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])
# CIFAR-10 test split, read from dataDir (no download is attempted).
testset = torchvision.datasets.CIFAR10(
    root=dataDir,
    train=False,
    transform=transform,
    download=False,
)
nTestSamples, width, height, channel = testset.data.shape
print(f'per image size: {width}*{height} | per image channel:{channel}')
# One flat feature row per image, built from testset.data directly.
Xtest = testset.data.reshape(nTestSamples, width * height * channel)
# Labels as a 1-D tensor.
Ytest = torch.tensor(testset.targets)
print(f'Xtest  shape: {Xtest.shape} | Ytest shape: {Ytest.shape}')

per image size: 32*32 | per image channel:3
Xtest  shape: (10000, 3072) | Ytest shape: torch.Size([10000])


## Naive Bayes - Gaussian

In [9]:
# Load the pickled CIFAR-10 Gaussian naive Bayes model from modelDir and score it on the test set.
modelPath = modelDir + '{}.pkl'.format('cifa10-GaussianNB')
# NOTE: pickle.load can execute arbitrary code - only load model files you trust.
with open(modelPath, "rb") as f:
  nbBest = pickle.load(f)
Ypred = nbBest.predict(Xtest)
# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions
# second. Accuracy and macro-F1 happen to be unaffected by the order, but other
# averaging modes (e.g. "weighted") are not, so pass them the documented way.
print(f'Classification accuracy:{accuracy_score(Ytest, Ypred):6.5f} | Macro-F1 score:{f1_score(Ytest, Ypred, average="macro"):6.5f}')

Classification accuracy:0.29760 | Macro-F1 score:0.27546


## Decision Tree

In [10]:
# Load the pickled CIFAR-10 decision-tree model from modelDir and score it on the test set.
modelPath = modelDir + '{}.pkl'.format('cifa10-decisionTree')
# NOTE: pickle.load can execute arbitrary code - only load model files you trust.
with open(modelPath, "rb") as f:
  dtreeBest = pickle.load(f)
Ypred = dtreeBest.predict(Xtest)
# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions
# second. Accuracy and macro-F1 happen to be unaffected by the order, but other
# averaging modes (e.g. "weighted") are not, so pass them the documented way.
print(f'Classification accuracy:{accuracy_score(Ytest, Ypred):6.5f} | Macro-F1 score:{f1_score(Ytest, Ypred, average="macro"):6.5f}')

Classification accuracy:0.26970 | Macro-F1 score:0.26961


## Gradient Boosting

In [12]:
# Load the pickled CIFAR-10 gradient-boosting model from modelDir and score it on the test set.
modelPath = modelDir + '{}.pkl'.format('cifa10-gradientBoosting')
# NOTE: pickle.load can execute arbitrary code - only load model files you trust.
with open(modelPath, "rb") as f:
  # Bound to gbBest (not nbBest): this is the gradient-boosting model, and
  # reusing the naive Bayes name would silently replace that model.
  gbBest = pickle.load(f)
Ypred = gbBest.predict(Xtest)
# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions
# second. Accuracy and macro-F1 happen to be unaffected by the order, but other
# averaging modes (e.g. "weighted") are not, so pass them the documented way.
print(f'Classification accuracy:{accuracy_score(Ytest, Ypred):6.5f} | Macro-F1 score:{f1_score(Ytest, Ypred, average="macro"):6.5f}')

Classification accuracy:0.48250 | Macro-F1 score:0.47972
