# SMAI Mini Project 2

## Importing libraries

In [3]:
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import numpy as np
import sklearn as sk
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE
from sklearn import svm
from sklearn import linear_model
from sklearn.neural_network import MLPClassifier
from sklearn import tree
from sklearn.metrics import accuracy_score, f1_score
from sklearn import grid_search

## Load CIFAR-10 Data

In [5]:

def load_cifar10_data(train_num=-1, test_num=-1):
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
    print("Total Shape of Train Data:", np.shape(x_train))
    print("Total Shape of Train Label:", np.shape(y_train))
    print("Total Shape of Test Data:", np.shape(x_test))
    print("Total Shape of Test Label:", np.shape(y_test))
    if train_num == -1:
        train_num = np.shape(x_train)[0]
    if test_num == -1:
        test_num = np.shape(x_test)[0]
    return x_train[0:train_num, :], y_train[0:train_num, :], x_test[0:test_num, :], y_test[0:test_num, :]


## Making representations

In [6]:
class Representation:
    def __init__(self, x_train, y_train, x_test, y_test):
        x_train = tf.image.rgb_to_grayscale(x_train)
        self.x_train = tf.divide(tf.to_float(x_train), tf.constant(255.0))
        self.y_train = y_train
        x_test = tf.image.rgb_to_grayscale(x_test)
        self.x_test = tf.divide(tf.to_float(x_test), tf.constant(255.0))
        self.y_test = y_test
        self.train_shape = np.shape(self.x_train)
        self.test_shape = np.shape(self.x_test)
        print("Transformed Train Data:", np.shape(self.x_train))
        print("Transformed Train Label:", np.shape(self.y_train))
        print("Transformed Test Data:", np.shape(self.x_test))
        print("Transformed Test Label:", np.shape(self.y_test))

    def get_raw(self):
        return (self.x_train, self.y_train, self.x_test, self.y_test)
    
    def get_flatten(self):
        x_train_flat = tf.reshape(self.x_train, [self.train_shape[0], self.train_shape[1]*self.train_shape[2]*self.train_shape[3]]).eval(session=tf.Session())
        x_test_flat = tf.reshape(self.x_test, [self.test_shape[0], self.test_shape[1]*self.test_shape[2]*self.test_shape[3]]).eval(session=tf.Session())
        return (x_train_flat, self.y_train, x_test_flat, self.y_test)

    def get_pca(self, num_components):
        x_train_pca = tf.reshape(self.x_train, [self.train_shape[0], self.train_shape[1]*self.train_shape[2]*self.train_shape[3]]).eval(session=tf.Session())
        x_test_pca = tf.reshape(self.x_test, [self.test_shape[0], self.test_shape[1]*self.test_shape[2]*self.test_shape[3]]).eval(session=tf.Session())
        pca = PCA(n_components=num_components)
        x_pca = pca.fit(x_train_pca)
        x_train_new = x_pca.transform(x_train_pca)
        x_test_new = x_pca.transform(x_test_pca)
        return (x_train_new, self.y_train, x_test_new, self.y_test)

    def get_lda(self, num_components):
        x_train_lda = tf.reshape(self.x_train, [self.train_shape[0], self.train_shape[1]*self.train_shape[2]*self.train_shape[3]]).eval(session=tf.Session())
        x_test_lda = tf.reshape(self.x_test, [self.test_shape[0], self.test_shape[1]*self.test_shape[2]*self.test_shape[3]]).eval(session=tf.Session())
        lda = LinearDiscriminantAnalysis(n_components=num_components)
        x_lda = lda.fit(x_train_lda, self.y_train)
        x_train_new = x_lda.transform(x_train_lda)
        x_test_new = x_lda.transform(x_test_lda)
        return (x_train_new, self.y_train, x_test_new, self.y_test)
    
    def get_tsne(self, num_components):
        x_train_tsne = tf.reshape(self.x_train, [self.train_shape[0], self.train_shape[1]*self.train_shape[2]*self.train_shape[3]]).eval(session=tf.Session())
        x_test_tsne = tf.reshape(self.x_test, [self.test_shape[0], self.test_shape[1]*self.test_shape[2]*self.test_shape[3]]).eval(session=tf.Session())
        tsne = TSNE(n_components=num_components, init='pca')
        x_train_new = tsne.fit_transform(x_train_tsne)
        x_test_new = tsne.fit_transform(x_test_tsne)
        return (x_train_new, self.y_train, x_test_new, self.y_test)
    

## Defining models

In [7]:
class Models:
    def __init__(self, x_train, y_train, x_test, y_test):
        self.x_train = x_train
        self.y_train = y_train
        self.x_test  = x_test
        self.y_test  = y_test
    
    def linear_svm(self, penalty='l2', C=1, loss='squared_hinge'):
        clf = svm.LinearSVC(penalty=penalty, C=C)
        clf.fit(self.x_train, self.y_train)
        pred = np.rint(clf.predict(self.x_test))
        return (f1_score(self.y_test, pred, average='macro'), accuracy_score(self.y_test, pred))

    
    def kern_svm(self, kern='rbf', C=1, gamma='scale'):
        clf = svm.SVC(kernel=kern, C=C, gamma=gamma)
        clf.fit(self.x_train, self.y_train)
        pred = np.rint(clf.predict(self.x_test))
        return (f1_score(self.y_test, pred, average='macro'), accuracy_score(self.y_test, pred))

    def linear_classifier(self, norm='l2', solver='sag', reg=1):
        clf = linear_model.LogisticRegression(penalty=norm, C=reg, solver=solver)
        clf.fit(self.x_train, self.y_train)
        pred = np.rint(clf.predict(self.x_test))
        return (f1_score(self.y_test, pred, average='macro'), accuracy_score(self.y_test, pred))

    def mlp(self, solver='adam', activation='relu', h_size=(100,), eta=0.001, l_mode='constant', b_1=0.9, b_2=0.999, eps=1e-8):
        clf = MLPClassifier(solver=solver, activation=activation, hidden_layer_sizes=h_size, learning_rate_init=eta,
                            learning_rate=l_mode, beta_1=b_1, beta_2=b_2, epsilon=eps)
        clf.fit(self.x_train, self.y_train)
        pred = np.rint(clf.predict(self.x_test))
        return (f1_score(self.y_test, pred, average='macro'), accuracy_score(self.y_test, pred))

    def get_dt(self, max_depth):
        clf = tree.DecisionTreeClassifier(max_depth=max_depth)
        clf.fit(self.x_train, self.y_train)
        pred = np.rint(clf.predict(self.x_test))
        return (f1_score(self.y_test, pred, average='macro'), accuracy_score(self.y_test, pred))

## Running code

In [8]:
train_num = 6000
test_num = 1000
x_train_raw, y_train_raw, x_test_raw, y_test_raw = load_cifar10_data(train_num, test_num)
data = Representation(x_train_raw, y_train_raw, x_test_raw, y_test_raw)

('Total Shape of Train Data:', (50000, 32, 32, 3))
('Total Shape of Train Label:', (50000, 1))
('Total Shape of Test Data:', (10000, 32, 32, 3))
('Total Shape of Test Label:', (10000, 1))
('Transformed Train Data:', TensorShape([Dimension(6000), Dimension(32), Dimension(32), Dimension(1)]))
('Transformed Train Label:', (6000, 1))
('Transformed Test Data:', TensorShape([Dimension(1000), Dimension(32), Dimension(32), Dimension(1)]))
('Transformed Test Label:', (1000, 1))


In [9]:
raw_data = data.get_raw()
raw_model = Models(raw_data[0], raw_data[1], raw_data[2], raw_data[3])

In [13]:
pca_data = data.get_raw()
pca_model = Models(raw_data[0], raw_data[1], raw_data[2], raw_data[3])

IndexError: tuple index out of range