<a href="https://colab.research.google.com/github/asjnhy/AI-courses-2019/blob/master/MNIST-Polynomial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Google Drive Mount

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
!pip install cupy

Collecting cupy
[?25l  Downloading https://files.pythonhosted.org/packages/dc/89/99f980706c61e6b96a579a81dea3eb68c22df1b526bf357673be5e18fe31/cupy-7.0.0.tar.gz (3.7MB)
[K     |████████████████████████████████| 3.7MB 4.7MB/s 
Building wheels for collected packages: cupy
  Building wheel for cupy (setup.py) ... [?25l[?25hdone
  Created wheel for cupy: filename=cupy-7.0.0-cp36-cp36m-linux_x86_64.whl size=28356280 sha256=ad54cc16377026383458b559e3e3f6fc1a8f17cb1a1702b0d073925a85660897
  Stored in directory: /root/.cache/pip/wheels/4b/33/37/f412224f7550a11ee18a9f41f7aff28df380bfa994f7280ab3
Successfully built cupy
Installing collected packages: cupy
Successfully installed cupy-7.0.0


# Import Packages

In [0]:
import os
import math
import time
import struct
import numpy as np
import cupy as cp
from datetime import datetime
from matplotlib import pyplot
from joblib import Parallel, delayed
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin

# Binary Classifier

In [0]:
class BinaryClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, batch_size=16, max_iter=50, learning_rate=0.01, random_state=1, C=100):
        self.batch_size = batch_size
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.random_state = random_state
        self.C = C
        self.rgen = np.random.RandomState(self.random_state)
        
    def fit(self, X, y):
        # Exception Handling
        if self.C < 0:
            raise ValueError("The C value of %r must be positive" % self.C)
        if ((self.learning_rate < 0) or (self.learning_rate > 1)):
            raise ValueError("The learning_rate value of %r is invalid." % self.learning_rate,
                             "Set the learning_rate value between 0.0 and 1.0.")        
            
        n_batches = math.ceil(len(X) / self.batch_size)
        rest_batch_size = X.shape[0] - (n_batches-1) * self.batch_size
        
        self.w_ = cp.array(self.rgen.normal(loc=0.0, scale=0.01, size=X.shape[1]))
        self.b_ = 0.
        
        for epoch in range(self.max_iter):
            X, y = self.shuffle(X, y)
            
            for j in range(n_batches - 1):
                self.calculateGradientAndUpdate(X, y, batch_size = self.batch_size, n_batch = j)
            self.calculateGradientAndUpdate(X, y, batch_size = rest_batch_size, n_batch = j)
            
        return self
    
    def hypothesis(self, X):
        return cp.dot(cp.array(X), self.w_) + self.b_
    
    def shuffle(self, X, y):
        shuffle_index = np.arange(X.shape[0])
        np.random.shuffle(shuffle_index)
        return X[shuffle_index], y[shuffle_index]
    
    def calculateGradientAndUpdate(self, X, y, batch_size, n_batch):
        X_mini = X[n_batch*batch_size : (n_batch+1)*batch_size]
        y_mini = y[n_batch*batch_size : (n_batch+1)*batch_size]
        X_mini = cp.array(X_mini)
        y_mini = cp.array(y_mini)

        grad_w = cp.zeros(X.shape[1])
        grad_b = 0
        mask = cp.less_equal(cp.multiply(y_mini, self.hypothesis(X_mini)), 1)
        
        Xy = cp.multiply(X_mini.T, y_mini)
        masked_Xy = cp.multiply(Xy, mask)
        grad_w = cp.sum(-masked_Xy, axis=1)
        grad_w /= batch_size
        grad_w += self.w_/self.C
        self.w_ -= self.learning_rate * grad_w
        
        masked_y = cp.multiply(y_mini, mask)
        grad_b = cp.sum(-masked_y, axis=0)
        grad_b = grad_b / batch_size
        self.b_ -= self.learning_rate * grad_b
        return grad_w, grad_b

# Multiclass Classifier

In [0]:
class MulticlassClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, batch_size=16, max_iter=50, learning_rate=0.01, random_state=1, C=100):
        self.batch_size = batch_size
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.random_state = random_state
        self.C = C
        
    def fit(self, X, y):
        self.labels = np.unique(y) # 0 ~ 9
        # self.labels = cp.unique(y) # 0 ~ 9
        self.outputs_ = []
        for label in range(len(self.labels)):
            # y_binary = cp.where(cp.array(y == label), 1, -1)
            y_binary = np.where(np.array(y==label),1,-1)
            b_c = BinaryClassifier(self.batch_size, self.max_iter, 
                                   self.learning_rate, self.random_state, self.C)
            b_c.fit(X, y_binary)
            self.outputs_.append(b_c)
        return self
        
    def predict(self, X):
        prediction = []
        for o in self.outputs_:
            prediction.append(o.hypothesis(X))
        return self.labels[np.argmax(prediction, axis=0)]
        # return self.labels[cp.argmax(prediction, axis=0)]

# MNIST Read Function

In [0]:
def read(images, labels):
    with open(labels, 'rb') as lbpath:
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)

    return images, labels

def read_no_label(images):
    with open(images, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(60000, 784)
    return images

# Read MNIST & Split for Valiation (80k)

In [0]:
# 경로 수정하세요 !
X, y = read(os.getcwd() + '/drive/My Drive/Colab Notebooks/data/newtrain-images-idx3-ubyte', 
            os.getcwd() + '/drive/My Drive/Colab Notebooks/data/newtrain-labels-idx1-ubyte')
# X_test_no_label = read_no_label(os.getcwd()+'/data/testall-images-idx3-ubyte')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

print(X_test.shape)

(24000, 784)


# Preprocessing : Convolutional Feature Extraction

## Implementation

In [0]:
def zero_padding(img, n=1):

    m = img.shape[0]
    w = img.shape[1]
    h = img.shape[2]
    
    padded_img = np.ones((m, w + 2 * n, h + 2 * n))
    
    padded_img[:, n : padded_img.shape[1] - n, n : padded_img.shape[2] - n] = img
    
    return padded_img
    
def horizontal_filter(img):
    h_filter = np.array([
        [ 0, 0, 0],
        [ 0, 1, 0],
        [-1, 0, 0]
    ])
    h_filter2 = np.array([
        [-1, -2, -1],
        [ 0,  0,  0],
        [ 1,  2,  1]
    ])
    h_filter.reshape(1,3,3)
    h_filter2.reshape(1,3,3)
    m = img.shape[0]
    w = img.shape[1]
    h = img.shape[2]
    horizontal_grad = np.zeros((m, w - 2, h - 2))
    horizontal_grad2 = np.zeros((m, w - 2, h - 2))

    for i in range(1, w - 1):
        for j in range(1, h - 1):
            images_slice = img[:, i - 1 : i + 2, j - 1 : j + 2]
            horizontal_grad[:, i - 1, j - 1] = np.sum(np.multiply(images_slice, h_filter), axis=(1, 2))
            horizontal_grad2[:, i - 1, j - 1] = np.sum(np.multiply(images_slice, h_filter2), axis=(1, 2))
            
    return horizontal_grad, horizontal_grad2
    
def MaxPooling(img):

    m = img.shape[0]
    w = img.shape[1]
    h = img.shape[2]
    
    img = zero_padding(img, 1)
    img2, img3 = horizontal_filter(img)
    
    pooling_grad = np.zeros((m, w//2, h//2))
    pooling_grad2 = np.zeros((m, w//2, h//2))

    for i in range(0, w//2):
        for j in range(0, h//2):
            pooling_grad[: , i , j ] = np.max(img2[: , 2*i : 2*i + 2, 2*j : 2*j + 2])
            pooling_grad2[: , i , j ] = np.max(img3[: , 2*i : 2*i + 2, 2*j : 2*j + 2])

    pooling_grad = pooling_grad.reshape(m,-1)
    pooling_grad2 = pooling_grad.reshape(m,-1)    
            
    return pooling_grad, pooling_grad2

## Convolution

In [0]:
X_train_reshaped = X_train.reshape(X_train.shape[0], 28, 28)
X_test_reshaped = X_test.reshape(X_test.shape[0], 28, 28)

X_train_conv1, X_train_conv2 = MaxPooling(X_train_reshaped)
X_test_conv1, X_test_conv2 = MaxPooling(X_test_reshaped)

## PCA

In [0]:
pca = PCA(n_components=0.93)

X_train_pca = pca.fit_transform(X_train) 
X_test_pca = pca.transform(X_test)

## Polynomial

In [0]:
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=True, order='F')
X_train_pca_poly = poly.fit_transform(X_train_pca)
X_test_pca_poly = poly.transform(X_test_pca)

# Check shape

In [0]:
print("X_train : ", X_train.shape)
print("X_train_conv1 : ", X_train_conv1.shape)
print("X_train_conv2 : ", X_train_conv2.shape)
print("X_train_pca : ", X_train_pca.shape)
print("X_train_pca_poly : ", X_train_pca_poly.shape)

X_train :  (56000, 784)
X_train_conv1 :  (56000, 196)
X_train_conv2 :  (56000, 196)
X_train_pca :  (56000, 119)
X_train_pca_poly :  (56000, 7260)


# Hstack

In [0]:
X_train_pca_poly_conv12 = np.hstack((X_train_pca_poly,
                                     X_train_conv1,
                                     X_train_conv2))

X_test_pca_poly_conv12 = np.hstack((X_test_pca_poly,
                                    X_test_conv1,
                                    X_test_conv2))

In [0]:
X_train_pca_poly_conv12.shape

(56000, 7652)

# Training & Testing

In [0]:
MC=MulticlassClassifier(max_iter=5, batch_size=256, learning_rate=0.01, C=1000)

In [0]:
print("Start Time : ", datetime.now())

start = time.time()
MC.fit(X_train_pca_poly_conv12, y_train)

y_pred = MC.predict(X_test_pca_poly_conv12)
score = accuracy_score(y_test, y_pred)

print("End Time : ", datetime.now())
print("Training Time : ", time.time() - start)
print(score)

Start Time :  2019-12-14 17:07:18.581149
End Time :  2019-12-14 17:10:44.478631
Training Time :  205.89781498908997
0.9747083333333333


# Grid Search

In [0]:
# param_grid = [{
#     'C' : [0.01, 0.1, 1, 10, 100, 1000, 10000],
#     'learning_rate' : [0.1, 0.01, 0.001],
#     'batch_size' : [32],
#     'max_iter' : [5]
# }]

# grid_search = GridSearchCV(MulticlassClassifier(), 
#                             param_grid=param_grid, 
#                             cv=10, scoring='accuracy',
#                             n_jobs=-1)

# grid_search.fit(X_train_pca_poly_conv12, y_train)
# print(grid_search.best_params_)
# print(grid_search.best_score_)