In [1]:
import os 
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
     
# Root folder holding the four MNIST .gz archives. The original machine-specific
# location stays the default; set the MNIST_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
data_path = os.environ.get(
    'MNIST_DATA_DIR',
    'D:\\Python\\MachineLearning\\MachineLearning\\week07\\data\\handwritting',
)
# training split: images and labels
train_images_path = os.path.join(data_path, 'train-images-idx3-ubyte.gz')
train_labels_path = os.path.join(data_path, 'train-labels-idx1-ubyte.gz')
# test split: images and labels
test_images_path = os.path.join(data_path, 't10k-images-idx3-ubyte.gz')
test_labels_path = os.path.join(data_path, 't10k-labels-idx1-ubyte.gz')

In [2]:
def get_mnist_data(images_path, labels_path, num_images,
                   shuffle=False, _is=True, image_size=28):
    """
    Load MNIST-style images and labels from gzip-compressed IDX files.

    Parameters
    ----------
    images_path : str
        Gzipped image file: a 16-byte header followed by one unsigned byte
        per pixel.
    labels_path : str
        Gzipped label file: an 8-byte header followed by one unsigned byte
        per label.
    num_images : int
        Number of samples to return.
    shuffle : bool
        If falsy, the first ``num_images`` samples are returned in file
        order. If truthy, the whole set is read (its size is chosen by
        ``_is``, matching the archives from
        'http://yann.lecun.com/exdb/mnist/') and ``num_images`` samples are
        drawn at random. The draw is without replacement, so no sample is
        repeated; only when more samples are requested than exist does it
        fall back to drawing with replacement.
    _is : bool
        Only used when shuffling: True means the 60000-sample training set,
        False the 10000-sample test set.
    image_size : int
        Side length of the square images, in pixels.

    Returns
    -------
    images : np.ndarray, float32, shape (num_images, image_size * image_size)
    labels : np.ndarray, int64, shape (num_images,)

    Raises
    ------
    ValueError
        If either file holds fewer samples than have to be read.
    """
    import gzip           # to decompress the .gz archives

    # how many samples have to be read from disk
    real_num = num_images if not shuffle else (60000 if _is else 10000)

    # images: skip the 16 header bytes, then read real_num raw images;
    # the context manager closes the file even if reading fails
    with gzip.open(images_path, 'r') as f_images:
        f_images.read(16)
        buf_images = f_images.read(image_size * image_size * real_num)

    images = np.frombuffer(buf_images, dtype=np.uint8).astype(np.float32)
    images = images.reshape(real_num, image_size, image_size)

    # labels: skip the 8 header bytes, then read one byte per sample at once
    with gzip.open(labels_path, 'r') as f_labels:
        f_labels.read(8)
        buf_labels = f_labels.read(real_num)

    labels = np.frombuffer(buf_labels, dtype=np.uint8).astype(np.int64)
    if labels.shape[0] != real_num:
        raise ValueError(
            'expected %d labels in %r but found %d'
            % (real_num, labels_path, labels.shape[0])
        )

    # pick random samples; the same index array keeps images and labels aligned
    if shuffle:
        rand_id = np.random.choice(
            real_num, size=num_images, replace=num_images > real_num
        )
        images = images[rand_id, :]
        labels = labels[rand_id,]

    # flatten every image into a vector of image_size * image_size values
    images = images.reshape(num_images, image_size * image_size)
    return images, labels

In [4]:
import math

# one-hot coding
from scipy import sparse
def convert_labels(y, C):
    """
    One-hot encode the integer labels ``y`` into a dense (C, len(y)) array.

    Column ``i`` holds a single 1 in row ``y[i]``; all other entries are 0.
    The matrix is assembled in sparse COO form and then densified.
    """
    n_samples = len(y)
    col_idx = np.arange(n_samples)
    ones = np.ones_like(y)
    one_hot = sparse.coo_matrix((ones, (y, col_idx)), shape=(C, n_samples))
    return one_hot.toarray()

#softmax for multi-class
def softmax(V):
    """
    Softmax taken along axis 0 of ``V``.

    The maximum along axis 0 is subtracted before exponentiating so that
    large scores do not overflow; the result is unchanged by this shift.
    """
    peak = np.max(V, axis=0, keepdims=True)
    exps = np.exp(V - peak)
    return exps / exps.sum(axis=0)

#definition of ReLU, or you can use maximum directly
def ReLU(V):
    """Rectified linear unit: element-wise maximum of ``V`` and 0."""
    threshold = 0
    rectified = np.maximum(V, threshold)
    return rectified

# cost or loss function
def cost(Y, Yhat):
    """
    Average cross-entropy between targets ``Y`` and predictions ``Yhat``.

    The sum of ``-Y * log(Yhat)`` over all entries is divided by
    ``Y.shape[1]``.

    Entries whose target is 0 contribute exactly 0, even when the predicted
    probability there is 0. Evaluating ``0 * log(0)`` directly would give
    NaN and make the whole loss NaN. A predicted probability of 0 at a
    non-zero target still yields ``inf``.
    """
    # where the target is 0, feed log() a harmless 1 so the term is 0 * 0
    safe_probs = np.where(Y == 0, 1.0, Yhat)
    return -np.sum(Y * np.log(safe_probs)) / Y.shape[1]


In [27]:
# StandardScaler is used below but is not imported by any visible cell;
# import it here so this cell also runs on a fresh kernel.
from sklearn.preprocessing import StandardScaler

# With shuffle=True the loader reads the full set (60000 training / 10000 test
# samples) and returns the requested number of randomly drawn samples.
train_images, train_labels = get_mnist_data(train_images_path, train_labels_path, 60000, shuffle=True)

test_images, test_labels = get_mnist_data(test_images_path, test_labels_path, 10000, _is=False, shuffle=True)

print(train_images.shape, train_labels.shape)
print(test_images.shape, test_labels.shape)

# Stack both splits row-wise (one sample per row); labels keep the same order.
all_images = np.concatenate([train_images, test_images], axis=0)
all_labels = np.concatenate([train_labels, test_labels], axis=0)

# Standardize every pixel column over the combined data set.
scaler = StandardScaler()
all_images_scaled = scaler.fit_transform(all_images)
print(all_images_scaled.shape)

(60000, 784) (60000,)
(10000, 784) (10000,)
(70000, 784)


In [28]:
# Doing PCA, note that each row is a datapoint
from sklearn.decomposition import PCA

# Number of principal components to keep (k = 100) - change it to experiment.
N_COMPONENTS = 100
pca = PCA(n_components=N_COMPONENTS)

# Fit on the scaled data; every row of the input is one data point.
pca.fit(all_images_scaled)

In [29]:
# Projection matrix: the fitted components transposed, so that every column
# of U is one principal direction.
U = np.transpose(pca.components_)
U.shape

(784, 100)

In [30]:
# One data point per column: X has shape (D, N).
X = all_images_scaled.T
# Take the sizes from the data instead of hard-coding 28 * 28.
D, N = X.shape

# Subtract the per-feature mean from every column in a single broadcast
# operation (replaces a Python loop over all N columns). The result is kept
# in float64, like the zero-initialised buffer the loop used to fill.
x_mean = X.mean(1)
Xhat = (X - x_mean[:, np.newaxis]).astype(np.float64, copy=False)

# Reduced-dimension data Z: projection of Xhat onto the sub-space spanned by
# the columns of U.
Z = U.T.dot(Xhat)
Z.shape

(100, 70000)

In [32]:
# 95% of the samples form the training set - change this rate if you like.
M = int(Z.shape[1] * 0.95)
X_train = Z[:, :M]
# one-hot targets for the 10 digit classes, one column per training sample
Y_train = convert_labels(all_labels[:M], 10)

print(X_train.shape)
print(Y_train.shape)

# NOTE(review): ANN_3layer_SolveClassification is not defined in the visible
# cells; 1e-2 and 4000 are presumably the learning rate and the iteration
# count - confirm against its definition.
W1, W2, b1, b2 = ANN_3layer_SolveClassification(X_train, Y_train, 1e-2,4000, num = 100)

(100, 66500)
(10, 66500)
iter 0, loss: 2.302792
iter 100, loss: 2.228878
iter 200, loss: 1.988343
iter 300, loss: 1.525824
iter 400, loss: 1.110886
iter 500, loss: 0.847378
iter 600, loss: 0.673944
iter 700, loss: 0.564893
iter 800, loss: 0.495435
iter 900, loss: 0.448740
iter 1000, loss: 0.415549
iter 1100, loss: 0.390785
iter 1200, loss: 0.371546
iter 1300, loss: 0.356060
iter 1400, loss: 0.343235
iter 1500, loss: 0.332355
iter 1600, loss: 0.322932
iter 1700, loss: 0.314642
iter 1800, loss: 0.307240
iter 1900, loss: 0.300557
iter 2000, loss: 0.294487
iter 2100, loss: 0.288920
iter 2200, loss: 0.283779
iter 2300, loss: 0.278985
iter 2400, loss: 0.274488
iter 2500, loss: 0.270250
iter 2600, loss: 0.266244
iter 2700, loss: 0.262453
iter 2800, loss: 0.258861
iter 2900, loss: 0.255435
iter 3000, loss: 0.252156
iter 3100, loss: 0.249012
iter 3200, loss: 0.245993
iter 3300, loss: 0.243091
iter 3400, loss: 0.240301
iter 3500, loss: 0.237613
iter 3600, loss: 0.235016
iter 3700, loss: 0.232502

In [34]:
from sklearn.metrics import accuracy_score # for evaluating results
Y = all_labels
# accuracy on the samples the network was trained on
Y_pred_train = predict(W1, W2, b1, b2, X_train)
print('accuracy training data: ', accuracy_score(Y[:M], Y_pred_train))

# the remaining columns of Z (from index M on) are the validation set
X_val = Z[:, M:]
# MNIST has 10 digit classes (0-9), the same count used for Y_train
Y_val = convert_labels(Y[M:], 10)


Y_pred_val = predict(W1, W2, b1, b2, X_val)
print(X_val.shape, Y_pred_val.shape, 'accuracy validation data: ', accuracy_score(Y[M:], Y_pred_val))
print(Y[M:])
print(Y_pred_val)

accuracy training data:  0.9330977443609022
(100, 3500) (3500,) accuracy validation data:  0.9208571428571428
[0 7 8 ... 8 2 9]
[0 2 8 ... 8 2 9]
