In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from tqdm import tqdm

In [3]:
from LeNet import LeNet
from data_process import load_data,data_convert
from evaluate import softmax,cal_accuracy

In [4]:
# Location of the MNIST IDX files. Despite the *_dir suffix, the four names
# below hold file names, not directories; the load log shows them being
# read from underneath mnist_dir.
mnist_dir = "./mnist_data/"
train_data_dir = "train-images.idx3-ubyte"
train_label_dir = "train-labels.idx1-ubyte"
test_data_dir = "t10k-images.idx3-ubyte"
test_label_dir = "t10k-labels.idx1-ubyte"

In [5]:
# Read the four MNIST files into arrays via the project helper load_data.
# Per the load log, images come back flattened as (n, 784) and labels as
# (n, 1), with 60000 training and 10000 test samples.
train_images, train_labels, test_images, test_labels = load_data(mnist_dir, train_data_dir, train_label_dir, test_data_dir, test_label_dir)
print("Got data. ") 

Loading MNIST data from files...
./mnist_data/train-images.idx3-ubyte
Load images from ./mnist_data/train-images.idx3-ubyte, number: 60000, data shape: (60000, 784)
Load images from ./mnist_data/train-labels.idx1-ubyte, number: 60000, data shape: (60000, 1)
Load images from ./mnist_data/t10k-images.idx3-ubyte, number: 10000, data shape: (10000, 784)
Load images from ./mnist_data/t10k-labels.idx1-ubyte, number: 10000, data shape: (10000, 1)
Got data. 


In [None]:
def show_faults(fault_list, figsize=(2, 2)):
    """Display the test images at the given indices, titled with their labels.

    Reads the notebook-global ``test_images`` and ``test_labels``.

    Args:
        fault_list: iterable of row indices into ``test_images`` (for
            example, the indices of misclassified samples).
        figsize: matplotlib figure size in inches used for every image.
    """
    for i in fault_list:
        img = np.reshape(test_images[i, :], (28, 28))
        # The ground-truth digit lives in test_labels. Taking argmax over the
        # pixel row (as the previous code did) yields the position of the
        # brightest pixel, not the label.
        label = int(np.ravel(test_labels[i])[0])
        # Create the figure *before* drawing so figsize applies to the image;
        # calling plt.figure() after matshow only opened an extra empty figure.
        _, ax = plt.subplots(figsize=figsize)
        ax.matshow(img, cmap=plt.get_cmap('gray'))
        ax.set_title("label: {}".format(label))
        ax.axis("off")
        plt.show()

In [7]:
# Cast the training pixels to float, then run both splits through the
# project helper data_convert to obtain the arrays used for training (x, y)
# and validation (x_val, y_val).
# NOTE(review): test_images is passed without the float cast applied to
# train_images — confirm data_convert does not depend on the dtype.
# NOTE(review): 60000 / 10000 match the sample counts in the load log and 10
# is presumably the number of classes — confirm against data_process.
train_images = train_images.astype(float)
x,y = data_convert(train_images, train_labels,60000,10)
x_val , y_val = data_convert(test_images,test_labels,10000,10)

In [8]:
def shuffle_batch(batch_size):
    """Draw a random mini-batch from the notebook-global ``x`` and ``y``.

    ``batch_size`` indices are drawn independently from ``[0, len(x))``, so
    the same sample can appear more than once in a batch. ``x`` is indexed
    along its first axis and ``y`` along its last axis.

    Returns:
        A tuple ``(inputs, targets)`` for the drawn indices.
    """
    picks = np.random.randint(0, len(x), size=batch_size)
    batch_inputs = x[picks]
    # y keeps the sample axis last, so select on the transpose and flip back.
    batch_targets = y.T[picks].T
    return batch_inputs, batch_targets

In [9]:
def softmax(y_pred,y):
    """Softmax cross-entropy loss, its gradient, and accuracy for one batch.

    NOTE: this definition shadows the ``softmax`` imported from ``evaluate``
    earlier in the notebook.

    Args:
        y_pred: raw class scores, shape ``(batch, classes)``.
        y: one-hot targets laid out as ``(classes, batch)``.

    Returns:
        ``(loss, grad, acc)`` where ``loss`` is the mean cross-entropy,
        ``grad`` is d(loss)/d(y_pred) with shape ``(batch, classes)``, and
        ``acc`` is the fraction of rows whose arg-max matches the target.
    """
    batch_size, _ = y_pred.shape
    # Guard against overflow: softmax is invariant to subtracting a per-row
    # constant, so shift every row so its largest score is 0 before exp().
    shifted = y_pred - y_pred.max(axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    row_sum = exp_scores.sum(axis=1, keepdims=True)
    probs = exp_scores / row_sum
    # Take the log analytically instead of log(probs): a probability that
    # underflows to 0 would give -inf, and -inf * 0 poisons the loss with nan.
    log_probs = shifted - np.log(row_sum)
    loss = -(log_probs.T * y).sum() / batch_size
    grad = (probs - y.T) / batch_size
    acc = (probs.argmax(axis=1) == y.argmax(axis=0)).mean()
    return loss, grad, acc

In [8]:
def softmax_loss(y_pred, y):
    """Softmax cross-entropy for integer class labels.

    Args:
        y_pred: raw class scores, shape ``(N, C)``.
        y: integer class indices, shape ``(N,)`` or ``(N, 1)``.

    Returns:
        ``(loss, grad, acc)``: mean cross-entropy, d(loss)/d(y_pred) with
        shape ``(N, C)``, and the fraction of correctly classified rows.
    """
    N = y_pred.shape[0]
    # Flatten the labels first. Indexing with an (N, 1) label array broadcasts
    # against the row index into an (N, N) selection, which silently corrupts
    # both the loss and the gradient.
    labels = np.asarray(y).reshape(-1).astype(int)
    rows = np.arange(N)
    # Shift each row so its max is 0; softmax is unchanged and exp() cannot
    # overflow.
    shifted = y_pred - np.max(y_pred, axis=1, keepdims=True)
    ex = np.exp(shifted)
    sumx = np.sum(ex, axis=1, keepdims=True)
    probs = ex / sumx
    loss = np.mean(np.log(sumx[:, 0]) - shifted[rows, labels])
    grad = probs.copy()
    grad[rows, labels] -= 1
    grad /= N
    acc = np.mean(np.argmax(probs, axis=1) == labels)
    return loss, grad, acc

In [13]:
class Adam:
    """Adam optimizer that updates a model's parameters in place.

    The model must expose ``get_params()`` and ``get_grad()`` returning
    equally long, index-aligned sequences of NumPy arrays. The arrays
    returned by ``get_params()`` have to be the model's live parameter
    buffers, because the update is written into them with ``-=``.

    Args:
        model: object providing ``get_params()`` and ``get_grad()``.
        lr: base learning rate.
        beta1: decay rate of the first-moment (mean) estimate.
        beta2: decay rate of the second-moment (uncentered variance) estimate.
        eps: small constant added to the denominator for numerical safety.
    """

    def __init__(self, model, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-7):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.iter = 0          # number of completed step() calls
        self.model = model
        self.m = None          # first-moment estimates, one per parameter
        self.v = None          # second-moment estimates, one per parameter
        self.params = None
        self.grad = None

    def step(self):
        """Apply one Adam update using the model's current gradients.

        Raises:
            ValueError: if the model reports a different number of parameter
                arrays and gradient arrays.
        """
        # Use the model this optimizer was constructed with. Reading a
        # notebook-global ``model`` here would silently update whichever
        # model happens to be bound to that name (or fail with NameError).
        self.params = self.model.get_params()
        self.grad = self.model.get_grad()
        if len(self.params) != len(self.grad):
            raise ValueError(
                "get_params() returned {} arrays but get_grad() returned {}".format(
                    len(self.params), len(self.grad)))

        # Moment buffers are allocated lazily on the first step, once the
        # parameter shapes are known.
        if self.m is None:
            self.m = [np.zeros_like(param) for param in self.params]
            self.v = [np.zeros_like(g) for g in self.grad]

        self.iter += 1
        # Fold both bias corrections into a single effective step size.
        lr_t = self.lr * np.sqrt(1.0 - self.beta2 ** self.iter) / (1.0 - self.beta1 ** self.iter)

        for i, g in enumerate(self.grad):
            # Exponential moving averages written in incremental form:
            # m <- m + (1 - beta1) * (g - m),  v <- v + (1 - beta2) * (g^2 - v)
            self.m[i] += (1 - self.beta1) * (g - self.m[i])
            self.v[i] += (1 - self.beta2) * (g ** 2 - self.v[i])
            self.params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + self.eps)

In [10]:
from AdaGrad import AdaGrad

In [11]:
# Train a fresh LeNet for 10 epochs of randomly drawn mini-batches, updating
# the weights with the model's own update() method.
batch_size = 256


model = LeNet()
# NOTE(review): this AdaGrad optimizer is constructed but never stepped — its
# update call below is commented out and model.update() is used instead.
optimizer = AdaGrad(model,1e-3)
for e in range(10):
    # One iteration per whole batch that fits in x (234 in the recorded run).
    # NOTE(review): shuffle_batch draws every batch independently, so an
    # "epoch" is a fixed number of random draws, not a full pass over x.
    pbar = tqdm(range(0, int(x.shape[0]/batch_size)), ncols=100)
    for i in pbar:
        X_train,y_train = shuffle_batch(batch_size)
        # model.fit appears to be the forward pass here: its result is fed to
        # the loss as class scores — TODO confirm against LeNet.fit.
        y_pred = model.fit(X_train,batch_size)

        # softmax is the notebook's own definition if that cell has been run,
        # otherwise the one imported from evaluate.
        loss, grad, acc = softmax(y_pred, y_train)
        model.back_prop(grad)
        model.update()
        #optimizer.update()
        pbar.set_postfix(loss=loss, acc=acc)

100%|██████████████████████████████████████| 234/234 [07:03<00:00,  1.81s/it, acc=0.918, loss=0.326]
100%|██████████████████████████████████████| 234/234 [07:06<00:00,  1.82s/it, acc=0.953, loss=0.171]
100%|██████████████████████████████████████| 234/234 [06:40<00:00,  1.71s/it, acc=0.961, loss=0.113]
100%|██████████████████████████████████████| 234/234 [06:50<00:00,  1.75s/it, acc=0.953, loss=0.148]
 57%|█████████████████████▌                | 133/234 [03:51<03:28,  2.07s/it, acc=0.941, loss=0.188]

: 

In [15]:
# Train a fresh LeNet for 10 epochs of randomly drawn mini-batches, this time
# stepping the notebook's Adam optimizer after each backward pass.
batch_size = 256


model = LeNet()
optimizer = Adam(model,1e-3)
for e in range(10):
    # One iteration per whole batch that fits in x.
    # NOTE(review): shuffle_batch draws every batch independently, so an
    # "epoch" is a fixed number of random draws, not a full pass over x.
    pbar = tqdm(range(0, int(x.shape[0]/batch_size)), ncols=100)
    for i in pbar:
        X_train,y_train = shuffle_batch(batch_size)
        # model.fit appears to be the forward pass here: its result is fed to
        # the loss as class scores — TODO confirm against LeNet.fit.
        y_pred = model.fit(X_train,batch_size)

        # y_train is used as one-hot targets here; the commented alternative
        # converts it to class indices with argmax(axis=0) for softmax_loss.
        loss, grad, acc = softmax(y_pred, y_train)
        #loss, grad, acc = softmax_loss(y_pred, y_train.argmax(axis=0))
        model.back_prop(grad)
        #model.update(0.001)
        optimizer.step()
        pbar.set_postfix(loss=loss, acc=acc)

# val_X = data["X_val"]
# val_y = data["y_val"]
# y_pred = model.forward(val_X)
# y_pred = np.argmax(y_pred, axis=1)
# acc = np.mean(y_pred == val_y.reshape(1, val_y.shape[0]))
# if acc > best_acc:
#     best_acc = acc
#     best_weight = model.get_params()
# pbar.set_postfix(val_acc=acc)

  5%|█▉                                      | 11/234 [00:19<06:43,  1.81s/it, acc=0.328, loss=1.92]


KeyboardInterrupt: 