In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from future import standard_library
standard_library.install_aliases()
from builtins import range
from builtins import object
import os
import pickle as pickle
from IPython.display import clear_output

In [None]:
# Fractions of the full dataset that load_mnist() sets aside for the
# validation and test splits; the remainder is used for training.
VAL_RATIO = 0.1
TEST_RATIO = 0.2

In [33]:
# Download (or load from the local scikit-learn cache) the MNIST digits.
# as_frame=False asks for plain NumPy arrays: recent scikit-learn versions
# return a pandas DataFrame by default, which has no .reshape() and would
# break load_mnist() below.
MNIST = fetch_openml("mnist_784", as_frame=False)

In [34]:
def load_mnist():
    """Split the module-level MNIST bunch into train / validation / test sets.

    The split is positional (no shuffling): the first samples are used for
    training, the next ``VAL_RATIO`` fraction for validation and the final
    ``TEST_RATIO`` fraction for testing.

    Returns:
        X_train, X_val, X_test: image arrays reshaped to (N, 1, 28, 28) and
            divided by 255.0.
        y_train, y_val, y_test: the matching slices of ``MNIST["target"]``,
            returned as NumPy arrays with the labels left as stored.
    """
    # np.asarray makes this work whether fetch_openml returned NumPy arrays
    # or pandas objects (a DataFrame has no .reshape()).
    data = np.asarray(MNIST["data"])
    target = np.asarray(MNIST["target"])

    total = len(data)
    val_len = int(total * VAL_RATIO)
    test_len = int(total * TEST_RATIO)
    train_len = total - val_len - test_len
    # Explicit boundary instead of [-test_len:], which would select the
    # whole dataset when test_len happens to be 0.
    val_end = train_len + val_len

    X_train = data[:train_len].reshape((-1, 1, 28, 28)) / 255.0
    X_val = data[train_len:val_end].reshape((-1, 1, 28, 28)) / 255.0
    X_test = data[val_end:].reshape((-1, 1, 28, 28)) / 255.0
    y_train = target[:train_len]
    y_val = target[train_len:val_end]
    y_test = target[val_end:]
    return X_train, X_val, X_test, y_train, y_val, y_test


In [35]:
# Materialise the three splits once; later cells index into these arrays.
X_train, X_val, X_test, y_train, y_val, y_test = load_mnist()

Basic layers: forward and backward passes for each layer type

In [36]:
#fully connected 
def affine_forward(x, w, b):
    """Forward pass of a fully connected layer.

    Every sample in ``x`` is flattened to a row vector, multiplied by ``w``
    and shifted by ``b``.

    Returns:
        out: array of shape (x.shape[0], w.shape[1]).
        cache: the untouched inputs ``(x, w, b)`` for the backward pass.
    """
    flattened = x.reshape(x.shape[0], -1)
    scores = flattened @ w + b
    return scores, (x, w, b)


def affine_backward(dout, cache):
    """Backward pass of a fully connected layer.

    Args:
        dout: upstream gradient, one row per sample.
        cache: the ``(x, w, b)`` tuple produced by ``affine_forward``.

    Returns:
        dx: gradient w.r.t. ``x``, reshaped back to ``x.shape``.
        dw: gradient w.r.t. ``w``.
        db: gradient w.r.t. ``b`` (``dout`` summed over the sample axis).
    """
    x, w, _ = cache
    flattened = x.reshape(x.shape[0], -1)

    grad_x = np.matmul(dout, w.T).reshape(x.shape)
    grad_w = np.matmul(flattened.T, dout)
    grad_b = np.sum(dout, axis=0)
    return grad_x, grad_w, grad_b

#relu
def relu_forward(x):
    """Apply the element-wise rectifier ``max(x, 0)``.

    Returns the activated array together with the original input, which the
    backward pass uses as its cache.
    """
    activated = np.maximum(x, 0)
    return activated, x


def relu_backward(dout, cache):
    """Backward pass of ReLU.

    The upstream gradient ``dout`` passes through only where the cached
    forward input was strictly positive; everywhere else it is multiplied
    by zero.
    """
    forward_input = cache
    passes_gradient = forward_input > 0
    return passes_gradient * dout

#dropout
def dropout_forward(x, dropout_param):
    """Forward pass of inverted dropout.

    Args:
        x: input array of any shape.
        dropout_param: dict with keys
            "p": probability of keeping each unit (must be > 0 in train mode
                because the mask is divided by it),
            "mode": "train" or "test",
            "seed" (optional): if present, the global NumPy RNG is re-seeded
                with it before the mask is drawn.

    Returns:
        out: array with the same shape and dtype as ``x``.
        cache: ``(dropout_param, mask)``; ``mask`` is None in test mode.

    Raises:
        ValueError: if ``mode`` is neither "train" nor "test".
    """
    p, mode = dropout_param["p"], dropout_param["mode"]
    if "seed" in dropout_param:
        np.random.seed(dropout_param["seed"])

    mask = None
    if mode == "train":
        # Keep each unit with probability p and rescale the survivors by 1/p
        # so the expected activation matches test mode.
        mask = np.random.binomial(1, p, size=x.shape) / p
        # The original never computed the train-mode output, so `out` stayed
        # None and the astype() below raised.
        out = x * mask
    elif mode == "test":
        out = x
    else:
        raise ValueError("Invalid dropout mode {!r}".format(mode))

    cache = (dropout_param, mask)
    out = out.astype(x.dtype, copy=False)

    return out, cache


def dropout_backward(dout, cache):
    """Backward pass of inverted dropout.

    In "train" mode the upstream gradient is multiplied by the cached mask;
    in "test" mode it is returned unchanged. Any other mode yields None.
    """
    dropout_param, mask = cache
    mode = dropout_param["mode"]

    if mode == "train":
        return dout * mask
    if mode == "test":
        return dout
    return None

#convolution 
def conv_forward_naive(x, w, b, conv_param):
    """Naive (loop-based) forward pass of a 2-D convolution.

    Args:
        x: input of shape (N, C, H, W).
        w: filters of shape (F, C, HH, WW).
        b: one bias per filter, indexed as ``b[f]``.
        conv_param: dict with "pad" (zero padding added to both spatial
            sides) and "stride".

    Returns:
        out: array of shape (N, F, out_H, out_W).
        cache: ``(x, w, b, conv_param)`` for the backward pass.

    Raises:
        AssertionError: ("shape error") when the padded input size minus the
            filter size is not a multiple of the stride.
    """
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape
    pad, stride = conv_param["pad"], conv_param["stride"]

    assert (H + 2*pad - HH) % stride == 0, "shape error"
    assert (W + 2*pad - WW) % stride == 0, "shape error"

    out_H = int((H + 2*pad - HH) / stride + 1)
    out_W = int((W + 2*pad - WW) / stride + 1)
    out = np.zeros((N, F, out_H, out_W))

    # Pad only the two spatial axes.
    x_padded = np.pad(x, ((0,0), (0,0), (pad,pad), (pad,pad)))

    # np.ndindex visits (n, f, i, j) in the same order as four nested loops.
    for n, f, i, j in np.ndindex(N, F, out_H, out_W):
        top, left = i * stride, j * stride
        receptive_field = x_padded[n, :, top:top + HH, left:left + WW]
        out[n, f, i, j] = np.sum(receptive_field * w[f]) + b[f]

    return out, (x, w, b, conv_param)


def conv_backward_naive(dout, cache):
    """Naive (loop-based) backward pass of a 2-D convolution.

    Args:
        dout: upstream gradient of shape (N, F, out_H, out_W).
        cache: ``(x, w, b, conv_param)`` as returned by ``conv_forward_naive``.

    Returns:
        dx: gradient w.r.t. ``x`` (padding border cropped away).
        dw: gradient w.r.t. ``w``.
        db: gradient w.r.t. ``b``.

    Raises:
        AssertionError: ("shape error") when the padded input size minus the
            filter size is not a multiple of the stride.
    """
    x, w, b, conv_param = cache
    pad, stride = conv_param["pad"], conv_param["stride"]
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape

    assert (H + 2*pad - HH) % stride == 0, "shape error"
    assert (W + 2*pad - WW) % stride == 0, "shape error"

    out_H = int((H + 2*pad - HH) / stride + 1)
    out_W = int((W + 2*pad - WW) / stride + 1)

    spatial_pad = ((0,0), (0,0), (pad, pad), (pad, pad))
    x_padded = np.pad(x, spatial_pad)
    # Gradients are accumulated in padded coordinates and cropped at the end.
    dx_padded = np.pad(np.zeros_like(x), spatial_pad)
    dw = np.zeros_like(w)
    db = np.zeros_like(b)

    for n, f, i, j in np.ndindex(N, F, out_H, out_W):
        rows = slice(i * stride, i * stride + HH)
        cols = slice(j * stride, j * stride + WW)
        upstream = dout[n, f, i, j]
        # The bias enters through an add gate: the gradient passes straight through.
        db[f] += upstream
        # The weights enter through a multiply gate: scale by the input window.
        dw[f] += x_padded[n, :, rows, cols] * upstream
        # The input enters through a multiply gate: scale by the filter.
        dx_padded[n, :, rows, cols] += w[f] * upstream

    # Cut away the border that was added by the padding.
    dx = dx_padded[:, :, pad:pad + H, pad:pad + W]
    return dx, dw, db

#max pooling
def max_pool_forward_naive(x, pool_param):
    """Naive (loop-based) forward pass of max pooling.

    Args:
        x: input of shape (N, C, H, W).
        pool_param: dict with "pool_height", "pool_width" and "stride".

    Returns:
        out: array of shape (N, C, out_h, out_w) holding the maximum of each
            pooling window.
        cache: ``(x, pool_param)`` for the backward pass.
    """
    N, C, H, W = x.shape
    pool_h, pool_w = pool_param["pool_height"], pool_param["pool_width"]
    stride = pool_param["stride"]

    out_h = int((H - pool_h) / stride + 1)
    out_w = int((W - pool_w) / stride + 1)
    out = np.zeros((N, C, out_h, out_w))

    # np.ndindex visits (n, c, i, j) in the same order as four nested loops.
    for n, c, i, j in np.ndindex(N, C, out_h, out_w):
        top, left = i * stride, j * stride
        out[n, c, i, j] = np.max(x[n, c, top:top + pool_h, left:left + pool_w])

    return out, (x, pool_param)


def max_pool_backward_naive(dout, cache):
    """Naive (loop-based) backward pass of max pooling.

    Args:
        dout: upstream gradient of shape (N, C, out_h, out_w).
        cache: ``(x, pool_param)`` as returned by ``max_pool_forward_naive``.

    Returns:
        dx: gradient w.r.t. ``x``, same shape as ``x``. Each upstream value
            is routed to the position(s) holding the maximum of its window;
            if several positions tie for the maximum, all of them receive it.
    """
    x, pool_param = cache
    N, C, H, W = x.shape
    pooling_h = pool_param["pool_height"]
    pooling_w = pool_param["pool_width"]
    stride = pool_param["stride"]

    out_h = int((H - pooling_h) / stride + 1)
    out_w = int((W - pooling_w) / stride + 1)

    dx = np.zeros_like(x)
    for n in range(N):
        for c in range(C):
            for h in range(out_h):
                for w in range(out_w):
                    rows = slice(h*stride, h*stride + pooling_h)
                    cols = slice(w*stride, w*stride + pooling_w)
                    window_x = x[n, c, rows, cols]
                    mask = (window_x == np.max(window_x))
                    # Accumulate instead of overwrite: when stride is smaller
                    # than the pool size the windows overlap, and a plain
                    # assignment would wipe out gradients written by earlier
                    # windows.
                    dx[n, c, rows, cols] = dx[n, c, rows, cols] + mask * dout[n, c, h, w]

    return dx
#softmax
def softmax(x,y):
    """Softmax cross-entropy loss and its gradient.

    Args:
        x: scores of shape (N, num_classes).
        y: N class labels; anything ``np.asarray(y).astype(np.int64)`` can
            convert (integers or numeric strings).

    Returns:
        loss: mean negative log-likelihood over the N samples.
        dx: gradient of ``loss`` w.r.t. ``x``, same shape as ``x``.
    """
    # Subtract the row-wise maximum for numerical stability.
    logits = x - np.max(x, axis= 1, keepdims = True)
    Z = np.sum(np.exp(logits), axis =1 ,keepdims = True)
    log_probs = logits - np.log(Z)
    probs = np.exp(log_probs)
    N = x.shape[0]
    # int64 rather than int8: an 8-bit label wraps around for class indices
    # above 127 and would silently select the wrong column.
    y = np.asarray(y).astype(np.int64)
    rows = np.arange(N)
    loss = -np.sum(log_probs[rows, y]) / N
    dx = probs.copy()
    dx[rows, y] -= 1
    dx /= N
    return loss, dx

#For predict
def predict_softmax(x):
    """Turn a (N, num_classes) score matrix into row-wise softmax probabilities.

    The row maximum is subtracted first and the normalisation is carried out
    in log space before exponentiating.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    log_partition = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    return np.exp(shifted - log_partition)


In [37]:
def sgd(w, dw, config=None):
    """Vanilla stochastic gradient descent step.

    ``w`` is updated in place by ``learning_rate * dw``. ``config`` may carry
    "learning_rate"; when the key (or the whole dict) is missing it is set
    to 1.

    Returns:
        The updated ``w`` (same object) and the config dict.
    """
    config = {} if config is None else config
    step_size = config.setdefault("learning_rate", 1)

    w -= step_size * dw
    return w, config

Model sequences

Keys expected in the `params` dict passed to `Sequential.add` for each layer type:

conv_param = {"pad", "stride", "num_filters", "channel", "filter_size"}    
pool_param = {"pool_height", "pool_width", "stride"}
dense_param = {"num_filters", "H", "W", "hidden_dim"}

In [38]:
import sys
class Sequential():
    """Ordered stack of layers built on the naive NumPy layer functions.

    Layers are appended with ``add`` and executed in insertion order.

    Attributes:
        pipeline: dict mapping a generated layer name ("Conv_1", "Relu_1",
            "Pooling_1", "Dense_1", ...) to its state. Conv and Dense entries
            are lists ``[weights, bias, params]``; Relu entries are
            ``(None, None)``; Pooling entries are ``(None, params)``.
        weight_bias: flat list ``[weight_name, bias_name, ...]`` with one
            pair per Conv/Dense layer, in insertion order. Bias names are
            "Bias_1", "Bias_2", ... and are shared across layer types.
    """

    def __init__(self, weight_scale = 1e-03, reg = 0.0, dtype = np.float32):
        """Create an empty model.

        Args:
            weight_scale: standard deviation of the normal weight initialiser.
            reg: L2 regularisation strength used by ``loss``.
            dtype: dtype the parameters are cast to by ``type_fix``.
        """
        self.pipeline = {}
        self.weight_bias = []
        self.layers_name = set(["Relu", "Conv", "Dense", "Pooling"])
        self.weight_scale = weight_scale
        self.reg = reg
        self.dtype = dtype
        # 1-based counters used to generate unique layer names.
        self.num_conv = 1
        self.num_bias = 1
        self.num_relu = 1
        self.num_pool = 1
        self.num_dense = 1

    def add(self, layer, params = None):
        """Append a layer to the end of the pipeline.

        Args:
            layer: one of "Relu", "Conv", "Dense", "Pooling".
            params: layer configuration dict; required for every layer type
                except "Relu". Conv reads "num_filters", "channel" and
                "filter_size" here (and "pad"/"stride" in the forward pass);
                Dense reads "num_filters", "H", "W" and "hidden_dim"; Pooling
                params are stored and handed to the pooling functions.

        Raises:
            AssertionError: for an unknown layer name or missing ``params``.
        """
        assert layer in self.layers_name, "You have to choose layer in {}".format(self.layers_name)

        if layer == "Conv":
            assert params is not None, "Enter params"
            name = "Conv_{}".format(self.num_conv)
            weights = self.weight_scale * np.random.randn(
                params["num_filters"], params["channel"],
                params["filter_size"], params["filter_size"])
            self.pipeline[name] = [weights, np.zeros(params["num_filters"]), params]
            self.weight_bias.append(name)
            self.weight_bias.append("Bias_{}".format(self.num_bias))
            self.num_conv += 1
            self.num_bias += 1

        elif layer == "Relu":
            self.pipeline["Relu_{}".format(self.num_relu)] = (None, None)
            self.num_relu += 1

        elif layer == "Dense":
            assert params is not None, "Enter params"
            name = "Dense_{}".format(self.num_dense)
            # The dense layer consumes the flattened (num_filters, H, W) volume.
            weights = self.weight_scale * np.random.randn(
                params["num_filters"] * params["H"] * params["W"], params["hidden_dim"])
            self.pipeline[name] = [weights, np.zeros(params["hidden_dim"]), params]
            self.weight_bias.append(name)
            self.weight_bias.append("Bias_{}".format(self.num_bias))
            self.num_dense += 1
            self.num_bias += 1

        elif layer == "Pooling":
            assert params is not None, "Enter params"
            self.pipeline["Pooling_{}".format(self.num_pool)] = (None, params)
            self.num_pool += 1

    def _param_layers(self):
        """Return ``(weight_layer_name, bias_name)`` pairs in pipeline order."""
        return list(zip(self.weight_bias[0::2], self.weight_bias[1::2]))

    def _forward(self, x, keep_caches):
        """Run ``x`` through every layer in insertion order.

        Returns the output of the last layer and, when ``keep_caches`` is
        true, a list of ``(layer_name, cache)`` pairs for backpropagation
        (otherwise an empty list).
        """
        caches = []
        for layer, entry in self.pipeline.items():
            kind = layer[:4]
            if kind == "Pool":
                x, cache = max_pool_forward_naive(x, entry[1])
            elif kind == "Relu":
                x, cache = relu_forward(x)
            elif kind == "Conv":
                x, cache = conv_forward_naive(x, entry[0], entry[1], entry[2])
            elif kind == "Dens":
                x, cache = affine_forward(x, entry[0], entry[1])
            else:
                # add() validates names, so this only fires when the
                # pipeline dict was modified directly.
                raise AssertionError("Improper layer is in pipeline!!")
            if keep_caches:
                caches.append((layer, cache))
        return x, caches

    def type_fix(self):
        """Cast the weights and biases of every Conv/Dense layer to ``self.dtype``."""
        for layer, entry in self.pipeline.items():
            if layer[:4] == "Pool" or layer[:4] == "Relu":
                continue
            # Entries are [weights, bias, params] lists; cast the two arrays
            # individually (the list itself has no astype()).
            entry[0] = entry[0].astype(self.dtype)
            entry[1] = entry[1].astype(self.dtype)

    def loss(self, x, y= None):
        """Compute the softmax loss and parameter gradients for a batch.

        Args:
            x: input batch fed to the first layer.
            y: class labels for the batch. When None, only the forward pass
                is run and the raw scores of the last layer are returned.

        Returns:
            ``(loss, grads)`` when ``y`` is given. ``loss`` is the data loss
            plus ``0.5 * reg * sum(W * W)`` over all weight arrays; ``grads``
            maps every name in ``self.weight_bias`` to its gradient.
            The score array when ``y`` is None.
        """
        scores, caches = self._forward(x, keep_caches = y is not None)
        if y is None:
            return scores

        data_loss, dx = softmax(scores, y)
        bias_of = dict(self._param_layers())
        grads = {}
        reg_loss = 0.0

        # Walk the layers back to front, pairing each with its own cache.
        for layer, cache in reversed(caches):
            kind = layer[:4]
            if kind == "Pool":
                dx = max_pool_backward_naive(dx, cache)
            elif kind == "Relu":
                dx = relu_backward(dx, cache)
            else:
                backward = conv_backward_naive if kind == "Conv" else affine_backward
                dx, dW, db = backward(dx, cache)
                W = self.pipeline[layer][0]
                # L2 regularisation applies to the weights only, not the biases.
                dW += self.reg * W
                reg_loss += 0.5 * self.reg * np.sum(W * W)
                grads[layer] = dW
                grads[bias_of[layer]] = db

        return data_loss + reg_loss, grads

    def compile_(self, optimizer, num_epochs = 10, **kwargs):
        """Attach an optimizer and reset the training bookkeeping.

        Args:
            optimizer: update rule called as ``optimizer(w, dw, config)`` and
                returning ``(next_w, next_config)``.
            num_epochs: stored as ``self.num_epochs``.
            **kwargs: optional "batch_size" (default 256), "learning_rate"
                (default 1e-02), "checkpoint_name" (default None) and
                "verbose" (default True).

        Raises:
            AssertionError: if an unrecognised keyword argument is passed.
        """
        self.batch_size = kwargs.pop("batch_size", 256)
        learning_rate = kwargs.pop("learning_rate", 1e-02)
        self.checkpoint_name = kwargs.pop("checkpoint_name" , None)
        self.verbose = kwargs.pop("verbose", True)

        assert len(kwargs) <=0, "Unrecognized arguments {}".format(", ".join("{}".format(k) for k in list(kwargs.keys())))

        self.loss_history = []
        self.update_rule = optimizer
        self.num_epochs = num_epochs

        base_config = {"learning_rate" : learning_rate}
        self.optimizer_configs = dict(base_config)
        # One independent config per weight array and per bias, copied from
        # the base settings only. Copying the growing dict (as before) nested
        # earlier layers' configs inside later ones.
        for name in self.weight_bias:
            self.optimizer_configs[name] = dict(base_config)

    def fit(self, X_data, y_data,batch_size = 256,num_epochs = 10):
        """Train with mini-batches taken in order; ``compile_`` must be called first.

        Every sample is visited once per epoch; the final batch of an epoch
        may be smaller than ``batch_size``. The loss of each batch is appended
        to ``self.loss_history``.

        Args:
            X_data: training inputs, sliced along the first axis.
            y_data: training labels, sliced the same way.
            batch_size: number of samples per update.
            num_epochs: number of passes over the data.
        """
        num_train = X_data.shape[0]
        param_layers = self._param_layers()

        for epoch in range(num_epochs):
            if getattr(self, "verbose", True):
                print("epoch : {}".format(epoch))
            for start in range(0, num_train, batch_size):
                X_batch = X_data[start:start + batch_size]
                y_batch = y_data[start:start + batch_size]
                loss, grads = self.loss(X_batch, y_batch)
                self.loss_history.append(loss)

                # Weight update: each array uses its own optimizer config so
                # stateful update rules never mix weight and bias state.
                for layer, bias_layer in param_layers:
                    entry = self.pipeline[layer]
                    entry[0], self.optimizer_configs[layer] = self.update_rule(
                        entry[0], grads[layer], self.optimizer_configs[layer])
                    entry[1], self.optimizer_configs[bias_layer] = self.update_rule(
                        entry[1], grads[bias_layer], self.optimizer_configs[bias_layer])

    def predict(self,X_data, y_data,batch_size = 256):
        """Return softmax class probabilities for every sample in ``X_data``.

        Args:
            X_data: inputs, sliced along the first axis.
            y_data: unused; kept so existing call sites keep working.
            batch_size: number of samples per forward pass.

        Returns:
            Array with one row of class probabilities per input sample, in
            input order.
        """
        N = X_data.shape[0]
        # For an empty input still run one (empty) forward pass so the result
        # has the right number of columns.
        starts = list(range(0, N, batch_size)) or [0]
        batch_probs = []
        for start in starts:
            scores, _ = self._forward(X_data[start:start + batch_size], keep_caches = False)
            batch_probs.append(predict_softmax(scores))
        return np.concatenate(batch_probs, axis = 0)


                
        

In [39]:
# Three conv blocks followed by a 10-way dense classifier.
# Each 3x3 convolution uses pad=1 and stride=1, which keeps the spatial size;
# each 2x2 pooling with stride 2 halves it: 28x28 -> 14x14 -> 7x7.
model = Sequential()
model.add("Conv", params = {"pad" : 1, "stride" : 1,"channel" : 1, "num_filters" : 16,  "filter_size" : 3})
model.add("Relu")
model.add("Pooling", params = {"pool_height" : 2, "pool_width" : 2, "stride": 2})
model.add("Conv", params = {"pad" : 1, "stride" : 1,  "channel" : 16, "num_filters" : 32,  "filter_size" : 3})
model.add("Relu")
model.add("Pooling",  params = {"pool_height" : 2, "pool_width" : 2, "stride": 2})
model.add("Conv", params = {"pad" : 1, "stride" : 1,  "channel" : 32, "num_filters" : 64,  "filter_size" : 3})
model.add("Relu")
# The dense layer flattens the final 64 x 7 x 7 volume into 10 scores.
model.add("Dense", params = {"num_filters" : 64, "H": 7, "W": 7, "hidden_dim" : 10})



# 훈련전

In [40]:
# Class probabilities for the first training image before any training.
model.predict(X_train[0:1],y_train[0:1], batch_size = 1)

0


array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]])

In [41]:
# Attach plain SGD as the update rule; must run before model.fit().
model.compile_(optimizer = sgd)

In [1]:
# Train with fit()'s defaults (batch_size=256, num_epochs=10).
# NOTE(review): this cell needs `model` from the cells above in the same
# kernel session; the recorded NameError comes from running it without them.
model.fit(X_train, y_train)

NameError: name 'model' is not defined

# 훈련 후

In [None]:
# Class probabilities for the same first training image after training.
model.predict(X_train[0:1], y_train[0:1],batch_size = 1)