In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
from layers import MultiLayer
from layers import Relu, Affine, SoftmaxWithLoss, Sigmoid
from collections import OrderedDict

In [2]:
def make_one(x):
    if x.ndim == 1: 
        t = np.zeros((x.size,np.unique(x).size))
        for i in range(t.shape[0]):
            t[i,x[i]]=1
        return t

In [5]:
class MultiLayer:
    def __init__(self,input_size,hidden_size,output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.hidden_size.insert(0,self.input_size)
        self.hidden_size.append(self.output_size)
        self.W = {}
        for i in range(len(hidden_size)-1):
            w_key = 'W'+str(i+1)
            b_key = 'b'+str(i+1)
            self.W[w_key] = np.random.randn(hidden_size[i],hidden_size[i+1])
            self.W[b_key] = np.random.randn(hidden_size[i+1])
            
        self.layers = OrderedDict()
        
        for i in range(int(len(self.W)/2-1)):
            j = i*2 
            key1 = 'Affine'+str(i+1)
            key2 = 'Relu'+str(i+1)
            w = list(self.W.keys())[j]
            b = list(self.W.keys())[j+1]
            self.layers[key1] = Affine(self.W[w],self.W[b])
            self.layers[key2] = Relu()
        
        last_num = str(int(len(self.W)/2))
        self.layers['Affine'+last_num] = Affine(self.W['W'+last_num],self.W['b'+last_num])
        self.Lastlayer = SoftmaxWithLoss()
        self.loss_val = []
        self.acc_val = []
    
    #def summary(self):
        
    
    def predict(self,x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self,x,t):
        y = self.predict(x)
        loss = self.Lastlayer.forward(y,t)
        return loss

    def gradient(self,x,t):
        self.loss(x,t)
        dout = 1
        dout = self.Lastlayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        
        grads = {}
        layer_number = int(len(self.layers.keys())/2)
        
        for i in range(1,layer_number):
            grads['W'+str(i)] = self.layers['Affine'+str(i)].dW
            grads['b'+str(i)] = self.layers['Affine'+str(i)].db
            
        return grads
    
    def accuracy(self,x,t):
        y = np.argmax(self.predict(x),axis=1)
        t = np.argmax(t, axis=1)
        acc = np.sum(y==t)/y.size
        return acc
    
    def fit(self,epochs,lr,x,t,x_val,t_val):
        for epoch in range(epochs):
            grads = self.gradient(x,t)
            for key in grads.keys():
                self.W[key] -=  lr*grads[key]
            print("epoch ",epoch,":val_loss===========",self.loss(x_val,t_val),"val_acc:========",self.accuracy(x_val,t_val))
            self.loss_val.append(self.loss(x_val,t_val))
            self.acc_val.append(np.round(self.accuracy(x_val,t_val),2))

In [6]:
mnist = fetch_openml('mnist_784')
X = mnist['data']
y = mnist['target']
X = X.astype(np.float32).values/255.
y = y.astype(np.int32).values
y = make_one(y)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=.4)
X_val, X_test, y_val, y_test = train_test_split(X_test,y_test, test_size=.5)

input_size = X_train.shape[1]
hidden_size = [100,256,100]
output_size = y_train.shape[1]

In [7]:
model = MultiLayer(input_size,hidden_size,output_size)
epochs = 10
lr = 1e-3
model.fit(epochs,lr,X_train,y_train,X_val,y_val)
model.accuracy(X_test,y_test)



0.3905

In [None]:
#핏에 배치 추가 :: 방금 한거

In [12]:
class MultiLayer:
    def __init__(self,input_size,hidden_size,output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.hidden_size.insert(0,self.input_size)
        self.hidden_size.append(self.output_size)
        self.W = {}
        for i in range(len(hidden_size)-1):
            w_key = 'W'+str(i+1)
            b_key = 'b'+str(i+1)
            self.W[w_key] = np.random.randn(hidden_size[i],hidden_size[i+1])
            self.W[b_key] = np.random.randn(hidden_size[i+1])
            
        self.layers = OrderedDict()
        
        for i in range(int(len(self.W)/2-1)):
            j = i*2 
            key1 = 'Affine'+str(i+1)
            key2 = 'Relu'+str(i+1)
            w = list(self.W.keys())[j]
            b = list(self.W.keys())[j+1]
            self.layers[key1] = Affine(self.W[w],self.W[b])
            self.layers[key2] = Relu()
        
        last_num = str(int(len(self.W)/2))
        self.layers['Affine'+last_num] = Affine(self.W['W'+last_num],self.W['b'+last_num])
        self.Lastlayer = SoftmaxWithLoss()
        self.loss_val = []
        self.acc_val = []
    
    #def summary(self):
        
    
    def predict(self,x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self,x,t):
        y = self.predict(x)
        loss = self.Lastlayer.forward(y,t)
        return loss

    def gradient(self,x,t):
        self.loss(x,t)
        dout = 1
        dout = self.Lastlayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        
        grads = {}
        layer_number = int(len(self.layers.keys())/2)
        
        for i in range(1,layer_number):
            grads['W'+str(i)] = self.layers['Affine'+str(i)].dW
            grads['b'+str(i)] = self.layers['Affine'+str(i)].db
            
        return grads
    
    def accuracy(self,x,t):
        y = np.argmax(self.predict(x),axis=1)
        t = np.argmax(t, axis=1)
        acc = np.sum(y==t)/y.size
        return acc
    

    def fit_sgd(self,epochs,batch_size,lr,x,t,x_val,t_val): #미니배치
        if divmod(x.shape[0],batch_size)[1] > 0:
            batch = divmod(x.shape[0],batch_size)[0] + 1
        else:
            batch = divmod(x.shape[0],batch_size)[0]
        for epoch in range(epochs):
            if epoch == 0:
                start = 0
            end = start + batch_size
            if epoch == epochs-1 and divmod(x.shape[0],batch_size)[1] != 0:
                end = start+divmod(x.shape[0],batch_size)[1]
            x_tmp = x[start:end,:]
            t_tmp = t[start:end,:]
            start = end
            for i in range(batch):
                grads = self.gradient(x_tmp,t_tmp)
            for key in grads.keys():
                self.W[key] -=  lr*grads[key]
            if epoch % 20 == 0:
                print("epoch ",epoch,":val_loss===========",self.loss(x_val,t_val),"val_acc:========",self.accuracy(x_val,t_val))
                self.loss_val.append(self.loss(x_val,t_val))
                self.acc_val.append(np.round(self.accuracy(x_val,t_val),2))
                
                
    def fit_gd(self,epochs,lr,x,t,x_val,t_val): #풀배치
        for epoch in range(epochs):
            grads = self.gradient(x,t)
            for key in grads.keys():
                self.W[key] -=  lr*grads[key]
            if epoch % 20 == 0:
                    print("epoch ",epoch,":val_loss===========",self.loss(x_val,t_val),"val_acc:========",self.accuracy(x_val,t_val))
                    self.loss_val.append(self.loss(x_val,t_val))
                    self.acc_val.append(np.round(self.accuracy(x_val,t_val),2))

In [7]:
mnist = fetch_openml('mnist_784')
X = mnist['data']
y = mnist['target']
X = X.astype(np.float32).values/255.
y = y.astype(np.int32).values
y = make_one(y)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=.4)
X_val, X_test, y_val, y_test = train_test_split(X_test,y_test, test_size=.5)

In [18]:
input_size = X.shape[1]
hidden_size = [100,150]
output_size = y.shape[1]
model = MultiLayer(input_size,hidden_size,output_size)

In [23]:
epochs = 10
lr = 1e-3
model.fit_gd(epochs,lr,X_train,y_train,X_val,y_val)
# model.accuracy(X_test,y_test)



In [24]:
epochs = 10
lr = 1e-3
batch_size = 100
model.fit_sgd(batch_size,epochs,lr,X_train,y_train,X_val,y_val)



In [None]:
# 까지 툴안쓰고 넘파이로만 돌리기 

In [1]:
class SGD:
    def __init__(self,lr=1e-3):
        self.lr = lr
    
    def minimize(self, w, grads):
        for key in w.keys():
            w[key] -=  self.lr*grads[key]
            
class Momentum:
    def __init__(self,lr=1e-3,m=0.9):
        self.lr = lr
        self.momentum = m
        self.v = None
    
    def minimize(self,w,grads):
        if self.v is None:
            self.v = {}
            for k,v in w.items():
                self.v[k] = np.zeros_like(v)
        for key in w.keys():
            self.v[key] = self.momentum + self.v[key] - self.lr*grads[key]
            w[key] += self.v[key]

In [None]:
optimizer = Momentum(lr=1e3)

In [None]:
epochs = 100
for epoch in range(epochs):
    grads = model.gradient(X_train,y_train)
    optimizer.minimize(model.W,grads)
    if epoch % 20 == 0:
        print("epoch ",epoch,":val_loss===========",model.loss(X_test,y_test),"val_acc:========",model.accuracy(X_test,y_test))
        model.loss_val.append(model.loss(X_test,y_test))
        model.acc_val.append(np.round(model.accuracy(X_test,y_test),2))

### 코딩하면서 테스트 해본것들