In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
import matplotlib.pyplot as plt

x_train = np.load("MNIST_x_train.npy")
x_test = np.load("MNIST_x_test.npy")
y_train = np.load("MNIST_y_train.npy")
y_test = np.load("MNIST_y_test.npy")


In [2]:
y_test.shape,y_test[0]

((10000,), 7)

In [2]:
print(x_train.shape)
x_train = np.reshape(x_train,(60000,-1))
x_train.shape

(60000, 28, 28)


(60000, 784)

In [None]:
# input reshape 
# model 만들기 (비선형)
# train, visualize 구현 -> tensor로 만들기
# test 구현 -> test input 넣고 -> test_acc_hist 반환 


In [3]:
class MyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        
        self.model = torch.nn.Sequential(
            torch.nn.Linear(28 * 28, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 10),
        )
    def forward(self,x):
        y = self.model(x)
        return y


In [4]:
x_train = np.reshape(x_train,(60000,-1))
x_test = np.reshape(x_test,(10000,-1))
x_train = torch.tensor(x_train,dtype=torch.float32)
x_test = torch.tensor(x_test,dtype=torch.float32)
y_train = torch.tensor(y_train,dtype=torch.long)
y_test = torch.tensor(y_test,dtype=torch.long)


In [30]:
from torch.nn import MSELoss,CrossEntropyLoss
from torch.optim import SGD,Adam,RMSprop
import copy

model = MyModel()
optimizer = Adam(model.parameters(),lr=0.01)
loss_fn = CrossEntropyLoss()

num_epoch = 100
model_hist=[]
loss_hist=[]
train_acc_hist=[]
test_acc_hist=[]



def test():
    global x_test,y_test,test_acc_hist
    # model(x_test)
    # max_idx = argmax
    # acc = sum(y_test == max_idx) / len(y_test)
    y_pred = model(x_test)
    y_pred = torch.exp(y_pred)
    y_pred = y_pred/(torch.sum(y_pred,dim=1)).unsqueeze(1)
    pred_max_idx = torch.argmax(y_pred,1)
    acc = sum(y_test==pred_max_idx).detach() / len(y_test)

    return acc.item()

def train():
    global x_train,y_train,optimizer,loss_fn,model_hist,loss_hist,train_acc_hist,num_epoch

    for epoch in range(1,num_epoch+1):
        optimizer.zero_grad()
        y_pred = model(x_train)
        
        #acc
        pred_idx = torch.argmax(y_pred,1)
        acc = sum(y_train==pred_idx).detach()/len(y_train)
        
        loss = loss_fn(y_pred,y_train)
        loss.backward()
        optimizer.step()

        #test
        test_acc = test()

        if epoch%(num_epoch//10)==0:
            print(f"Epoch {epoch}: loss = {loss.item():.2f} acc = {acc.item():.2f}, test acc= {test_acc:.2f}")
            model_hist.append(copy.deepcopy(model))
        
        loss_hist.append(loss.item())
        train_acc_hist.append(acc.item())

    return loss_hist,model_hist,train_acc_hist

In [32]:
train()

Epoch 10: loss = 0.12 acc = 0.96, test acc= 0.96
Epoch 20: loss = 0.10 acc = 0.97, test acc= 0.96
Epoch 30: loss = 0.09 acc = 0.97, test acc= 0.96
Epoch 40: loss = 0.08 acc = 0.98, test acc= 0.96
Epoch 50: loss = 0.07 acc = 0.98, test acc= 0.96
Epoch 60: loss = 0.06 acc = 0.98, test acc= 0.96
Epoch 70: loss = 0.06 acc = 0.98, test acc= 0.96
Epoch 80: loss = 0.05 acc = 0.99, test acc= 0.97
Epoch 90: loss = 0.04 acc = 0.99, test acc= 0.97
Epoch 100: loss = 0.03 acc = 0.99, test acc= 0.97


([2.305330753326416,
  2.257697582244873,
  2.1168155670166016,
  1.909471035003662,
  1.661846399307251,
  1.3580609560012817,
  1.0672671794891357,
  0.9130781292915344,
  1.0286641120910645,
  0.8615967035293579,
  0.8111891150474548,
  0.768997073173523,
  0.6662917137145996,
  0.618152379989624,
  0.6361139416694641,
  0.5864555835723877,
  0.5290119051933289,
  0.5246685147285461,
  0.49872148036956787,
  0.4696893095970154,
  0.4655166566371918,
  0.45322391390800476,
  0.41776779294013977,
  0.3985891044139862,
  0.4018174111843109,
  0.39691588282585144,
  0.37797626852989197,
  0.3588845431804657,
  0.3525104224681854,
  0.35124266147613525,
  0.3440192937850952,
  0.333543062210083,
  0.3224695920944214,
  0.3150693476200104,
  0.3102084696292877,
  0.30381786823272705,
  0.29714497923851013,
  0.2894029915332794,
  0.2820010185241699,
  0.276216059923172,
  0.2722039818763733,
  0.2674105167388916,
  0.260250061750412,
  0.25327613949775696,
  0.24854248762130737,
  0.24481

In [33]:
pred = model(x_train[0:5])
pred = torch.exp(pred)
pred = pred/(torch.sum(pred,dim=1)).unsqueeze(1)
print(pred)

pred_idx = torch.argmax(pred,1)
print(pred_idx)
print(y_train[0:5])
print(y_test[0:5])


tensor([[4.3344e-09, 2.3089e-05, 4.6382e-08, 6.6460e-02, 1.4863e-13, 9.3352e-01,
         2.8347e-09, 4.4773e-08, 1.3687e-08, 5.7276e-07],
        [9.9999e-01, 9.5064e-12, 5.7447e-06, 2.5536e-08, 1.2013e-11, 6.3345e-09,
         1.0917e-07, 6.9767e-11, 2.4158e-10, 6.8196e-08],
        [1.2172e-05, 8.8523e-08, 7.7687e-07, 8.5721e-04, 9.9355e-01, 2.9929e-04,
         2.0395e-06, 1.9668e-04, 3.0366e-05, 5.0513e-03],
        [1.1075e-07, 9.9992e-01, 1.4502e-06, 1.0025e-07, 2.1325e-07, 1.3974e-07,
         6.9693e-08, 7.4759e-05, 4.3110e-06, 1.3517e-08],
        [1.9401e-08, 6.9599e-09, 1.0342e-09, 1.9595e-05, 1.1515e-05, 7.2721e-06,
         9.3428e-11, 4.9674e-07, 4.8959e-04, 9.9947e-01]],
       grad_fn=<DivBackward0>)
tensor([5, 0, 4, 1, 9])
tensor([5, 0, 4, 1, 9])
tensor([7, 2, 1, 0, 4])


In [34]:
torch.save(model.state_dict(),'myMNISTmodel.pth')