In [1]:
import numpy as np # 선형대수 모듈
import matplotlib.pyplot as plt # 시각화 모듈
import torch # 파이토치
import torch.nn as nn # PyTorch의 모듈을 모아놓은 것. from~~이 아닌 저렇게 임포트를 하는 것이 거의 관습이라고 한다.
import torch.nn.functional as F # torch.nn 중에서 자주 쓰는 함수를 F로 임포트.
import torch.nn.init as init # 초기화 관련 모듈 
import torchvision # TorchVision 임포트
from torchvision import transforms, datasets # 데이터를 다루기 위한 TorchVision 내의 Transforms와 datasets를 따로 임포트
from collections import OrderedDict
import json

In [2]:
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

BATCH_SIZE = 32
EPOCHS = 30

print('Using PyTorch version: ', torch.__version__, 'Device: ', DEVICE)

Using PyTorch version:  1.13.0 Device:  cpu


In [3]:
train_dataset = datasets.MNIST(
    root="../data/MNIST",
    train=True,
    download=True,
    transform=transforms.ToTensor()
)
test_dataset = datasets.MNIST(
    root="../data/MNIST",
    train=False,
    download=True,
    transform=transforms.ToTensor()
)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False
)

In [4]:
test_dataset[0]

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

In [5]:
for (X_train, y_train) in train_loader:
    print('X_train: ', X_train.size(), 'type: ', X_train.type())
    print('y_train: ', y_train.size(), 'type: ', y_train.type())
    break

X_train:  torch.Size([32, 1, 28, 28]) type:  torch.FloatTensor
y_train:  torch.Size([32]) type:  torch.LongTensor


In [6]:
layer1_in=28*28
layer1_out=32
layer2_in=32
layer2_out=16
layer3_in=16
layer3_out=10
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(layer1_in, layer1_out)
        self.fc2 = nn.Linear(layer2_in, layer2_out)
        self.fc3 = nn.Linear(layer3_in, layer3_out)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = self.fc1(x)
        x = F.relu(x) # sigmoid(x)
        x = self.fc2(x)
        x = F.relu(x) # sigmoid(x)
        x = self.fc3(x)
        x = F.log_softmax(x, dim=1)
        return x

In [7]:
def weight_init(m):
    if isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight.data)

model = MLP().to(DEVICE) # 정의한 모델을 GPU로 납치
model.apply(weight_init)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

In [8]:
def train(model, train_loader, optimizer, log_interval):
    model.train()
    for batch_idx, (image, label) in enumerate(train_loader):
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model(image)
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()

In [9]:
def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(DEVICE)
            label = label.to(DEVICE)
            output = model(image)
            test_loss += loss_fn(output, label).item()
            prediction = output.max(1, keepdim=True)[1]
            correct += prediction.eq(label.view_as(prediction)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [10]:
max_test_accuracy=0
w1_name="float w1["+str(layer1_out)+"]["+str(layer1_in)+"] = { { "
b1_name="float b1["+str(layer1_out)+"] = { "
w2_name="float w2["+str(layer2_out)+"]["+str(layer2_in)+"] = { { "
b2_name="float b2["+str(layer2_out)+"] = { "
w3_name="float w3["+str(layer3_out)+"]["+str(layer3_in)+"] = { { "
b3_name="float b3["+str(layer3_out)+"] = { "
for Epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, log_interval=100)
    test_loss, test_accuracy = evaluate(model, test_loader)
    print("[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} %".format(
        Epoch, test_loss, test_accuracy
    ))
    if test_accuracy>max_test_accuracy:
      max_test_accuracy=test_accuracy
      acc=str(max_test_accuracy)
      PATH = "MLP_"+str(layer2_in)+"_"+str(layer3_in)+"_"+acc+"_"+str(Epoch)+".pt"
      torch.save(model.state_dict(), PATH)
      model_verb_cpu = model.eval()
      od1 = model_verb_cpu.state_dict()
      od1 = OrderedDict({k: od1[k].detach().cpu().tolist() for k in od1})
      od1 = json.dumps(od1)
      w1=str(od1.split('fc1.weight": [[')[1]).split("]]")[0]
      w1=w1.replace('[','{')
      w1=w1.replace(']','}')
      b1=str(od1.split('fc1.bias": [')[1]).split(']')[0]
      w2=str(od1.split('fc2.weight": [[')[1]).split("]]")[0]
      w2=w2.replace('[','{')
      w2=w2.replace(']','}')
      b2=str(od1.split('fc2.bias": [')[1]).split(']')[0]
      w3=str(od1.split('fc3.weight": [[')[1]).split("]]")[0]
      w3=w3.replace('[','{')
      w3=w3.replace(']','}')
      b3=str(od1.split('fc3.bias": [')[1]).split(']')[0]
      with open("MLP_"+str(layer2_in)+"_"+str(layer3_in)+"_"+acc+"_"+str(Epoch)+".h", 'w') as outfile:
          outfile.write(w1_name)
          outfile.write(w1)
          outfile.write("} };\n")
          outfile.write(b1_name)
          outfile.write(b1)
          outfile.write("};\n")
          outfile.write(w2_name)
          outfile.write(w2)
          outfile.write("} };\n")
          outfile.write(b2_name)
          outfile.write(b2)
          outfile.write("};\n")
          outfile.write(w3_name)
          outfile.write(w3)
          outfile.write("} };\n")
          outfile.write(b3_name)
          outfile.write(b3)
          outfile.write("};")

[EPOCH: 1], 	Test Loss: 0.0066, 	Test Accuracy: 93.99 %
[EPOCH: 2], 	Test Loss: 0.0065, 	Test Accuracy: 93.93 %
[EPOCH: 3], 	Test Loss: 0.0075, 	Test Accuracy: 93.68 %
[EPOCH: 4], 	Test Loss: 0.0065, 	Test Accuracy: 94.18 %
[EPOCH: 5], 	Test Loss: 0.0057, 	Test Accuracy: 95.14 %
[EPOCH: 6], 	Test Loss: 0.0059, 	Test Accuracy: 95.03 %
[EPOCH: 7], 	Test Loss: 0.0060, 	Test Accuracy: 95.29 %
[EPOCH: 8], 	Test Loss: 0.0071, 	Test Accuracy: 94.71 %
[EPOCH: 9], 	Test Loss: 0.0064, 	Test Accuracy: 94.94 %
[EPOCH: 10], 	Test Loss: 0.0050, 	Test Accuracy: 96.02 %
[EPOCH: 11], 	Test Loss: 0.0055, 	Test Accuracy: 95.87 %
[EPOCH: 12], 	Test Loss: 0.0064, 	Test Accuracy: 95.43 %
[EPOCH: 13], 	Test Loss: 0.0058, 	Test Accuracy: 95.68 %
[EPOCH: 14], 	Test Loss: 0.0058, 	Test Accuracy: 95.66 %
[EPOCH: 15], 	Test Loss: 0.0053, 	Test Accuracy: 96.24 %
[EPOCH: 16], 	Test Loss: 0.0064, 	Test Accuracy: 95.30 %
[EPOCH: 17], 	Test Loss: 0.0057, 	Test Accuracy: 96.20 %
[EPOCH: 18], 	Test Loss: 0.0065, 	Test A