<a href="https://colab.research.google.com/github/dahlia52/Advanced-Statistical-Data-Analysis/blob/main/LeNet_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
from torchvision.datasets import MNIST, CIFAR10, CIFAR100
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Root directory where the dataset files are stored / downloaded to.
path = './datasets/'

# Convert each image into a tensor; no other preprocessing is applied.
transform = transforms.Compose([transforms.ToTensor()])

# Mini-batch size shared by the train and test loaders.
batch_size = 100

# Prepare data: MNIST train and test splits (downloaded if not already present).
train_data = MNIST(root=path, train=True, transform=transform, download=True)
test_data = MNIST(root=path, train=False, transform=transform, download=True)

# DataLoaders: only the training split is shuffled.
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

print(train_data)
print(test_data)

# Shape of one image tensor, (1, 28, 28) for MNIST, and the number of classes.
input_shape = train_data[0][0].shape
output_shape = len(train_data.classes)

print(input_shape,output_shape)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 93014701.43it/s]

Extracting ./datasets/MNIST/raw/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 88420214.47it/s]

Extracting ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 21142790.66it/s]


Extracting ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 16755082.47it/s]


Extracting ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./datasets/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )
Dataset MNIST
    Number of datapoints: 10000
    Root location: ./datasets/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
           )
torch.Size([1, 28, 28]) 10




In [None]:
# Pick the compute device, in order of preference: Apple MPS, CUDA, CPU.
#
# `torch.backends.mps` is looked up defensively: PyTorch builds that predate
# the MPS backend have no such attribute, and a bare attribute access would
# raise AttributeError instead of falling through to CUDA / CPU.
_mps_backend = getattr(torch.backends, "mps", None)

if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps:0")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu:0")

print(device)

cuda:0


In [None]:
class LeNet(nn.Module):
  """LeNet-style CNN for single-channel 28x28 images (e.g. MNIST).

  Layers: conv(1->6, 5x5, padding 2) -> 2x2 avg-pool -> conv(6->16, 5x5)
  -> 2x2 avg-pool -> flatten -> fc(400->120) -> fc(120->84) -> fc(84->num_classes).
  Leaky ReLU follows both convolutions and the first two fully connected layers.

  Args:
    num_classes: Number of output logits. When left as None, the value of the
      notebook-level variable `output_shape` is used, so `LeNet()` keeps
      working exactly as before as long as that variable has been defined.
  """

  def __init__(self, num_classes = None):
    super().__init__()

    # Fall back to the notebook global only when the caller did not say
    # how many classes there are.
    if num_classes is None:
      num_classes = output_shape

    # MNIST images are grayscale, hence in_channels = 1.
    self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = (5,5), stride = 1, padding = 2)
    self.pool1 = nn.AvgPool2d(kernel_size = (2,2), stride = 2, padding = 0)
    # The kernel is square, so kernel_size = 5 would be equivalent.
    self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 16, kernel_size = (5,5), stride = 1)
    self.pool2 = nn.AvgPool2d(kernel_size = (2,2), stride = 2, padding = 0)
    self.flatten = nn.Flatten()
    # 400 = 16 channels * 5 * 5 spatial positions after the second pooling.
    self.fc1 = nn.Linear(400,120)
    self.fc2 = nn.Linear(120,84)
    self.fc3 = nn.Linear(84,num_classes)

  def forward(self, x):
    """Map a batch of images to class logits.

    Args:
      x: Tensor of shape (batch, 1, 28, 28).

    Returns:
      Tensor of shape (batch, num_classes) holding raw logits (no softmax
      is applied here).
    """
    hidden = F.leaky_relu(self.conv1(x))       # (batch, 6, 28, 28)
    hidden = self.pool1(hidden)                # (batch, 6, 14, 14)
    hidden = F.leaky_relu(self.conv2(hidden))  # (batch, 16, 10, 10)
    hidden = self.pool2(hidden)                # (batch, 16, 5, 5)
    hidden = self.flatten(hidden)              # (batch, 400)
    hidden = F.leaky_relu(self.fc1(hidden))    # (batch, 120)
    hidden = F.leaky_relu(self.fc2(hidden))    # (batch, 84)
    output = self.fc3(hidden)                  # (batch, num_classes)
    return output

In [None]:
# Instantiate the network and move its parameters to the selected device.
model = LeNet().to(device)

# Cross-entropy summed (not averaged) over the samples of each batch.
loss = nn.CrossEntropyLoss(reduction='sum')

# Adam optimizer over all model parameters, with weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-3,
)

In [None]:
# Train for `num_epoch` epochs, evaluating on the test set after every epoch.
# Per-epoch average losses are collected in `train_loss_list` / `test_loss_list`.
#
# NOTE: this cell trains whatever weights `model` currently holds; it does not
# re-initialise the model or the optimizer. Re-run the cell that creates them
# first if training should start from scratch.
num_epoch = 15
train_loss_list, test_loss_list = list(), list()

for i in range(num_epoch):
  # train
  model.train()

  total_loss = 0
  count = 0

  for batch_idx, (x, y) in enumerate(train_loader):
    x, y = x.to(device), y.to(device)
    # Call the module itself rather than .forward() so that nn.Module's
    # __call__ machinery (registered hooks) is not bypassed.
    y_est = model(x)
    cost = loss(y_est, y)

    total_loss += cost.item()

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    pred = torch.argmax(y_est, dim = -1)
    count += (pred == y).sum().item()

  # `loss` sums over the samples of a batch (reduction = 'sum'), so dividing the
  # running total by the dataset size yields the per-sample average.
  acc = count / len(train_data)
  avg_loss = total_loss / len(train_data)

  train_loss_list.append(avg_loss)

  print("\nEpoch %d Train: Loss %.3f / Accuracy %.3f"%(i,avg_loss,acc))

  # test
  model.eval()

  total_loss = 0
  count = 0

  # No gradients are needed for evaluation.
  with torch.no_grad():
    for batch_idx, (x, y) in enumerate(test_loader):
      x, y = x.to(device), y.to(device)
      y_est = model(x)
      cost = loss(y_est, y)

      total_loss += cost.item()

      pred = torch.argmax(y_est, dim = -1)
      count += (pred == y).sum().item()

  acc = count / len(test_data)
  avg_loss = total_loss / len(test_data)

  test_loss_list.append(avg_loss)

  print("Epoch %d Test: Loss %.3f / Accuracy %.3f"%(i,avg_loss,acc))


Epoch 0 Train: Loss 0.006 / Accuracy 0.998
Epoch 0 Test: Loss 0.035 / Accuracy 0.991

Epoch 1 Train: Loss 0.006 / Accuracy 0.998
Epoch 1 Test: Loss 0.036 / Accuracy 0.990

Epoch 2 Train: Loss 0.006 / Accuracy 0.998
Epoch 2 Test: Loss 0.036 / Accuracy 0.991

Epoch 3 Train: Loss 0.006 / Accuracy 0.998
Epoch 3 Test: Loss 0.040 / Accuracy 0.990

Epoch 4 Train: Loss 0.004 / Accuracy 0.999
Epoch 4 Test: Loss 0.035 / Accuracy 0.992

Epoch 5 Train: Loss 0.004 / Accuracy 0.999
Epoch 5 Test: Loss 0.042 / Accuracy 0.991

Epoch 6 Train: Loss 0.005 / Accuracy 0.999
Epoch 6 Test: Loss 0.031 / Accuracy 0.992

Epoch 7 Train: Loss 0.004 / Accuracy 0.999
Epoch 7 Test: Loss 0.031 / Accuracy 0.992

Epoch 8 Train: Loss 0.005 / Accuracy 0.998
Epoch 8 Test: Loss 0.059 / Accuracy 0.989

Epoch 9 Train: Loss 0.006 / Accuracy 0.998
Epoch 9 Test: Loss 0.039 / Accuracy 0.991

Epoch 10 Train: Loss 0.005 / Accuracy 0.999
Epoch 10 Test: Loss 0.042 / Accuracy 0.991

Epoch 11 Train: Loss 0.004 / Accuracy 0.999
Epoch 1

In [None]:
# Print the shape of every parameter tensor of the model, then the total
# number of scalar parameters (the sum of the products of those shapes).
num_parameter = 0

for parameter in model.parameters():
  print(parameter.shape)
  num_parameter = num_parameter + np.prod(parameter.shape)

print(num_parameter)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])
61706
