## Naive fMNIST

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision.datasets as datasets
import torchvision.transforms as transforms
import random
import numpy as np

In [2]:
# Run on the first GPU when CUDA is present; otherwise fall back to the CPU so the
# notebook still executes on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed every RNG used here (Python, NumPy, PyTorch) so that weight initialisation
# and batch shuffling are repeatable across "Restart & Run All" runs, as far as
# the backend allows.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Training hyperparameters.
learning_rate = 0.001
epochs = 15
batch_size = 128

- dataset 로드할 때 transforms.ToTensor() 
    - Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]

In [3]:
# Fashion-MNIST train and test splits. Images are converted to tensors by
# ToTensor(); the files are downloaded into fMNIST_data/ if not already there.
mnist_train = datasets.FashionMNIST(
    root='fMNIST_data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

mnist_test = datasets.FashionMNIST(
    root='fMNIST_data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

- train_data example

In [4]:
# Indexing the dataset yields one (image, label) pair with the transform already applied.
image, label = mnist_train[0]

In [5]:
# Inspect the sample: the image tensor's shape and its integer class label.
print('image:', image.shape)
print('label:', label)

image: torch.Size([1, 28, 28])
label: 9


In [6]:
# Training loader: reshuffled mini-batches of `batch_size` samples; the final
# incomplete batch is dropped so every batch has the same size.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [7]:
class NaiveModel(nn.Module):
    """Plain fully connected classifier: 784 -> 256 -> 256 -> 10.

    ReLU follows the first two linear layers; the last layer returns raw
    scores (no softmax), which is what nn.CrossEntropyLoss expects.
    """

    def __init__(self):
        super().__init__()
        self.dense1 = nn.Linear(in_features=784, out_features=256)
        self.dense2 = nn.Linear(in_features=256, out_features=256)
        self.dense3 = nn.Linear(in_features=256, out_features=10)
        self.relu = F.relu

    def forward(self, x):
        """Map a batch of flattened inputs (last dimension 784) to 10 class scores."""
        hidden = self.relu(self.dense1(x))
        hidden = self.relu(self.dense2(hidden))
        return self.dense3(hidden)

In [8]:
# Build the network, then move its parameters onto the selected device.
model = NaiveModel()
model = model.to(device)

- model parameters에 access하는 방법

In [9]:
# Weight matrix of the first layer, stored as (out_features, in_features).
model.dense1.weight.shape

torch.Size([256, 784])

In [10]:
# Bias vector of the first layer: one entry per output unit.
model.dense1.bias.shape

torch.Size([256])

In [11]:
# A single layer's parameters() yields its weight, then its bias.
for param in model.dense1.parameters():
    print(param.shape)

torch.Size([256, 784])
torch.Size([256])


In [12]:
# The whole model's parameters() walks every layer in definition order.
for param in model.parameters():
    print(param.shape)

torch.Size([256, 784])
torch.Size([256])
torch.Size([256, 256])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])


- Normal init

In [13]:
# Naive baseline: overwrite every parameter (weights and biases alike) with
# samples from a standard normal distribution.
for param in model.parameters():
    nn.init.normal_(param, mean=0.0, std=1.0)

In [14]:
# Cross-entropy on the raw class scores, optimised with Adam over the naive model's parameters.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [15]:
# Train `model` for `epochs` passes over the training loader and report the
# mean per-batch loss after each epoch.
total_batch = len(data_loader)  # 60,000 // 128 = 468 full batches (drop_last=True)
for epoch in range(epochs):
    avg_loss = 0
    for X_batch, Y_batch in data_loader:
        # Flatten each image to a 784-vector for the fully connected layers.
        X_batch = torch.reshape(X_batch, [-1, 784]).to(device)
        Y_batch = Y_batch.to(device)

        pred = model(X_batch)
        loss = criterion(pred, Y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a detached copy: summing the graph-attached `loss` would
        # chain every batch's autograd history onto `avg_loss` for the whole epoch.
        avg_loss += loss.detach()
    avg_loss /= total_batch
    print('epoch:', epoch, 'loss: ', avg_loss.item())

epoch: 0 loss:  178.77813720703125
epoch: 1 loss:  70.01497650146484
epoch: 2 loss:  50.85539245605469
epoch: 3 loss:  40.01912307739258
epoch: 4 loss:  32.85101318359375
epoch: 5 loss:  27.57170295715332
epoch: 6 loss:  23.160036087036133
epoch: 7 loss:  20.083572387695312
epoch: 8 loss:  17.494884490966797
epoch: 9 loss:  15.129667282104492
epoch: 10 loss:  13.448171615600586
epoch: 11 loss:  11.841980934143066
epoch: 12 loss:  10.589550971984863
epoch: 13 loss:  9.266057014465332
epoch: 14 loss:  8.550172805786133


- test_data example

In [16]:
# Take the first test sample; this rebinds `image` and `label` from the earlier training example.
image, label = mnist_test[0]

In [17]:
# Flatten the single image into one 784-wide row and score it with the trained model.
output = model(torch.reshape(image, [-1, 784]).to(device))
print(output)

tensor([[-417.1286, -992.2338, -539.6521, -672.2505, -155.7197,  827.4575,
         -199.6065,  862.0845,  207.2610, 1172.1598]], device='cuda:0',
       grad_fn=<AddmmBackward>)


In [18]:
# max over dim=1 returns both the largest score and its index (the predicted class).
max_val, argmax_val = output.max(dim=1)
print(max_val)
print(argmax_val)

tensor([1172.1598], device='cuda:0', grad_fn=<MaxBackward0>)
tensor([9], device='cuda:0')


In [19]:
# Compare the predicted class index with the ground-truth label.
argmax_val == label

tensor([1], device='cuda:0', dtype=torch.uint8)

- evaluate model

In [20]:
# Evaluation loader: fixed order, and no samples dropped, so accuracy covers the full test split.
test_loader = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=128,
    shuffle=False,
)

In [21]:
# Top-1 accuracy of `model` on the test loader; no_grad disables gradient
# tracking because nothing is being trained here.
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        scores = model(images.view(-1, 784).to(device))
        _, predicted = scores.max(dim=1)
        total += labels.size(0)
        correct += (predicted == labels.to(device)).sum().item()

print('Accuracy on test images:', (correct / total))

Accuracy on test images: 0.823


- note. 위와 같은 모델로 MNIST 돌렸을 때 test accuracy 94% 나왔음

### - Other initialization: Xavier for tanh, He for relu

- He initialization: we just multiply random norm initialization with $\sqrt{\frac{2}{size^{[l-1]}}}$
- $W^{[l]}$ = `np.random.randn(size_l, size_prev) * np.sqrt(2 / size_prev)`, where `size_prev` is $size^{[l-1]}$
- Xavier initialization: multiply with $\sqrt{\frac{1}{size^{[l-1]}}}$

- 아래와 같이 하면 error 뜸. 
- Fan in and fan out can not be computed for tensor with fewer than 2 dimensions

In [22]:
# Intentionally left commented out: this fails because model.parameters() also
# yields the 1-D bias tensors, and xavier_uniform_ needs at least 2 dimensions
# to compute fan-in / fan-out.
# for params in model.parameters():
#     nn.init.xavier_uniform_(params)   

In [23]:
# Fresh copy of the same architecture, to be re-initialised with Xavier weights.
model_xavier = NaiveModel()
model_xavier = model_xavier.to(device)

In [24]:
# Xavier (Glorot) uniform init is applied to the 2-D weight matrices only; the
# biases keep the values they were given when the layers were constructed.
nn.init.xavier_uniform_(model_xavier.dense1.weight, gain=1.0)
nn.init.xavier_uniform_(model_xavier.dense2.weight, gain=1.0)
nn.init.xavier_uniform_(model_xavier.dense3.weight, gain=1.0)

Parameter containing:
tensor([[ 0.0530,  0.0202,  0.0023,  ...,  0.0925, -0.0421, -0.1132],
        [-0.0084, -0.0689,  0.0030,  ..., -0.0637,  0.1469, -0.0965],
        [-0.1422,  0.0940,  0.1371,  ...,  0.0112, -0.1289,  0.1424],
        ...,
        [ 0.0405,  0.0858, -0.0344,  ...,  0.0395, -0.1401,  0.0501],
        [ 0.0062, -0.1438,  0.0675,  ...,  0.0591,  0.0258,  0.1370],
        [ 0.1383,  0.1414, -0.0412,  ...,  0.0933,  0.1319,  0.0420]],
       device='cuda:0', requires_grad=True)

In [25]:
# Same loss as before; a new Adam optimiser is bound to the Xavier-initialised model's parameters.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = optim.Adam(params=model_xavier.parameters(), lr=learning_rate)

In [26]:
# Train `model_xavier` for `epochs` passes over the training loader and report
# the mean per-batch loss after each epoch.
total_batch = len(data_loader)  # 60,000 // 128 = 468 full batches (drop_last=True)
for epoch in range(epochs):
    avg_loss = 0
    for X_batch, Y_batch in data_loader:
        # Flatten each image to a 784-vector for the fully connected layers.
        X_batch = torch.reshape(X_batch, [-1, 784]).to(device)
        Y_batch = Y_batch.to(device)

        pred = model_xavier(X_batch)
        loss = criterion(pred, Y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a detached copy: summing the graph-attached `loss` would
        # chain every batch's autograd history onto `avg_loss` for the whole epoch.
        avg_loss += loss.detach()
    avg_loss /= total_batch
    print('epoch:', epoch, 'loss: ', avg_loss.item())

epoch: 0 loss:  0.5027369856834412
epoch: 1 loss:  0.3580172061920166
epoch: 2 loss:  0.3243653178215027
epoch: 3 loss:  0.29909634590148926
epoch: 4 loss:  0.28359007835388184
epoch: 5 loss:  0.26590773463249207
epoch: 6 loss:  0.2530584931373596
epoch: 7 loss:  0.23926635086536407
epoch: 8 loss:  0.22942577302455902
epoch: 9 loss:  0.222588449716568
epoch: 10 loss:  0.2129589468240738
epoch: 11 loss:  0.20299723744392395
epoch: 12 loss:  0.1939186453819275
epoch: 13 loss:  0.18669550120830536
epoch: 14 loss:  0.1814003437757492


In [32]:
# Top-1 accuracy of `model_xavier` on the test loader; no_grad disables gradient
# tracking because nothing is being trained here.
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        scores = model_xavier(images.view(-1, 784).to(device))
        _, predicted = scores.max(dim=1)
        total += labels.size(0)
        correct += (predicted == labels.to(device)).sum().item()

print('Accuracy on test images with Xavier initialization:', (correct / total))

Accuracy on test images with Xavier initialization: 0.8923
