In [8]:
# colab 연결용
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor,Lambda
from torchvision import transforms

In [2]:
import numpy as np

In [35]:
training_data = datasets.FashionMNIST(
    root='data',
    train=True,
    download=True,
    transform=ToTensor(),
)

In [36]:
test_data = datasets.FashionMNIST(
    root='data',
    train=False,
    download=True,
    transform=ToTensor()
)

In [37]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)

In [38]:
from torch import nn

In [39]:
# Feedforward Model
class MLP(torch.nn.Module):
  def __init__(self):
    super(MLP, self).__init__()
    self.linear_relu_stack = nn.Sequential(
        nn.Flatten(),
        nn.Linear(28*28, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Linear(512,512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Linear(512, 10),
    )
  def forward(self, x):
    outputs = self.linear_relu_stack(x)
    return outputs

In [40]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
lr = 0.001
epochs = 20
model = MLP().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [41]:
for epoch in range(epochs):
  for batch_idx, (X_train, y_train) in enumerate(train_dataloader):
    # Train
    model.train()
    optimizer.zero_grad()
    X_train = X_train.to(device)
    y_train = y_train.to(device)
    pred = model(X_train)
    loss = criterion(pred, y_train)

    loss.backward()
    optimizer.step()
    
    if batch_idx % 100 == 0:
      loss, current = loss.item(), batch_idx * len(X_train)
      print(f"loss : {loss::>7f} [{current:>5d}/{len(train_dataloader.dataset):>5d}]")
      
  test_loss, correct = 0,0
  model.eval()
  with torch.no_grad():
    for X_test,y_test in test_dataloader:
      X_test = X_test.to(device)
      y_test = y_test.to(device)
      pred = model(X_test)
      test_loss += criterion(pred,y_test).item()
      correct += (pred.argmax(1) == y_test).type(torch.float).sum().item()
    test_loss /= len(test_dataloader)
    correct /= len(test_dataloader.dataset)
    print(f"Test Error:\n Accurancy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8}\n")


loss : 2.337666 [    0/60000]
loss : 1.417882 [ 6400/60000]
loss : 1.152488 [12800/60000]
loss : 1.092979 [19200/60000]
loss : 1.015640 [25600/60000]
loss : 0.906772 [32000/60000]
loss : 0.702210 [38400/60000]
loss : 0.836317 [44800/60000]
loss : 0.862248 [51200/60000]
loss : 0.717844 [57600/60000]
Test Error:
 Accurancy: 79.3%, Avg loss: 0.6517241878041835

loss : 0.639922 [    0/60000]
loss : 0.633773 [ 6400/60000]
loss : 0.763333 [12800/60000]
loss : 0.729423 [19200/60000]
loss : 0.751118 [25600/60000]
loss : 0.650236 [32000/60000]
loss : 0.496598 [38400/60000]
loss : 0.574316 [44800/60000]
loss : 0.685227 [51200/60000]
loss : 0.531638 [57600/60000]
Test Error:
 Accurancy: 82.4%, Avg loss: 0.5287218895166922

loss : 0.521707 [    0/60000]
loss : 0.670882 [ 6400/60000]
loss : 0.596616 [12800/60000]
loss : 0.536481 [19200/60000]
loss : 0.413756 [25600/60000]
loss : 0.536480 [32000/60000]
loss : 0.528198 [38400/60000]
loss : 0.477030 [44800/60000]
loss : 0.379460 [51200/60000]
loss : 0

In [42]:
# Feedforward Model
class NoBatchNormMLP(torch.nn.Module):
  def __init__(self):
    super(NoBatchNormMLP, self).__init__()
    self.linear_relu_stack = nn.Sequential(
        nn.Flatten(),
        nn.Linear(28*28, 512),
        nn.ReLU(),
        nn.Linear(512,512),
        nn.ReLU(),
        nn.Linear(512, 10),
    )
  def forward(self, x):
    outputs = self.linear_relu_stack(x)
    return outputs

In [43]:
no_batch_norm_model = NoBatchNormMLP().to(device)
optimizer = torch.optim.SGD(no_batch_norm_model.parameters(), lr=lr)

In [44]:
for epoch in range(epochs):
  for batch_idx, (X_train, y_train) in enumerate(train_dataloader):
    # Train
    no_batch_norm_model.train()
    optimizer.zero_grad()
    X_train = X_train.to(device)
    y_train = y_train.to(device)
    pred = no_batch_norm_model(X_train)
    loss = criterion(pred, y_train)

    loss.backward()
    optimizer.step()
    
    if batch_idx % 100 == 0:
      loss, current = loss.item(), batch_idx * len(X_train)
      print(f"loss : {loss::>7f} [{current:>5d}/{len(train_dataloader.dataset):>5d}]")
  
  test_loss, correct = 0,0
  no_batch_norm_model.eval()
  with torch.no_grad():
    for X_test,y_test in test_dataloader:
      X_test = X_test.to(device)
      y_test = y_test.to(device)
      pred = no_batch_norm_model(X_test)
      test_loss += criterion(pred,y_test).item()
      correct += (pred.argmax(1) == y_test).type(torch.float).sum().item()
    test_loss /= len(test_dataloader)
    correct /= len(test_dataloader.dataset)
    print(f"Test Error:\n Accurancy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8}\n")

loss : 2.312639 [    0/60000]
loss : 2.292538 [ 6400/60000]
loss : 2.278856 [12800/60000]
loss : 2.270298 [19200/60000]
loss : 2.255163 [25600/60000]
loss : 2.232386 [32000/60000]
loss : 2.215727 [38400/60000]
loss : 2.194212 [44800/60000]
loss : 2.163147 [51200/60000]
loss : 2.160558 [57600/60000]
Test Error:
 Accurancy: 44.5%, Avg loss: 2.146650001946797

loss : 2.142972 [    0/60000]
loss : 2.134417 [ 6400/60000]
loss : 2.074998 [12800/60000]
loss : 2.055355 [19200/60000]
loss : 2.045841 [25600/60000]
loss : 1.962455 [32000/60000]
loss : 1.972443 [38400/60000]
loss : 1.980720 [44800/60000]
loss : 1.912199 [51200/60000]
loss : 1.849924 [57600/60000]
Test Error:
 Accurancy: 52.0%, Avg loss: 1.862774512788126

loss : 1.895276 [    0/60000]
loss : 1.783568 [ 6400/60000]
loss : 1.769848 [12800/60000]
loss : 1.712306 [19200/60000]
loss : 1.743461 [25600/60000]
loss : 1.599569 [32000/60000]
loss : 1.670750 [38400/60000]
loss : 1.610771 [44800/60000]
loss : 1.528476 [51200/60000]
loss : 1.6

In [45]:
from torchvision import transforms

In [46]:
mean = torch.zeros(1)
std = torch.zeros(1)
print('==> Computing mean and std..')
for inputs, _labels in train_dataloader:
  for i in range(1):
    mean[i] += inputs[:,i,:,:].mean()
    std[i] += inputs[:,i,:,:].std()
  mean.div_(len(train_dataloader))
  std.div_(len(train_dataloader))
print(mean, std)


==> Computing mean and std..
tensor([0.0003]) tensor([0.0004])


In [53]:
norm_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.0003), (0.0004))
])

In [54]:
norm_training_data = datasets.FashionMNIST(
    root='data',
    train=True,
    download=True,
    transform=norm_transform,
)

In [55]:
norm_test_data = datasets.FashionMNIST(
    root='data',
    train=False,
    download=True,
    transform=norm_transform
)

In [56]:
norm_train_dataloader = DataLoader(norm_training_data, batch_size=64, shuffle=True)
norm_test_dataloader = DataLoader(norm_test_data, batch_size=64, shuffle=True)

In [57]:
model_for_norm = MLP().to(device)
optimizer = torch.optim.SGD(model_for_norm.parameters(), lr=lr)

In [58]:
for epoch in range(epochs):
  for batch_idx, (X_train, y_train) in enumerate(norm_train_dataloader):
    # Train
    model_for_norm.train()
    optimizer.zero_grad()
    X_train = X_train.to(device)
    y_train = y_train.to(device)
    pred = model_for_norm(X_train)
    loss = criterion(pred, y_train)

    loss.backward()
    optimizer.step()
    
    if batch_idx % 100 == 0:
      loss, current = loss.item(), batch_idx * len(X_train)
      print(f"loss : {loss::>7f} [{current:>5d}/{len(norm_train_dataloader.dataset):>5d}]")
  
  test_loss, correct = 0,0
  model_for_norm.eval()
  with torch.no_grad():
    for X_test,y_test in norm_test_dataloader:
      X_test = X_test.to(device)
      y_test = y_test.to(device)
      pred = model_for_norm(X_test)
      test_loss += criterion(pred,y_test).item()
      correct += (pred.argmax(1) == y_test).type(torch.float).sum().item()
    test_loss /= len(norm_test_dataloader)
    correct /= len(norm_test_dataloader.dataset)
    print(f"Test Error:\n Accurancy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8}\n")


loss : 2.372258 [    0/60000]
loss : 1.310358 [ 6400/60000]
loss : 1.181325 [12800/60000]
loss : 0.871238 [19200/60000]
loss : 0.725590 [25600/60000]
loss : 0.617003 [32000/60000]
loss : 0.776429 [38400/60000]
loss : 0.780041 [44800/60000]
loss : 0.725992 [51200/60000]
loss : 0.681127 [57600/60000]
Test Error:
 Accurancy: 78.4%, Avg loss: 0.6659049306325852

loss : 0.571479 [    0/60000]
loss : 0.574710 [ 6400/60000]
loss : 0.684689 [12800/60000]
loss : 0.712088 [19200/60000]
loss : 0.667750 [25600/60000]
loss : 0.672077 [32000/60000]
loss : 0.623117 [38400/60000]
loss : 0.551999 [44800/60000]
loss : 0.553998 [51200/60000]
loss : 0.541286 [57600/60000]
Test Error:
 Accurancy: 81.6%, Avg loss: 0.5514632626703591

loss : 0.531536 [    0/60000]
loss : 0.515339 [ 6400/60000]
loss : 0.571115 [12800/60000]
loss : 0.656290 [19200/60000]
loss : 0.367349 [25600/60000]
loss : 0.490077 [32000/60000]
loss : 0.469596 [38400/60000]
loss : 0.399151 [44800/60000]
loss : 0.647262 [51200/60000]
loss : 0