input未進行任何處理
input大小 28x28

LeNet優點
* 避免了對影象複雜的前期預處理過程，尤其是人工參與影象預處理過程\
* 卷積：
  * 區域性感受野
  * 共享權值、減少計算量
  * 池化:平移旋轉不變性、降低依賴、降低過擬和

In [11]:
import time
import torch
from torch import nn, optim
import sys
import torchvision
from torchvision import transforms
# sys.path.append("..")
device = torch.device('cuda' if torch.cuda.is_available else 'cpu')

In [12]:
def evaluate_accuracy(data_iter, net, device=None):
  if device is None and isinstance(net, nn.Module):
    device = list(net.parameters())[0].device
  acc_sum, n = 0.0, 0
  with torch.no_grad():
    for X, y in train_iter:
      if isinstance(net, nn.Module):          
        net.eval()
        acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum()
        net.train()
      else: #自定義模型
        pass
      n += y.shape[0]
  return acc_sum / n

def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):  
  net = net.to(device)
  loss = nn.CrossEntropyLoss()
  
  for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, batch_count, start, n = 0.0, 0.0, 0, time.time(), 0
    for X, y in train_iter:
      X=X.to(device)
      y=y.to(device)
      y_hat = net(X)
      l = loss(y_hat, y)
      optimizer.zero_grad()
      l.backward()
      optimizer.step()

      train_l_sum += l.cpu().item()
      train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
      batch_count += 1
      n += y.shape[0]
    test_acc = evaluate_accuracy(test_iter, net)
    print('epoch: %d, loss: %.4f, acc: %.3f, test_acc: %.3f time: %.1f sec' % (epoch+1, train_l_sum/batch_count, train_acc_sum/n, test_acc, time.time()-start))

In [None]:
def load_mnist(batch_size):
  mnist_train = torchvision.datasets.FashionMNIST(root="~/Datasets/FashionMN", train=True, download=True, transform=transforms.ToTensor())
  mnist_test = torchvision.datasets.FashionMNIST(root="~/Datasets/FashionMN", train=False, download=True, transform=transforms.ToTensor())
  if sys.platform.startswith('win'):
    num_workers = 0
  else:
    num_workers = 4
  train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=False, num_workers=num_workers)
  test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

  return train_iter, test_iter


In [None]:
class LeNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv = nn.Sequential( #input_size 28x28
        nn.Conv2d(1,6,5),
        nn.Sigmoid(),
        nn.MaxPool2d(2,2),
        nn.Conv2d(6,16,5),
        nn.Sigmoid(),
        nn.MaxPool2d(2,2)        
    )
    self.fc = nn.Sequential(    
        nn.Linear(16*4*4, 120),
        nn.Sigmoid(),
        nn.Linear(120,84),
        nn.Sigmoid(),
        nn.Linear(84,10)
    )

  def forward(self, input):
    features = self.conv(input)
    output = self.fc(features.view(input.shape[0],-1))
    return output

net = LeNet()
print(net)

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [None]:
batch_size = 256
train_iter, test_iter = load_mnist(batch_size=batch_size)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/Datasets/FashionMN/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/Datasets/FashionMN/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/Datasets/FashionMN/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/Datasets/FashionMN/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
list(net.parameters())[0].device

device(type='cpu')

In [None]:
lr, num_epochs = 0.001, 5
optimizer = optim.Adam(net.parameters(), lr=lr)
train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

In [None]:
def predict(input):
  return net(input.to(device)).argmax(dim=1)
for X, y in test_iter:
  predictions = predict(X)
  print(predictions)

AlexNet的成功
* 使用了非線性啟用函式：ReLU 
  * 使用Sigmoid或者tanh等容易出現梯度彌散或梯度飽和的情況，當輸入的值非常大或者非常小的時候，這些神經元的梯度接近於0
  * 由於ReLU是線性的，且導數始終為1，計算量大大減少，收斂速度會比Sigmoid/tanh快很多
* 防止過擬合的方法：Dropout，資料擴充（Data augmentation） 
  * 如果無法獲得新資料，就用擴充的當作新資料，提高泛化能力：水平翻轉影象，從原始影象中隨機裁剪、平移變換，顏色、光照變換
* 其他：多GPU實現，LRN歸一化層的使用


AlexNet 重疊池化減少過擬合?
* pool 一旦我們知道原始輸入量中（這裡將會有一個高激活值）有一個特定的特徵，那麼它的確切位置，就沒有它相對於其他特徵的位置那麼重要了，所以提取出來。有兩個優點(因為參數減少)
減少計算成本、降低過擬合

  * 抑制噪声，降低信息冗余
  * 提升模型的尺度不变性、旋转不变形
  * 降低模型计算量
  * 防止过拟合

* stide 壓縮信息 如果希望在感受域有更少地重疊、保持較小的output
* padding 避免信息損失

In [6]:
class AlexNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(1, 96, 11, 4),
        nn.ReLU(),
        nn.MaxPool2d(3, 2),
        nn.Conv2d(96, 256, 5, 1, 2),
        nn.ReLU(),
        nn.MaxPool2d(3, 2),
        nn.Conv2d(256, 384, 3, 1, 1),
        nn.ReLU(),
        nn.Conv2d(384, 384, 3, 1, 1),
        nn.ReLU(),
        nn.Conv2d(384, 256, 3, 1, 1),
        nn.ReLU(),
        nn.MaxPool2d(3, 2)
    )
    self.fc = nn.Sequential(
        nn.Linear(256*6*6, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, 10) 
        )
    
  def forward(self, input):
    feature = self.conv(input)
    output = self.fc(feature.view(input.shape[0], -1))
    return output

In [7]:
net = AlexNet()
print(net)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (

In [3]:
def load_mnist_alex(batch_size, resize=None, root="~/Datasets/FashionMN"):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
      trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans) #組合Transform

    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)

    return train_iter, test_iter

batch_size = 128
train_iter, test_iter = load_mnist_alex(batch_size, resize=224)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/Datasets/FashionMN/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/Datasets/FashionMN/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/Datasets/FashionMN/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/Datasets/FashionMN/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/Datasets/FashionMN/FashionMNIST/raw
Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!


In [None]:
lr, num_epochs = 0.001, 5
optimizer = optim.Adam(net.parameters(), lr=lr)
train_ch5(net=net, train_iter=train_iter, test_iter=test_iter, optimizer=optimizer, batch_size=batch_size, num_epochs=num_epochs, device=device)