<a href="https://colab.research.google.com/github/YukiTamaoki/gitkraken-i18n/blob/master/SpringSeminar_PyTorch1_ipynb_%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 春の勉強会 (PyTorch1)

## モジュールの呼び出し

In [3]:
import numpy as np
import argparse
import sys

import urllib
import zipfile
import os

import torch
import torch.nn as nn #計算グラフの構築に使用
import torch.optim as optim #最適化アルゴリズム
import torchsummary #特徴マップのサイズ確認
import torchvision #機械学習のフレームワーク
import torchvision.transforms as transform #画像変換
from torch.utils.data import DataLoader, Dataset #Detaloader
from torchvision.datasets import MNIST
from torch.autograd import Variable #自動で微分

## モデルクラスの定義

In [4]:
class NN(nn.Module):
    def __init__(self,in_size,hidden_size,out_size):
        super(NN,self).__init__()
        self.xh = nn.Linear(in_size,hidden_size) #全結合層
        self.ho = nn.Linear(hidden_size,out_size) #全結合層
        self.act = nn.ReLU() #ReLU関数
        self.softmax = nn.Softmax(dim=1)
    def forward(self,x,train=False):
        x = x.view(-1,28*28) #28*28に調整
        h = self.act(self.xh(x))
        y = self.act(self.ho(h))
        if train:
            return y
        return self.softmax(y)

## パラメータの設定

In [5]:
parser=argparse.ArgumentParser()
parser.add_argument('--epoch',type=int,default=10,help='number of epoch')
parser.add_argument('--in_size',type=int,default=784,help='input size')
parser.add_argument('--hidden_size',type=int,default=50,help='hidden size')
parser.add_argument('--out_size',type=int,default=10,help='output size')
parser.add_argument('--batch_size',type=int,default=32,help='number of batch size')
parser.add_argument('--lr',type=float,default=0.01,help='number of learning rate')
args=parser.parse_args(args=[])

## データセットのダウンロード

足立先輩のdropboxからMNISTをダウンロード


In [6]:
class MNIST_Loader(Dataset):
  def __init__(self, train=True, download=True, transform=None):
    self.transform = transform
    self.url = 'https://www.dropbox.com/s/hc7ukm7vzst5e40/MNIST.zip?dl=1'
    
    if download:
      self._download()
    
    dname = os.path.join(self.url.rpartition('/')[2][:-9], 'processed')
    if train:
      datapath = os.path.join(dname, 'training.pt')
    else:
      datapath = os.path.join(dname, 'test.pt')
  
    self.data = torch.load(datapath)

  def __len__(self):
    return len(self.data[0])

  def _download(self):
    filename = self.url.rpartition('/')[2][:-5]
    urllib.request.urlretrieve(self.url, filename)

    with zipfile.ZipFile(filename) as existing_zip:
      existing_zip.extractall()
    os.remove(filename)

  def __getitem__(self, i):
    image = transform.ToPILImage()(self.data[0][i])
    label = self.data[1][i]

    if self.transform:
      image = self.transform(image)
    return image, label

In [7]:
try:
  mnist_data = MNIST(root='./data', train=True, transform=transform.ToTensor(), download=True)
except: #例外処理
  print('Warning: Switch the original implementation because official MNIST data did not download (probably the official server has down).')
  mnist_data = MNIST_Loader(train=True, download=True, transform=transform.ToTensor())
print('Done!')

Done!


## 学習用データと評価用データを用意

In [8]:
train_loader=DataLoader(mnist_data,batch_size=args.batch_size,shuffle=True)
test_loader=DataLoader(mnist_data,batch_size=1,shuffle=True)

## モデルの読み込み

In [9]:
model=NN(args.in_size,args.hidden_size,args.out_size)

In [10]:
torchsummary.summary(model, (1,28,28)) #確認用

RuntimeError: ignored

## 損失関数と最適化手法の定義

In [None]:
criterion = nn.CrossEntropyLoss() #損失関数をCross Entropyに設定
optimizer = optim.SGD(model.parameters(),lr=args.lr) #SGDに設定

## 学習&評価用コード

In [None]:
for epoch in range(args.epoch):
    run_loss=0
    correct=0
    total=0
    for i,data in enumerate(train_loader):
        inputs,labels=data
        inputs,labels=Variable(inputs),Variable(labels)
        optimizer.zero_grad()
        outputs=model(inputs,train=True)
        loss=criterion(outputs,labels)
        loss.backward()
        optimizer.step()
        run_loss+=loss.item()
        if i==len(train_loader)-1:
            print('%d loss:%.7f'%(epoch+1,run_loss/len(train_loader)))
            run_loss=0
    for i,data in enumerate(test_loader):
        inputs,labels=data
        inputs,labels=Variable(inputs),Variable(labels)
        outputs=model(inputs,train=False)
        predicted=torch.argmax(outputs)
        total+=labels.size(0)
        correct+=(predicted==labels).sum()
    print("Accuracy %d /%d =%.5f" % (correct,total,float(correct)/total))
    print("========================")

## 分類された画像のプロット

In [None]:
import matplotlib.pyplot as plt
test_iter=iter(test_loader)

GT_label,P_label,image=[],[],[]
for i in range(100):
    inputs,labels=test_iter.next()
    outputs=model(Variable(inputs))
    _,predicted=torch.max(outputs.data,1)
    GT_label.append(labels[0].numpy().astype(np.int32))
    P_label.append(predicted[0].numpy().astype(np.int32))
    image.append(inputs[0].numpy().reshape(28,28))
    
plt.figure(figsize=[36,36])
for e,(gt,p,i) in enumerate(zip(GT_label,P_label,image)):
    plt.subplot(10,10,e+1)
    plt.imshow(i,cmap='gray') #gray scaleで描画
    plt.xticks([]) #目盛りいらない
    plt.yticks([])
    plt.title("true label # %01d" %gt,c='b') #blueでtrue labelをタイトルとして描画
    plt.xlabel("pred label # %01d" %p,c='r') #redでpred labetをx軸ラベルとして描画

## 課題1

In [None]:
parser=argparse.ArgumentParser()
parser.add_argument('--epoch',type=int,default=10,help='number of epoch')
parser.add_argument('--in_size',type=int,default=784,help='input size')
parser.add_argument('--hidden_size',type=int,default=100,help='hidden size')  ##←ここ50から100
parser.add_argument('--out_size',type=int,default=10,help='output size')
parser.add_argument('--batch_size',type=int,default=32,help='number of batch size')
parser.add_argument('--lr',type=float,default=0.01,help='number of learning rate')
args=parser.parse_args(args=[])

# 課題２

In [None]:
criterion = nn.CrossEntropyLoss() #損失関数をCross Entropyに設定
optimizer = optim.Adam(model.parameters(),lr=args.lr) #SGDに設定→adamに設定

## 課題３

In [14]:
parser=argparse.ArgumentParser()
parser.add_argument('--epoch',type=int,default=10,help='number of epoch')
parser.add_argument('--in_size',type=int,default=784,help='input size')
parser.add_argument('--hidden_size',type=int,default=40,help='hidden size')  ##←ここ50から40
parser.add_argument('--out_size',type=int,default=10,help='output size')
parser.add_argument('--batch_size',type=int,default=32,help='number of batch size')
parser.add_argument('--lr',type=float,default=0.01,help='number of learning rate')
args=parser.parse_args(args=[])

In [15]:
criterion = nn.CrossEntropyLoss() #損失関数をCross Entropyに設定
optimizer = optim.Adam(model.parameters(),lr=args.lr) #SGDに設定→adamに設定

In [16]:
for epoch in range(args.epoch):
    run_loss=0
    correct=0
    total=0
    for i,data in enumerate(train_loader):
        inputs,labels=data
        inputs,labels=Variable(inputs),Variable(labels)
        optimizer.zero_grad()
        outputs=model(inputs,train=True)
        loss=criterion(outputs,labels)
        loss.backward()
        optimizer.step()
        run_loss+=loss.item()
        if i==len(train_loader)-1:
            print('%d loss:%.7f'%(epoch+1,run_loss/len(train_loader)))
            run_loss=0
    for i,data in enumerate(test_loader):
        inputs,labels=data
        inputs,labels=Variable(inputs),Variable(labels)
        outputs=model(inputs,train=False)
        predicted=torch.argmax(outputs)
        total+=labels.size(0)
        correct+=(predicted==labels).sum()
    print("Accuracy %d /%d =%.5f" % (correct,total,float(correct)/total))
    print("========================")

1 loss:0.3751900
Accuracy 51675 /60000 =0.86125
2 loss:0.3646150
Accuracy 51775 /60000 =0.86292
3 loss:0.3696100
Accuracy 51897 /60000 =0.86495
4 loss:0.3668350
Accuracy 51656 /60000 =0.86093
5 loss:0.3679934
Accuracy 51872 /60000 =0.86453
6 loss:0.3598067
Accuracy 51934 /60000 =0.86557
7 loss:0.3633921
Accuracy 52041 /60000 =0.86735
8 loss:0.3637400
Accuracy 51950 /60000 =0.86583
9 loss:0.3580446
Accuracy 51586 /60000 =0.85977
10 loss:0.3585311
Accuracy 51932 /60000 =0.86553


> SGDに設定→adamに設定  
10 loss:0.6674661  
Accuracy 45685 /60000 =0.76142  
↓  
10 loss:0.3191071  
Accuracy 53158 /60000 =0.88597  
  
> かつ中間層を50→100に設定  
10 loss:0.3191071  
Accuracy 53158 /60000 =0.88597  
↓  
10 loss:0.3749852  
Accuracy 51765 /60000 =0.86275  


> なら中間層を100→40  
10 loss:0.3191071  
Accuracy 53158 /60000 =0.88597  
↓  
10 loss:0.3585311  
Accuracy 51932 /60000 =0.86553  



adamの中間層50が一番よさそう