In [100]:
# future
from __future__ import print_function

In [101]:
# third party
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets
from torchvision import transforms

In [102]:
# let's prepare parameters パラメータ定義
class Args():
    def __init__(self):
        super(Args, self).__init__()
        
        # 何個ずつデータを処理するか？
        self.batch_size = 64
        
        # 全てのデータを何周回すか？
        self.epochs = 2
#        self.epochs = 14
        
        # learning rate
        self.lr = 1.0
        
        # ガンマ。LRが回るたびにへったり小さくなったり、をどれくらい小さくするか。LRの調整かオプティマイズて使用するパラメータ
        self.gamma = 0.7
        
        # True: CPU, False: GPU
        self.no_cuda = False
        
        # 通常は不要。Trueになると1　epoch回した時に止める。実験で試したい時に使用
        self.dry_run = False
        
        # 乱数固定することで全く同じ結果を再現する
        self.seed = 42
        
        # 64個つづアーキテクチャにブッコム。これを１つとしたときに100stepごとにログを出すと言う設定
        self.log_interval = 100
        
        # トレーニングしたweightやparamsを保存するか
        self.save_model = True
        
        # テストをするときに1000個ずつ画像を処理する
        self.test_batch_size = 1000
        
args = Args()

# check it
args.test_batch_size
args.no_cuda

False

In [103]:
# we use cpu
use_cuda = False

# torchの乱数の初期値を42にセット
torch.manual_seed(args.seed)

# use_cudaがTrueであればcuda、それ以外はcpuを入れる
device = torch.device("cuda" if use_cuda else "cpu")


In [104]:
# cpu
# device = torch.device("cpu")

# gpu
# device = torch.device("cuda")

# GPUは使うものを指定可能
# device = torch.device("cuda:0")
# device = torch.device("cuda:1")

## datasets

In [105]:
# 画像をTensorに変換
# ノーマライズする
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# トレーニングデータをダウンロードする
# トレーニングする
dataset1 = datasets.MNIST("../data", train=True, download=True, transform=transform)

# トレーニングしない
# validationデータセット
dataset2 = datasets.MNIST("../data", train=False, transform=transform)


In [106]:
x_sample, y_sample = dataset1[129]
x_sample.shape, y_sample

(torch.Size([1, 28, 28]), 6)

In [109]:
# タプル型である、2つのデータが帰って来ている
len(dataset1[129])

2

In [110]:
len(dataset1), len(dataset2)

(60000, 10000)

In [111]:
# add some other params for dataloaders
# データセットを使用し、バッチ毎にデータをとってこれるdataloaderを定義

train_kwargs = {"batch_size": args.batch_size}
test_kwargs = {"batch_size": args.test_batch_size}
if use_cuda:
    cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)
    
train_kwargs

# batch_size: 64個ずつ
# num_workers: 画像をTensorにしたりargに入れる。Python的にいくつのプロセス、スレッドで実行するか
# pin_memory: メモリを効率よく使う指定： True
# shuffle: データを取り出すときに順番をシャッフルして使う


{'batch_size': 64}

In [112]:
# prepare data loader
# **２つで情報が順番に入る

# training dataloader
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)

# validation dataloader
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

In [113]:
len(train_loader)

938

In [114]:
938 * args.batch_size

60032

In [115]:
x_sample1, y_sample1 = next(iter(train_loader))
x_sample1.shape, y_sample1.shape

### End of Dataset, Dataloader

(torch.Size([64, 1, 28, 28]), torch.Size([64]))

## architecture

In [116]:
# architecture

# nn.ModuleはPytorchの基本class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # 画像処理の鉄板であるconvlutionを2つ定義
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        
        # overfittingを防ぐためのdropoutを2つ定義
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        
        # 行列演算を2つ定義
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    # Pytorchで必ず必要なfunction
    # forwardで実際にxとしてデータが入ってくる
    def forward(self, x):
        x = self.conv1(x)
        
        # 不連続な処理を入れるためにreluを入れる
        x = F.relu(x)
        
        x = self.conv2(x)
        x = F.relu(x)
        
        # 画像の大きさを縦横半分にする
        x = F.max_pool2d(x, 2)
        
        x = self.dropout1(x)
        
        # Tensorの形を平す
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        
        # softmaxはpercent化のようなもの
        # (いぬ、ねこ、とり) (0.6, 0.3, 0.1)
        
        # log_softmax
        output = F.log_softmax(x, dim=1)
        return output
        


In [117]:
model = Net().to(device)

optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

# 変更を担当する。epochごとに
scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

model

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

## training loop

In [118]:
# training loop

for epoch in range(1, args.epochs + 1):

    #     train(args, model, device, train_loader, optimizer, epoch)
    # training 
    
    # モデルを学習モードにする
    # model.eval() < 推論モード
    model.train()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        
        # data.shape: (64,1,28,28), target.shape: (64)
        data, target = data.to(device), target.to(device)
        
        # 微分のデータおwいあt
        optimizer.zero_grad()
        
        
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print(
                "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch,
                    batch_idx * len(data),
                    len(train_loader.dataset),
                    100.0 * batch_idx / len(train_loader),
                    
                    # 実際どのitemか
                    loss.item(),
                )
            )
            if args.dry_run:
                break

    # test(model, device, test_loader)
    
    # validation
    # モデルを評価モードに切り替える
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            
            # ロスを追加で足す
            test_loss += F.nll_loss(
                output, target, reduction="sum"
            ).item()  # sum up batch loss
            
            pred = output.argmax(
                dim=1, keepdim=True
            )  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
            test_loss,
            correct,
            len(test_loader.dataset),
            100.0 * correct / len(test_loader.dataset),
        )
    )
    
    # update 
    scheduler.step()
    
    
    


Test set: Average loss: 0.0539, Accuracy: 9822/10000 (98%)


Test set: Average loss: 0.0357, Accuracy: 9881/10000 (99%)



In [81]:
# save model

# state_dictは現在の値
if args.save_model:
    torch.save(model.state_dict(), "mnist_cnn.pth")
    

In [82]:
# load and use it

## How to train a model with MY DATA!

In [83]:
# 
# https://github.com/myleott/mnist_png

In [84]:
# from torchaudio.datasets.utils import walk_files

from typing import Any, Iterable, List, Optional, Tuple, Union

def walk_files(root: str,
               suffix: Union[str, Tuple[str]],
               prefix: bool = False,
               remove_suffix: bool = False) -> Iterable[str]:
    """List recursively all files ending with a suffix at a given root
    Args:
        root (str): Path to directory whose folders need to be listed
        suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png').
            It uses the Python "str.endswith" method and is passed directly
        prefix (bool, optional): If true, prepends the full path to each result, otherwise
            only returns the name of the files found (Default: ``False``)
        remove_suffix (bool, optional): If true, removes the suffix to each result defined in suffix,
            otherwise will return the result as found (Default: ``False``).
    """

    root = os.path.expanduser(root)

    for dirpath, dirs, files in os.walk(root):
        dirs.sort()
        # `dirs` is the list used in os.walk function and by sorting it in-place here, we change the
        # behavior of os.walk to traverse sub directory alphabetically
        # see also
        # https://stackoverflow.com/questions/6670029/can-i-force-python3s-os-walk-to-visit-directories-in-alphabetical-order-how#comment71993866_6670926
        files.sort()
        for f in files:
            if f.endswith(suffix):

                if remove_suffix:
                    f = f[: -len(suffix)]

                if prefix:
                    f = os.path.join(dirpath, f)

                yield f


In [85]:
import os

walker = walk_files(
    "/disk2/data/mnist_png/mnist_png/training",
    suffix="png",
    prefix=True,
    remove_suffix=False
)
_walker = list(walker)

In [86]:
# _walker

In [87]:
from torch.utils.data import Dataset

In [88]:
# check datasets1
dataset1[0][0].shape, dataset1[0][1]

(torch.Size([1, 28, 28]), 5)

In [89]:
from PIL import Image

# transform = transforms.Compose(
#     [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
# )

class MyDataset(Dataset):
    def __init__(self, data_list):
        """
        MyDataset based on Dataset
        """
        super(MyDataset, self).__init__()
        self.data_list = data_list
        
        # 同じデータ処理を定義しておく
        self.toTensor = transforms.ToTensor()
        self.normalize = transforms.Normalize((0.1307,), (0.3081,))
        
    # indexを受け取って、データを返す    
    def __getitem__(self, index):
        
        # get path
        _path = self.data_list[index]
        
        # get label
        # 最後から2つ目をlabelとして取得
        _label = _path.split("/")[-2]
        
        # read image 
        img = Image.open(_path)
        
        # apply transforms
        img = self.toTensor(img)
        img = self.normalize(img)
         
        return img, int(_label)

    # データの数
    def __len__(self):
        return len(self.data_list)


In [96]:
my_dataset = MyDataset(_walker)

In [97]:
len(my_dataset)

0

In [98]:
my_dataset[0][0].shape, my_dataset[0][1]

IndexError: list index out of range

In [None]:
my_train_loader = torch.utils.data.DataLoader(my_dataset, **train_kwargs)

In [None]:
# define train block
def train(args, model, device, train_loader, optimizer, epoch):
    
    model.train()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print(
                "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch,
                    batch_idx * len(data),
                    len(train_loader.dataset),
                    100.0 * batch_idx / len(train_loader),
                    loss.item(),
                )
            )
            if args.dry_run:
                break
                
                

In [None]:
# define test block
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(
                output, target, reduction="sum"
            ).item()  # sum up batch loss
            pred = output.argmax(
                dim=1, keepdim=True
            )  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
            test_loss,
            correct,
            len(test_loader.dataset),
            100.0 * correct / len(test_loader.dataset),
        )
    )



In [99]:
# training loop 
for epoch in range(1, args.epochs + 1):
    
    # training loop
    train(args, model, device, my_train_loader, optimizer, epoch)
    
    # validation loop
    test(model, device, test_loader)
    scheduler.step()



Test set: Average loss: 0.0282, Accuracy: 9903/10000 (99%)


Test set: Average loss: 0.0282, Accuracy: 9903/10000 (99%)

