In [1]:
# %matplotlib inline
# from matplotlib import pyplot as plt
# import numpy as np
# import collections
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Abbreviate printed tensors: show only 2 items at each edge of a dimension.
torch.set_printoptions(edgeitems=2)
# Seed PyTorch's global random number generator so runs are repeatable.
torch.manual_seed(123)

<torch._C.Generator at 0x1058f5f00>

# 1

In [5]:
# Model from section 3.5.1
class NetWidth(nn.Module):
    """Small CNN with two 3x3 convolutions and a configurable channel width.

    Layout: conv(3 -> n_chans1) -> tanh -> 2x2 max-pool
            -> conv(n_chans1 -> n_chans1 // 2) -> tanh -> 2x2 max-pool
            -> flatten -> linear(32) -> tanh -> linear(2).

    Both convolutions use padding=1 with a 3x3 kernel, so only the two
    pooling steps shrink the feature map. The flattened size is hard-coded
    as 8 * 8 per channel, i.e. the model assumes 32x32 inputs
    (32 / 2 / 2 = 8) -- TODO confirm against the data actually fed in.

    Args:
        n_chans1: number of output channels of the first convolution.
            The second convolution produces ``n_chans1 // 2`` channels.
    """

    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        n_chans2 = n_chans1 // 2
        # Parenthesised on purpose: `8 * 8 * n_chans1 // 2` evaluates as
        # `(64 * n_chans1) // 2`, which disagrees with conv2's output channel
        # count whenever n_chans1 is odd.
        self._flat_features = 8 * 8 * n_chans2
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans2, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(self._flat_features, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the two raw class scores (no softmax) for a batch ``x``."""
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, self._flat_features)
        out = torch.tanh(self.fc1(out))
        # Was `torch.fc2(out)`: fc2 is a layer of this module, not a torch
        # function, so the original raised AttributeError on every call.
        out = self.fc2(out)
        return out
    
# Model with kernel_size = 5
class NetNew(nn.Module):
    """Small CNN with two 5x5 convolutions and a configurable channel width.

    Layout: conv(3 -> n_chans1) -> tanh -> 2x2 max-pool
            -> conv(n_chans1 -> n_chans1 // 2) -> tanh -> 2x2 max-pool
            -> flatten -> linear(32) -> tanh -> linear(2).

    Both convolutions use padding=2 with a 5x5 kernel, so only the two
    pooling steps shrink the feature map. The flattened size is hard-coded
    as 8 * 8 per channel, i.e. the model assumes 32x32 inputs
    (32 / 2 / 2 = 8) -- TODO confirm against the data actually fed in.

    Args:
        n_chans1: number of output channels of the first convolution.
            The second convolution produces ``n_chans1 // 2`` channels.
    """

    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        n_chans2 = n_chans1 // 2
        # Parenthesised on purpose: `8 * 8 * n_chans1 // 2` evaluates as
        # `(64 * n_chans1) // 2`, which disagrees with conv2's output channel
        # count whenever n_chans1 is odd.
        self._flat_features = 8 * 8 * n_chans2
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(n_chans1, n_chans2, kernel_size=5, padding=2)
        self.fc1 = nn.Linear(self._flat_features, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the two raw class scores (no softmax) for a batch ``x``."""
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, self._flat_features)
        out = torch.tanh(self.fc1(out))
        # Was `torch.fc2(out)`: fc2 is a layer of this module, not a torch
        # function, so the original raised AttributeError on every call.
        out = self.fc2(out)
        return out

## (a)

In [6]:
# Build the 3x3-kernel model with its default width and total up the number
# of scalar entries across all of its parameter tensors.
model = NetWidth()
sum(weights.numel() for weights in model.parameters())

38386

In [7]:
# Build the 5x5-kernel model with its default width and total up the number
# of scalar entries across all of its parameter tensors.
new_model = NetNew()
sum(weights.numel() for weights in new_model.parameters())

48114

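conv層の入出力のサイズは変わらない（kernel_size=5 に対して padding=2 としているため空間サイズが保たれる）が、カーネルが 3×3 から 5×5 に大きくなった分、出力1要素あたりの積和演算が増え、計算量が増える。パラメータも conv 層の重みの分だけ増える（conv1: 896 → 2,432、conv2: 4,624 → 12,816。全結合層は変わらないので、合計は 38,386 → 48,114）。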

## (b)
過学習しやすくなっている。パラメータが増えたから。

## (c)
本に記載されているURLから少し変わっているようだ（現在のドキュメント: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d ）。

## (d)
kernel_size=(1, 3) は、高さ1・幅3のカーネルを使うことを意味する。  
出力の空間サイズを入力と同じに保つには、paddingもそれに合わせて (0, 1) に設定する必要がある。

## (e)
このようなカーネルを用いることで、横に長く続く特徴を捉えやすくなる。

# 2

In [8]:
from torchvision import datasets, transforms

data_path = '../chap7/data'

# Arguments passed to transforms.Normalize(mean, std), one value per RGB
# channel. Presumably statistics of the CIFAR-10 training set -- TODO confirm.
channel_mean = (0.4915, 0.4823, 0.4468)
channel_std = (0.2470, 0.2435, 0.2616)

# Both splits get the same preprocessing: convert to tensor, then normalise.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]

# Full training split (downloaded into data_path if it is not there yet).
cifar10 = datasets.CIFAR10(
    data_path,
    train=True,
    download=True,
    transform=transforms.Compose(preprocessing_steps),
)

# Full held-out split, preprocessed identically.
cifar10_val = datasets.CIFAR10(
    data_path,
    train=False,
    download=True,
    transform=transforms.Compose(preprocessing_steps),
)

# Neither loader shuffles, so batches come back in dataset order.
train_loader = torch.utils.data.DataLoader(
    cifar10, batch_size=64, shuffle=False)
val_loader = torch.utils.data.DataLoader(
    cifar10_val, batch_size=64, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
class Net(nn.Module):
    """Fixed-width CNN: two 3x3 convolutions followed by two linear layers.

    Layout: conv(3 -> 16) -> tanh -> 2x2 max-pool
            -> conv(16 -> 8) -> tanh -> 2x2 max-pool
            -> flatten to 512 features -> linear(32) -> tanh -> linear(2).

    The layer attribute names (conv1, conv2, fc1, fc2) determine the keys of
    the module's state_dict, so they must stay as they are for saved weights
    to load.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16,
                               kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=8,
                               kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=8 * 8 * 8, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=2)

    def forward(self, x):
        """Return the two raw class scores (no softmax) for a batch ``x``."""
        pooled1 = F.max_pool2d(self.conv1(x).tanh(), kernel_size=2)
        pooled2 = F.max_pool2d(self.conv2(pooled1).tanh(), kernel_size=2)
        # Collapse channels and spatial positions into fc1's 512 inputs.
        flat = pooled2.view(-1, self.fc1.in_features)
        hidden = self.fc1(flat).tanh()
        return self.fc2(hidden)
    
# Create an untrained Net, then overwrite its parameters with the ones
# stored in the checkpoint file.
# NOTE(review): depending on the torch version, torch.load may unpickle
# arbitrary objects -- only load checkpoint files from a trusted source.
loaded_model = Net()
saved_weights = torch.load("birds_vs_airplanes.pt")
loaded_model.load_state_dict(saved_weights)

<All keys matched successfully>

In [16]:
def validate(model, train_loader, val_loader, inspect_first_batch=True):
    """Inspect or score ``model`` on the training and validation loaders.

    Args:
        model: callable mapping a batch of inputs to per-class scores of
            shape (batch, n_classes).
        train_loader: iterable of ``(imgs, labels)`` batches, reported under
            the name "train".
        val_loader: iterable of ``(imgs, labels)`` batches, reported under
            the name "val".
        inspect_first_batch: when True (the default, and the behaviour this
            function had before the flag existed), print the softmax
            probabilities and the labels of only the first batch of each
            loader. When False, run through every batch and print the
            accuracy for each loader instead.

    Returns:
        None. All results are printed.
    """
    to_probabilities = nn.Softmax(dim=1)

    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        # Evaluation only: no gradients are needed.
        with torch.no_grad():
            for imgs, labels in loader:
                outputs = model(imgs)

                if inspect_first_batch:
                    # Previously an unconditional print + break, which left
                    # the accuracy bookkeeping below unreachable.
                    print(to_probabilities(outputs))
                    print(labels)
                    break

                # Predicted class is the index of the largest score.
                _, predicted = torch.max(outputs, dim=1)
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        if not inspect_first_batch:
            # An empty loader would otherwise divide by zero.
            accuracy = correct / total if total else float("nan")
            print("Accuracy {}: {:.2f}".format(name, accuracy))

In [17]:
# Feed the unfiltered CIFAR-10 loaders (all ten classes) to the loaded
# two-output model.
validate(loaded_model, train_loader, val_loader)

tensor([[6.0583e-03, 9.9394e-01],
        [5.3286e-02, 9.4671e-01],
        [9.7912e-01, 2.0877e-02],
        [1.3812e-02, 9.8619e-01],
        [9.9701e-01, 2.9871e-03],
        [3.8812e-01, 6.1188e-01],
        [3.3755e-03, 9.9662e-01],
        [1.0280e-05, 9.9999e-01],
        [9.9670e-01, 3.3042e-03],
        [5.1006e-01, 4.8994e-01],
        [1.8710e-03, 9.9813e-01],
        [1.5123e-02, 9.8488e-01],
        [9.7878e-01, 2.1221e-02],
        [3.2126e-02, 9.6787e-01],
        [5.4583e-02, 9.4542e-01],
        [7.2513e-01, 2.7487e-01],
        [9.9533e-01, 4.6679e-03],
        [2.1662e-01, 7.8338e-01],
        [9.6214e-03, 9.9038e-01],
        [8.2025e-01, 1.7975e-01],
        [3.6460e-01, 6.3540e-01],
        [9.5831e-01, 4.1691e-02],
        [1.0057e-02, 9.8994e-01],
        [5.7914e-04, 9.9942e-01],
        [2.0667e-03, 9.9793e-01],
        [3.2939e-03, 9.9671e-01],
        [3.6856e-01, 6.3144e-01],
        [4.2809e-02, 9.5719e-01],
        [1.4512e-01, 8.5488e-01],
        [9.929

In [18]:
# Ten class names; position i is presumably the name for CIFAR-10 label
# id i -- TODO confirm against the dataset's own class list.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

ラベルが0（airplane）でも2（bird）でもない画像に対しても、どちらかのクラスの予測確率が95%を超えるものが多数ある。2クラスのsoftmaxは出力の合計が必ず1になり、「どちらでもない」という結果を表現できないためである。  

## (a)
## (b)
## (c)
省略