In [1]:
import torch
import math
import torch.nn as nn
import torch.optim as optim
import torch.utils
import PIL
from matplotlib import pyplot as plt
from PIL import Image
from torchvision import transforms
from torchvision import datasets
import d2l
from d2l import torch as d2l
from IPython import display
import datetime
import torch.nn.functional as F

In [2]:
#Downloading CIFAR-10
data_path = '../data-unversioned/p1ch7/'
cifar10 = datasets.CIFAR10(data_path, train=True, download=True)
cifar10_val = datasets.CIFAR10(data_path, train=False, download=True) #下载太慢请开代理

# 引入normalize的数据初始化
tensor_cifar10_normalize_train = datasets.CIFAR10(data_path, train=True, download=False,
                            transform = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.4915, 0.4823, 0.4468),
                                                     (0.2470, 0.2435, 0.2616))
                            ]))

tensor_cifar10_normalize_val = datasets.CIFAR10(data_path, train=False, download=False,
                            transform = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.4915, 0.4823, 0.4468),
                                                     (0.2470, 0.2435, 0.2616))
                            ]))

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Build the dataset and DataLoader
label_map = {0: 0, 2: 1} # 占位符
class_names = ['airplane', 'bird']
# 训练集
cifar2 = [(img, label_map[label])
    for img, label in tensor_cifar10_normalize_train
        if label in [0, 2]]
# 验证集
cifar2_val = [(img, label_map[label])
   for img, label in tensor_cifar10_normalize_val
      if label in [0, 2]]

# train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=True)

In [4]:
# 使用torch.nn.functional实现更简洁的定义网络的方法
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.fc2 = nn.Linear(32, 2)
        
    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, 8 * 8 * 8)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [5]:
device = (torch.device('cuda') if torch.cuda.is_available()
  else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda.


In [6]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
          imgs = imgs.to(device)
          labels = labels.to(device)
          outputs = model(imgs)
          loss = loss_fn(outputs, labels)
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()
          loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(datetime.datetime.now(), epoch, loss_train / len(train_loader)))

In [7]:
# 8.4.1 Measuring accuracy
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)

In [8]:
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

# training_loop(
#   n_epochs = 100,
#   optimizer = optimizer,
#   model = model,
#   loss_fn = loss_fn,
#   train_loader = train_loader,
# )

In [9]:
def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0
        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device)
                labels = labels.to(device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1)
                total += labels.shape[0]
                correct += int((predicted == labels).sum())
        print("Accuracy {}: {:.4f}".format(name , correct / total))

In [10]:
#validate(model, train_loader, val_loader)

In [11]:
# Save and load model
torch.save(model.state_dict(), data_path + 'birds_vs_airplanes.pt')

In [12]:
# cpu加载
loaded_model = Net()
loaded_model.load_state_dict(torch.load(data_path + 'birds_vs_airplanes.pt'))

<All keys matched successfully>

In [13]:
# gpu加载
loaded_model = Net().to(device=device)
loaded_model.load_state_dict(torch.load(data_path + 'birds_vs_airplanes.pt', map_location=device))

<All keys matched successfully>

In [14]:
# 8.5 Adding memory capacity: Width
class NetWidth(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(16 * 8 * 8, 32)
        self.fc2 = nn.Linear(32, 2)
        
    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, 16 * 8 * 8)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [15]:
# avoid hardcoding numbers,pass parameters use init
class NetWidth(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 2)
        
    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, 8 * 8 * self.n_chans1 // 2)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [16]:
net = nn.Sequential(
                    nn.Conv2d(3, 32, kernel_size=3, padding=1), nn.Tanh(),
                    nn.MaxPool2d(kernel_size=2, stride=2),
                    nn.Conv2d(32, 16, kernel_size=3, padding=1), nn.Tanh(),
                    nn.MaxPool2d(kernel_size=2, stride=2),
                    nn.Flatten(),
                    nn.Linear(16 * 8 * 8, 32), nn.Tanh(),
                    nn.Linear(32,2)
                    )

In [17]:
#查看每层卷积的形状
X = torch.rand(3,3,32,32)
for layer in net:
    X=layer(X)
    print(layer.__class__.__name__,'Output shape:\t',X.shape)

Conv2d Output shape:	 torch.Size([3, 32, 32, 32])
Tanh Output shape:	 torch.Size([3, 32, 32, 32])
MaxPool2d Output shape:	 torch.Size([3, 32, 16, 16])
Conv2d Output shape:	 torch.Size([3, 16, 16, 16])
Tanh Output shape:	 torch.Size([3, 16, 16, 16])
MaxPool2d Output shape:	 torch.Size([3, 16, 8, 8])
Flatten Output shape:	 torch.Size([3, 1024])
Linear Output shape:	 torch.Size([3, 32])
Tanh Output shape:	 torch.Size([3, 32])
Linear Output shape:	 torch.Size([3, 2])


In [18]:
#计算模型中所有参数的数目
sum(p.numel() for p in model.parameters())

18090

In [19]:
model2 = NetWidth()
model2.eval()

NetWidth(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1024, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

In [20]:
sum(p.numel() for p in model2.parameters())

38386

In [21]:
# 8.5.2 Helping our model to converge and generalize: Regularization
# L2正则化
def training_loop_l2reg(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            l2_lambda = 0.001
            l2_norm = sum(p.pow(2.0).sum()
        for p in model.parameters())
            loss = loss + l2_lambda * l2_norm
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(datetime.datetime.now(), epoch, loss_train / len(train_loader)))

In [22]:
# DropOut
class NetDropout(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_dropout = nn.Dropout2d(p=0.4)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3, padding=1)
        self.conv2_dropout = nn.Dropout2d(p=0.4)
        self.fc1 = nn.Linear(8 * 8 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 2)
    
    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = self.conv1_dropout(out)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = self.conv2_dropout(out)
        out = out.view(-1,8*8* self.n_chans1 // 2)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [23]:
model = NetDropout()
model.train()
model.eval()

NetDropout(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_dropout): Dropout2d(p=0.4, inplace=False)
  (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_dropout): Dropout2d(p=0.4, inplace=False)
  (fc1): Linear(in_features=1024, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

In [24]:
# BATCH NORMALIZATION
class NetBatchNorm(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3, padding=1)
        self.conv2_batchnorm = nn.BatchNorm2d(num_features=n_chans1 // 2)
        self.fc1 = nn.Linear(8 * 8 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 2)
    
    def forward(self, x):
        out = self.conv1_batchnorm(self.conv1(x))
        out = F.max_pool2d(torch.tanh(out), 2)
        out = self.conv2_batchnorm(self.conv2(out))
        out = F.max_pool2d(torch.tanh(out), 2)
        out = out.view(-1,8*8* self.n_chans1 // 2)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [25]:
model = NetBatchNorm()
model.eval()

NetBatchNorm(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_batchnorm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_batchnorm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=1024, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

In [26]:
class NetDepth(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3,padding=1)
        self.conv3 = nn.Conv2d(n_chans1 // 2, n_chans1 // 2, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(4 * 4 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 2)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out = F.max_pool2d(torch.relu(self.conv3(out)), 2)
        out = out.view(-1,4*4* self.n_chans1 // 2)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [27]:
model = NetDepth()
model.eval()

NetDepth(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=256, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

In [28]:
# ResNet残差网络
class NetRes(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(n_chans1 // 2, n_chans1 // 2, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(4 * 4 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 2)
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out1 = out
        out = F.max_pool2d(torch.relu(self.conv3(out)) + out1, 2)
        out = out.view(-1,4*4* self.n_chans1 // 2)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [29]:
model = NetRes()
model.eval()

NetRes(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=256, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

In [30]:
# 定义一个ResNet Bolck
class ResBlock(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3, padding=1, bias=False)
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight, nonlinearity='relu')
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)
    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

In [31]:
model = ResBlock(3)
model.eval()

ResBlock(
  (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [32]:
# 通过堆叠resnet-block实现深度ResNet网络
class NetResDeep(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
        *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 2)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1,8*8* self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [33]:
model = NetResDeep()
model.eval()

NetResDeep(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (resblocks): Sequential(
    (0): ResBlock(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResBlock(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): ResBlock(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): ResBlock(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (4): ResBlock

In [34]:
# 8.5.4 Comparing the designs from this section
# 对比不同架构网络的优劣

In [35]:
# BaseLine
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
  n_epochs = 100,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)

validate(model, train_loader, val_loader)

2021-10-24 09:08:58.274755 Epoch 1, Training loss 0.5824292490057125
2021-10-24 09:09:04.379817 Epoch 10, Training loss 0.34009778974162547
2021-10-24 09:09:10.787169 Epoch 20, Training loss 0.29815340820391467
2021-10-24 09:09:17.379319 Epoch 30, Training loss 0.27077277759268026
2021-10-24 09:09:24.030786 Epoch 40, Training loss 0.2498732055922982
2021-10-24 09:09:30.681399 Epoch 50, Training loss 0.2323270957845791
2021-10-24 09:09:37.314636 Epoch 60, Training loss 0.21558729516472785
2021-10-24 09:09:43.827044 Epoch 70, Training loss 0.19935288389397274
2021-10-24 09:09:50.530909 Epoch 80, Training loss 0.18338651917162974
2021-10-24 09:09:57.206021 Epoch 90, Training loss 0.16800912659449183
2021-10-24 09:10:03.789852 Epoch 100, Training loss 0.1533083135060444
Accuracy train: 0.9345
Accuracy val: 0.8875


In [36]:
# Width
model = NetWidth().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
  n_epochs = 100,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)

validate(model, train_loader, val_loader)

2021-10-24 09:10:04.910695 Epoch 1, Training loss 0.5541238353890219
2021-10-24 09:10:12.310755 Epoch 10, Training loss 0.314949399726406
2021-10-24 09:10:20.346041 Epoch 20, Training loss 0.27511174132110205
2021-10-24 09:10:28.477065 Epoch 30, Training loss 0.23893693246089728
2021-10-24 09:10:36.620288 Epoch 40, Training loss 0.2095460427130104
2021-10-24 09:10:44.682417 Epoch 50, Training loss 0.18427835960107244
2021-10-24 09:10:52.726959 Epoch 60, Training loss 0.16028781512835225
2021-10-24 09:11:01.037409 Epoch 70, Training loss 0.13716988222804039
2021-10-24 09:11:09.394669 Epoch 80, Training loss 0.11557040690996084
2021-10-24 09:11:17.489434 Epoch 90, Training loss 0.095670966775554
2021-10-24 09:11:25.512568 Epoch 100, Training loss 0.07782927581411637
Accuracy train: 0.9718
Accuracy val: 0.8970


In [37]:
# L2-REG
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop_l2reg(
  n_epochs = 100,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)
validate(model, train_loader, val_loader)

2021-10-24 09:11:26.972561 Epoch 1, Training loss 0.6116031729112006
2021-10-24 09:11:36.307936 Epoch 10, Training loss 0.35351358819159734
2021-10-24 09:11:46.983132 Epoch 20, Training loss 0.32086193295800763
2021-10-24 09:11:57.273923 Epoch 30, Training loss 0.2964062353797779
2021-10-24 09:12:07.850323 Epoch 40, Training loss 0.2789325268025611
2021-10-24 09:12:18.469925 Epoch 50, Training loss 0.26450393315713117
2021-10-24 09:12:29.093945 Epoch 60, Training loss 0.2513328030420716
2021-10-24 09:12:39.600604 Epoch 70, Training loss 0.23982535055868184
2021-10-24 09:12:50.017184 Epoch 80, Training loss 0.22974915356393072
2021-10-24 09:13:00.645942 Epoch 90, Training loss 0.22088077512516338
2021-10-24 09:13:11.249288 Epoch 100, Training loss 0.2125840371201752
Accuracy train: 0.9238
Accuracy val: 0.8855


In [38]:
# dropout
model = NetDropout().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
  n_epochs = 100,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)
validate(model, train_loader, val_loader)

2021-10-24 09:13:12.546927 Epoch 1, Training loss 0.5811816271702954
2021-10-24 09:13:20.047160 Epoch 10, Training loss 0.38090168651501843
2021-10-24 09:13:28.294411 Epoch 20, Training loss 0.34809785616246
2021-10-24 09:13:36.512001 Epoch 30, Training loss 0.3335491590628958
2021-10-24 09:13:44.962461 Epoch 40, Training loss 0.31835454844745104
2021-10-24 09:13:53.363426 Epoch 50, Training loss 0.30043149962546717
2021-10-24 09:14:01.674050 Epoch 60, Training loss 0.2903287018750124
2021-10-24 09:14:10.099904 Epoch 70, Training loss 0.27566998931253034
2021-10-24 09:14:18.399437 Epoch 80, Training loss 0.2647757158157932
2021-10-24 09:14:26.914142 Epoch 90, Training loss 0.2560412506009363
2021-10-24 09:14:35.314404 Epoch 100, Training loss 0.24476294200511495
Accuracy train: 0.8949
Accuracy val: 0.8725


In [39]:
# batch-norm
model = NetBatchNorm().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
  n_epochs = 100,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)
validate(model, train_loader, val_loader)

2021-10-24 09:14:36.751522 Epoch 1, Training loss 0.4462037251633444
2021-10-24 09:14:45.019397 Epoch 10, Training loss 0.2653114297872136
2021-10-24 09:14:54.226660 Epoch 20, Training loss 0.20534340567459727
2021-10-24 09:15:03.466422 Epoch 30, Training loss 0.15773218993548374
2021-10-24 09:15:12.663165 Epoch 40, Training loss 0.11624795588765555
2021-10-24 09:15:21.809689 Epoch 50, Training loss 0.08143674506313482
2021-10-24 09:15:30.965873 Epoch 60, Training loss 0.05278764862068899
2021-10-24 09:15:40.062880 Epoch 70, Training loss 0.033547059371830175
2021-10-24 09:15:49.135889 Epoch 80, Training loss 0.0249861352691415
2021-10-24 09:15:58.133812 Epoch 90, Training loss 0.01445921497111013
2021-10-24 09:16:07.328517 Epoch 100, Training loss 0.00978637192172894
Accuracy train: 0.9919
Accuracy val: 0.8820


In [40]:
# depth
model = NetDepth().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
  n_epochs = 100,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)
validate(model, train_loader, val_loader)

2021-10-24 09:16:08.832640 Epoch 1, Training loss 0.6696571468547651
2021-10-24 09:16:17.299201 Epoch 10, Training loss 0.3425118222737768
2021-10-24 09:16:26.618532 Epoch 20, Training loss 0.2969878880651134
2021-10-24 09:16:36.012895 Epoch 30, Training loss 0.2666872978020626
2021-10-24 09:16:45.468434 Epoch 40, Training loss 0.24082016726587988
2021-10-24 09:16:55.006942 Epoch 50, Training loss 0.2158025407297596
2021-10-24 09:17:04.367028 Epoch 60, Training loss 0.19011007647985106
2021-10-24 09:17:13.731457 Epoch 70, Training loss 0.16565029114294963
2021-10-24 09:17:23.075122 Epoch 80, Training loss 0.13943742343764395
2021-10-24 09:17:32.653569 Epoch 90, Training loss 0.11420408664211916
2021-10-24 09:17:42.414246 Epoch 100, Training loss 0.09182678348129722
Accuracy train: 0.9285
Accuracy val: 0.8800


In [41]:
# RES
model = NetRes().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
  n_epochs = 100,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)
validate(model, train_loader, val_loader)

2021-10-24 09:17:43.980826 Epoch 1, Training loss 0.6366562687667312
2021-10-24 09:17:52.969054 Epoch 10, Training loss 0.33136845014657185
2021-10-24 09:18:02.881987 Epoch 20, Training loss 0.29083979101317703
2021-10-24 09:18:12.721063 Epoch 30, Training loss 0.25499793365123163
2021-10-24 09:18:22.706643 Epoch 40, Training loss 0.22438272155204397
2021-10-24 09:18:32.706656 Epoch 50, Training loss 0.19872367832880872
2021-10-24 09:18:42.619525 Epoch 60, Training loss 0.17619514536515923
2021-10-24 09:18:52.567528 Epoch 70, Training loss 0.15467868059588846
2021-10-24 09:19:02.469907 Epoch 80, Training loss 0.13344264991439073
2021-10-24 09:19:12.171441 Epoch 90, Training loss 0.11253324694409492
2021-10-24 09:19:21.641279 Epoch 100, Training loss 0.09167177724847748
Accuracy train: 0.9695
Accuracy val: 0.9065


In [42]:
# RES-DEEP
model = NetResDeep().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
  n_epochs = 100,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader,
)
validate(model, train_loader, val_loader)

2021-10-24 09:19:26.151268 Epoch 1, Training loss 0.5274993185025112
2021-10-24 09:20:01.733519 Epoch 10, Training loss 0.22032921694836038
2021-10-24 09:20:41.181698 Epoch 20, Training loss 0.11168192304480987
2021-10-24 09:21:20.635993 Epoch 30, Training loss 0.07389530469515142
2021-10-24 09:22:00.176135 Epoch 40, Training loss 0.03434919258045733
2021-10-24 09:22:39.794605 Epoch 50, Training loss 0.05781054924165083
2021-10-24 09:23:19.477472 Epoch 60, Training loss 0.027413468958089236
2021-10-24 09:23:59.217434 Epoch 70, Training loss 0.0025167885399757215
2021-10-24 09:24:38.856528 Epoch 80, Training loss 0.0014126229943013542
2021-10-24 09:25:18.369825 Epoch 90, Training loss 0.0007876235621245015
2021-10-24 09:25:57.878408 Epoch 100, Training loss 0.0006111824529230295
Accuracy train: 0.9997
Accuracy val: 0.8945
