In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

In [2]:
!nvidia-smi

Mon Nov  9 21:15:52 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.79       Driver Version: 410.79       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:3D:00.0 Off |                    0 |
| N/A   41C    P0    53W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           Off  | 00000000:3E:00.0 Off |                    0 |
| N/A   39C    P0    81W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla K80           Off  | 00000000:88:00.0 Off |                    0 |
| N/A   

In [3]:
class BasicBlock(nn.Module):
    
    def __init__(self, in_channels, channels, stride):
        
        super(BasicBlock, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=channels,
                               kernel_size=3, stride=stride,
                               padding=1, bias=False)
        
        self.bn1 = nn.BatchNorm2d(channels)
        
        self.conv2 = nn.Conv2d(in_channels=channels,
                             out_channels=channels,
                             kernel_size=3, stride=1,
                             padding=1, bias=False)
        
        self.bn2 = nn.BatchNorm2d(channels)
        
        self.shortcut = nn.Sequential()
        
        if stride != 1 or in_channels != channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels=in_channels,
                          out_channels=channels,
                          kernel_size=1, stride=stride,
                          padding=0, bias=False),
                nn.BatchNorm2d(channels)
            )
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [4]:
class BottleNeck(nn.Module):
    
    expansion = 4
    
    def __init__(self, in_channels, channels, stride):
        
        super(BottleNeck, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=channels,
                               kernel_size=1, stride=1,
                               padding=0, bias=False)
        
        self.bn1 = nn.BatchNorm2d(channels)
        
        self.conv2 = nn.Conv2d(in_channels=channels,
                               out_channels=channels,
                               kernel_size=3, stride=stride,
                               padding=1, bias=False)
        
        self.bn2 = nn.BatchNorm2d(channels)
        
        self.conv3 = nn.Conv2d(in_channels=channels,
                               out_channels=self.expansion * channels,
                               kernel_size=1, stride=1,
                               padding=0, bias=False)
        
        self.bn3 = nn.BatchNorm2d(self.expansion * channels)
        
        self.shortcut = nn.Sequential()
        
        if stride != 1 or in_channels != self.expansion * channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels=in_channels,
                          out_channels=self.expansion * channels,
                          kernel_size=1, stride=stride,
                          padding=0, bias=False),
                nn.BatchNorm2d(self.expansion * channels)
            )
        
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [5]:
class ResNet18(nn.Module):
    def __init__(self):
        
        super(ResNet18, self).__init__()
        
        self.conv = nn.Conv2d(in_channels=3, out_channels=64,
                              kernel_size=7, stride=2,
                              padding=3)
        
        self.bn = nn.BatchNorm2d(64)
        
        self.maxpool2d = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        layers = []
        
        layers.append(BasicBlock(64, 64, 1))
        layers.append(BasicBlock(64, 64, 1))
        
        layers.append(BasicBlock(64, 128, 2))
        layers.append(BasicBlock(128, 128, 1))
        
        layers.append(BasicBlock(128, 256, 2))
        layers.append(BasicBlock(256, 256, 1))
        
        layers.append(BasicBlock(256, 512, 2))
        layers.append(BasicBlock(512, 512, 1))
        
        self.layers = nn.Sequential(*layers)
        
        self.avgpool2d = nn.AvgPool2d(kernel_size=7, stride=1)
        self.fc = nn.Linear(512, 10)
        
    def forward(self, x):
        out = F.relu(self.bn(self.conv(x)))
        
        out = self.maxpool2d(out)
        
        out = self.layers(out)
        
        out = self.avgpool2d(out)
        
        out = out.view(out.size(0), -1)
        
        out = self.fc(out)
        
        return out

In [6]:
class ResNet50(nn.Module):
    def __init__(self):
        
        super(ResNet50, self).__init__()
        
        self.conv = nn.Conv2d(in_channels=3, out_channels=64,
                              kernel_size=7, stride=2,
                              padding=3)
        
        self.bn = nn.BatchNorm2d(64)
        
        self.maxpool2d = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        layers = []
        
        layers.append(BottleNeck(64, 64, 1)) # 这个block只是增加了图片的通道数,并没有改变图片的大小
        layers.append(BottleNeck(256, 64, 1))
        layers.append(BottleNeck(256, 64, 1))
        
        layers.append(BottleNeck(256, 128, 2)) # 这个block增加了图片的通道数,也改变了图片的大小
        layers.append(BottleNeck(512, 128, 1))
        layers.append(BottleNeck(512, 128, 1))
        layers.append(BottleNeck(512, 128, 1))
        
        layers.append(BottleNeck(512, 256, 2)) # 这个block增加了图片的通道数,也改变了图片的大小
        layers.append(BottleNeck(1024, 256, 1))
        layers.append(BottleNeck(1024, 256, 1))
        layers.append(BottleNeck(1024, 256, 1))
        layers.append(BottleNeck(1024, 256, 1))
        layers.append(BottleNeck(1024, 256, 1))
        
        layers.append(BottleNeck(1024, 512, 2)) # 这个block增加了图片的通道数,也改变了图片的大小
        layers.append(BottleNeck(2048, 512, 1))
        layers.append(BottleNeck(2048, 512, 1))
        
        self.layers = nn.Sequential(*layers)
        
        self.avgpool2d = nn.AvgPool2d(kernel_size=7, stride=1)
        self.fc = nn.Linear(2048, 10)
    
    def forward(self, x):
        out = F.relu(self.bn(self.conv(x)))
        
        out = self.maxpool2d(out)
        
        out = self.layers(out)
        
        out = self.avgpool2d(out)
        
        out = out.view(out.size(0), -1)
        
        out = self.fc(out)
        
        return out

In [7]:
transform = transforms.Compose(
    [transforms.Resize(224),  # 长宽比不变，保持最短边为224
     transforms.CenterCrop(224),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [8]:
BATCH_SIZE = 64
EPOCH = 20

In [9]:
train_set = datasets.CIFAR10(root='./data/', train=True,
                            download=True, transform=transform)

DATASET_SIZE = len(train_set.data)

train_set, valid_set = torch.utils.data.random_split(train_set, [int(DATASET_SIZE * 0.8), int(DATASET_SIZE * 0.2)])

train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                          shuffle=True)
 
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=BATCH_SIZE,
                                          shuffle=True)

test_set = datasets.CIFAR10(root='./data/', train=False,
                           download=True, transform=transform)

test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE,
                                         shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [10]:
def train(
    model,
    data_loader, 
    optimizer,
    criterion,
    print_every=30
    ):

    model.train()

    print_loss_total = 0
    epoch_loss = 0
    for index, (images, labels) in enumerate(data_loader):
        
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        
        optimizer.zero_grad()
        
        outputs = model(images)
        
        loss = criterion(outputs, labels)
        
        print_loss_total += loss.item()
        epoch_loss += loss.item()
        
        loss.backward()

        optimizer.step()

        if print_every and (index + 1) % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('\tCurrent Loss: %.4f' % print_loss_avg)

    return epoch_loss / len(data_loader)

In [11]:
def evaluate(
    model,
    data_loader,
    ):

    model.eval()

    with torch.no_grad():
        valid_correct = 0
        for images, labels in data_loader:

            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(images)

            _, predicted = torch.max(outputs, 1)

            valid_correct += (predicted == labels).sum().item()
    
    return 100.0 * valid_correct / len(data_loader.dataset)

In [12]:
def test(
    model,
    data_loader,
    ):
    
    model.eval()
    
    with torch.no_grad():
        test_correct = 0
        for images, labels in data_loader:

            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(images)

            _, predicted = torch.max(outputs, 1)

            test_correct += (predicted == labels).sum().item()

    print("The accuracy of total {} images: {}%".format(len(data_loader.dataset),
                                                                  100.0 * test_correct / len(data_loader.dataset)))

In [13]:
official_resnet50 = models.resnet50(pretrained=False)
official_resnet18 = models.resnet18(pretrained=False)

In [14]:
official_resnet50.fc = nn.Linear(2048, 10)
official_resnet18.fc = nn.Linear(512, 10)

In [15]:
custom_resnet50 = ResNet50()
custom_resnet18 = ResNet18()

In [16]:
criterion = nn.CrossEntropyLoss()

In [17]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
official_resnet18.to(DEVICE)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [18]:
best_valid_acc = -1
optimizer = torch.optim.Adam(official_resnet18.parameters())

for epoch in range(EPOCH):
   
    train_loss = train(official_resnet18, train_loader, optimizer, criterion)
    valid_acc = evaluate(official_resnet18, valid_loader)
    
    print('Train Epoch {} Train Loss: {} Valid Acc: {}%'.format(epoch, train_loss, valid_acc))
    
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(official_resnet18.state_dict(), 'official_resnet18.pth')

	Current Loss: 2.1720
	Current Loss: 1.8507
	Current Loss: 1.7559
	Current Loss: 1.7065
	Current Loss: 1.6064
	Current Loss: 1.5855
	Current Loss: 1.5887
	Current Loss: 1.4584
	Current Loss: 1.5043
	Current Loss: 1.4553
	Current Loss: 1.3496
	Current Loss: 1.3513
	Current Loss: 1.3049
	Current Loss: 1.2444
	Current Loss: 1.2438
	Current Loss: 1.3089
	Current Loss: 1.2326
	Current Loss: 1.0851
	Current Loss: 1.1696
	Current Loss: 1.1600
Train Epoch 0 Train Loss: 1.4424649998664856 Valid Acc: 54.78%
	Current Loss: 1.0814
	Current Loss: 1.0491
	Current Loss: 1.0384
	Current Loss: 0.9840
	Current Loss: 1.0357
	Current Loss: 1.0321
	Current Loss: 0.9522
	Current Loss: 0.9273
	Current Loss: 0.9549
	Current Loss: 0.9623
	Current Loss: 0.8836
	Current Loss: 0.9325
	Current Loss: 0.9071
	Current Loss: 0.8672
	Current Loss: 0.8506
	Current Loss: 0.7921
	Current Loss: 0.8637
	Current Loss: 0.8447
	Current Loss: 0.8322
	Current Loss: 0.8088
Train Epoch 1 Train Loss: 0.9224644768714905 Valid Acc: 6

In [19]:
official_resnet18.load_state_dict(torch.load('official_resnet18.pth'))
test(official_resnet18, test_loader)

The accuracy of total 10000 images: 83.77%


In [20]:
DEVICE = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
official_resnet50.to(DEVICE)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [21]:
best_valid_acc = -1
optimizer = torch.optim.Adam(official_resnet50.parameters())

for epoch in range(EPOCH):
   
    train_loss = train(official_resnet50, train_loader, optimizer, criterion)
    valid_acc = evaluate(official_resnet50, valid_loader)
    
    print('Train Epoch {} Train Loss: {} Valid Acc: {}%'.format(epoch, train_loss, valid_acc))
    
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(official_resnet50.state_dict(), 'official_resnet50.pth')

	Current Loss: 2.4300
	Current Loss: 1.9947
	Current Loss: 1.9188
	Current Loss: 1.8566
	Current Loss: 1.8193
	Current Loss: 1.7579
	Current Loss: 1.7678
	Current Loss: 1.6738
	Current Loss: 1.6739
	Current Loss: 1.6455
	Current Loss: 1.5805
	Current Loss: 1.5963
	Current Loss: 1.4607
	Current Loss: 1.4807
	Current Loss: 1.4927
	Current Loss: 1.4394
	Current Loss: 1.3930
	Current Loss: 1.4134
	Current Loss: 1.2925
	Current Loss: 1.3226
Train Epoch 0 Train Loss: 1.637575026321411 Valid Acc: 49.0%
	Current Loss: 1.2596
	Current Loss: 1.3304
	Current Loss: 1.2356
	Current Loss: 1.2171
	Current Loss: 1.1899
	Current Loss: 1.2334
	Current Loss: 1.1533
	Current Loss: 1.1363
	Current Loss: 1.1035
	Current Loss: 1.1550
	Current Loss: 1.1218
	Current Loss: 1.1235
	Current Loss: 1.0699
	Current Loss: 1.0150
	Current Loss: 1.0865
	Current Loss: 1.0783
	Current Loss: 0.9764
	Current Loss: 1.0424
	Current Loss: 1.0007
	Current Loss: 0.9792
Train Epoch 1 Train Loss: 1.122745821762085 Valid Acc: 61.5

In [22]:
official_resnet50.load_state_dict(torch.load('official_resnet50.pth'))
test(official_resnet50, test_loader)

The accuracy of total 10000 images: 83.42%


In [23]:
DEVICE = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
custom_resnet50.to(DEVICE)

ResNet50(
  (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool2d): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layers): Sequential(
    (0): BottleNeck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, aff

In [24]:
best_valid_acc = -1
optimizer = torch.optim.Adam(custom_resnet50.parameters())

for epoch in range(EPOCH):
   
    train_loss = train(custom_resnet50, train_loader, optimizer, criterion)
    valid_acc = evaluate(custom_resnet50, valid_loader)
    
    print('Train Epoch {} Train Loss: {} Valid Acc: {}%'.format(epoch, train_loss, valid_acc))
    
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(custom_resnet50.state_dict(), 'custom_resnet50.pth')

	Current Loss: 2.4353
	Current Loss: 2.1022
	Current Loss: 1.9107
	Current Loss: 1.8788
	Current Loss: 1.8638
	Current Loss: 1.8533
	Current Loss: 1.7530
	Current Loss: 1.7616
	Current Loss: 1.7367
	Current Loss: 1.6974
	Current Loss: 1.6478
	Current Loss: 1.6545
	Current Loss: 1.6548
	Current Loss: 1.6525
	Current Loss: 1.6008
	Current Loss: 1.5538
	Current Loss: 1.4828
	Current Loss: 1.4587
	Current Loss: 1.5315
	Current Loss: 1.4594
Train Epoch 0 Train Loss: 1.7207319160461425 Valid Acc: 47.75%
	Current Loss: 1.4077
	Current Loss: 1.3250
	Current Loss: 1.3381
	Current Loss: 1.3222
	Current Loss: 1.2501
	Current Loss: 1.2958
	Current Loss: 1.2306
	Current Loss: 1.2643
	Current Loss: 1.2280
	Current Loss: 1.2686
	Current Loss: 1.2641
	Current Loss: 1.2009
	Current Loss: 1.1924
	Current Loss: 1.1977
	Current Loss: 1.2016
	Current Loss: 1.1219
	Current Loss: 1.1293
	Current Loss: 1.1038
	Current Loss: 1.1449
	Current Loss: 1.1084
Train Epoch 1 Train Loss: 1.2245042080879212 Valid Acc: 5

In [25]:
custom_resnet50.load_state_dict(torch.load('custom_resnet50.pth'))
test(custom_resnet50, test_loader)

The accuracy of total 10000 images: 81.52%


In [26]:
DEVICE = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
custom_resnet18.to(DEVICE)

ResNet18(
  (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool2d): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layers): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1,

In [27]:
best_valid_acc = -1
optimizer = torch.optim.Adam(custom_resnet18.parameters())

for epoch in range(EPOCH):
   
    train_loss = train(custom_resnet18, train_loader, optimizer, criterion)
    valid_acc = evaluate(custom_resnet18, valid_loader)
    
    print('Train Epoch {} Train Loss: {} Valid Acc: {}%'.format(epoch, train_loss, valid_acc))
    
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(custom_resnet18.state_dict(), 'custom_resnet18.pth')

	Current Loss: 2.2888
	Current Loss: 1.9298
	Current Loss: 1.8464
	Current Loss: 1.7634
	Current Loss: 1.7513
	Current Loss: 1.7119
	Current Loss: 1.6601
	Current Loss: 1.6776
	Current Loss: 1.6055
	Current Loss: 1.5759
	Current Loss: 1.5508
	Current Loss: 1.5211
	Current Loss: 1.4664
	Current Loss: 1.4488
	Current Loss: 1.4054
	Current Loss: 1.3424
	Current Loss: 1.2840
	Current Loss: 1.2775
	Current Loss: 1.2538
	Current Loss: 1.2935
Train Epoch 0 Train Loss: 1.5696773218154907 Valid Acc: 46.71%
	Current Loss: 1.2148
	Current Loss: 1.2132
	Current Loss: 1.1896
	Current Loss: 1.1708
	Current Loss: 1.1561
	Current Loss: 1.1039
	Current Loss: 1.0835
	Current Loss: 1.0932
	Current Loss: 1.0961
	Current Loss: 1.0647
	Current Loss: 1.0473
	Current Loss: 1.0392
	Current Loss: 1.0062
	Current Loss: 0.9624
	Current Loss: 0.9906
	Current Loss: 1.0160
	Current Loss: 0.9327
	Current Loss: 0.9100
	Current Loss: 0.9049
	Current Loss: 0.9188
Train Epoch 1 Train Loss: 1.049575322818756 Valid Acc: 64

In [28]:
custom_resnet18.load_state_dict(torch.load('custom_resnet18.pth'))
test(custom_resnet18, test_loader)

The accuracy of total 10000 images: 82.51%
