In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn, optim
import torch.backends.cudnn as cudnn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, Subset
from torchvision import models
    
from sklearn.metrics import accuracy_score

from datetime import datetime
from time import time
import os
import shutil

## Check CUDA

In [2]:
if torch.cuda.is_available():
    cudnn.benchmark = True
    device = "cuda"
    print(torch.cuda.get_device_name())
else:
    device = "cpu"
    print("Use CPU")


Quadro RTX 3000 with Max-Q Design


## Load data

In [4]:
# train 只有 ToTensor()、Normalize()的話，會發生 overfit，Valid 無法收斂
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
transform_valid = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

# Create dataset(use 100 data for my laptop)
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform_train, download=True)
valid_set = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform_valid, download=True)
train_set = Subset(train_set, list(range(0, 70)))
valid_set = Subset(valid_set, list(range(70, 100)))

# Create data loaders for our datasets
BATCH = 5
train_loader = DataLoader(train_set, batch_size=BATCH, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=BATCH, shuffle=False)

print(f'## Training set has {len(train_set)} instances.')
print(f'## Validation set has {len(valid_set)} instances.')

Files already downloaded and verified
Files already downloaded and verified
## Training set has 70 instances.
## Validation set has 30 instances.


In [6]:
for i, data in enumerate(valid_loader):
    print(data[1])

tensor([2, 6, 8, 8, 0])
tensor([2, 9, 3, 3, 8])
tensor([8, 1, 1, 7, 2])
tensor([5, 2, 7, 8, 9])
tensor([0, 3, 8, 6, 4])
tensor([6, 6, 0, 0, 7])


## Build model

In [7]:
model = models.resnet18(weights="IMAGENET1K_V1").to(device) # 使用 pretrain model 比較好收斂

## Loss function(Criterion) & Optimizer

In [8]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

## Training

In [9]:
def train_one_epoch(epoch_index, tb_writer):
    total_vloss, total_vcorrect, total_vsamples = 0.0, 0.0, 0.0
    running_loss, running_acc = 0.0, 0.0
    last_loss, last_acc = 0.0, 0.0

    START_TIME = time()
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero your gradients for every batch
        optimizer.zero_grad()
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        running_loss += loss.item()
        running_acc += (outputs.argmax(dim=1) == labels).float().mean().item()

        total_vloss += loss.item()
        total_vcorrect += (outputs.argmax(dim=1) == labels).sum().item()
        total_vsamples += labels.size(0)

        if i % 10 == 9:
            last_loss = running_loss / 10
            last_acc = running_acc / 10
            # print(f' - Batch {i+1} loss: {last_loss:.4f} / accuracy: {last_acc:.4f}')
            running_loss, running_acc = 0.0, 0.0

    END_TIME = time()

    return total_vloss / (i + 1), total_vcorrect / total_vsamples, (END_TIME - START_TIME)

def reset_folder():
    shutil.rmtree("./weights")
    shutil.rmtree("./runs")
    os.makedirs("./weights")
    os.makedirs("./runs")

In [10]:
total_time = 0
EPOCHS = 25
best_vacc = 0.0
epoch_number = 0

reset_folder()
suffix = f'e{EPOCHS}_b{BATCH}_t{len(train_set)}_v{len(valid_set)}'
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter(f'runs/resnet18_trainer_{suffix}')
model_path = f'weights/Resnet18_{suffix}.pth'


for epoch in range(EPOCHS):
    print(f'EPOCH {epoch_number+1}: ', end="")

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss, avg_acc, train_time = train_one_epoch(epoch_number, writer)
    total_time += train_time

    # Set the model to evaluation model
    model.eval()
    total_vloss = 0.0
    total_vcorrect = 0
    total_vsamples = 0

    with torch.no_grad():
        for i, vdata in enumerate(valid_loader):
            vinputs, vlabels = vdata[0].to(device), vdata[1].to(device)
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels) # current batch valid loss
            total_vloss += vloss.item() # running_vloss
            total_vcorrect += (voutputs.argmax(dim=1) == vlabels).sum().item()
            total_vsamples += vlabels.size(0)

    avg_vloss = total_vloss / (i + 1)
    avg_vacc = total_vcorrect / total_vsamples
    print(f'Train Loss: {avg_loss:.4f} / Valid Loss: {avg_vloss:.4f} / '
          f'Train Accuracy: {avg_acc:.4f} / Valid Accuracy: {avg_vacc:.4f} --- ({train_time:.4f} sec)')

    # Log the running loss averaged per batch
    # for both training and validation
    writer.add_scalars('Training vs. Validation Loss',
                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                    epoch_number + 1)
    writer.add_scalars('Training vs. Validation Accuracy', 
                    { 'Training' : avg_acc, 'Validation': avg_vacc},
                    epoch_number + 1)
    writer.flush() # immediately write into file
 
    # Track best performance, and save the model's state
    if best_vacc < avg_vacc:
        best_model_state = model.state_dict()
        best_vacc = avg_vacc
        best_epoch = epoch_number + 1

    epoch_number += 1

print(f'== Total time: {total_time:.4f} sec ==')
print(f'== save epoch: {best_epoch}, best  vacc: {best_vacc:.4f} ==')
torch.save(best_model_state, model_path)
writer.close()

EPOCH 1: Train Loss: 8.8563 / Valid Loss: 7.6357 / Train Accuracy: 0.0429 / Valid Accuracy: 0.0333 --- (6.9170 sec)
EPOCH 2: Train Loss: 5.5502 / Valid Loss: 7.0242 / Train Accuracy: 0.2429 / Valid Accuracy: 0.1000 --- (0.2823 sec)
EPOCH 3: Train Loss: 5.4964 / Valid Loss: 6.6534 / Train Accuracy: 0.2286 / Valid Accuracy: 0.2000 --- (0.2974 sec)
EPOCH 4: Train Loss: 3.9217 / Valid Loss: 5.9304 / Train Accuracy: 0.2286 / Valid Accuracy: 0.1667 --- (0.2879 sec)
EPOCH 5: Train Loss: 3.5949 / Valid Loss: 8.2986 / Train Accuracy: 0.2000 / Valid Accuracy: 0.1667 --- (0.2995 sec)
EPOCH 6: Train Loss: 3.1653 / Valid Loss: 4.4575 / Train Accuracy: 0.2429 / Valid Accuracy: 0.2333 --- (0.2840 sec)
EPOCH 7: Train Loss: 2.8031 / Valid Loss: 3.4116 / Train Accuracy: 0.2857 / Valid Accuracy: 0.2000 --- (0.2835 sec)
EPOCH 8: Train Loss: 2.2989 / Valid Loss: 3.2706 / Train Accuracy: 0.3143 / Valid Accuracy: 0.2667 --- (0.2867 sec)
EPOCH 9: Train Loss: 2.6108 / Valid Loss: 3.0528 / Train Accuracy: 0.285

In [11]:
print(model.conv1.weight)

Parameter containing:
tensor([[[[-1.0069e-02, -1.3557e-02, -9.1355e-03,  ...,  5.3608e-02,
            1.6570e-02, -1.4646e-02],
          [ 1.0844e-02,  3.3984e-03, -1.1661e-01,  ..., -2.7360e-01,
           -1.3079e-01,  2.8700e-03],
          [-1.1874e-02,  5.4071e-02,  2.9604e-01,  ...,  5.1904e-01,
            2.5732e-01,  6.2824e-02],
          ...,
          [-3.4586e-02,  6.7463e-03,  7.2761e-02,  ..., -3.3935e-01,
           -4.2843e-01, -2.6827e-01],
          [ 2.6184e-02,  3.4247e-02,  5.9439e-02,  ...,  4.0480e-01,
            3.8420e-01,  1.5571e-01],
          [-1.6383e-02, -7.2828e-03, -2.6395e-02,  ..., -1.5636e-01,
           -8.6250e-02, -1.2880e-02]],

         [[-1.5416e-02, -4.1712e-02, -5.0123e-02,  ...,  2.0020e-02,
           -1.0096e-02, -3.7144e-02],
          [ 3.9152e-02,  1.9342e-02, -1.1925e-01,  ..., -3.2379e-01,
           -1.7146e-01, -1.1399e-02],
          [-1.1461e-02,  8.5079e-02,  3.9374e-01,  ...,  6.9820e-01,
            3.6105e-01,  1.1450e-01]

## Load a saved version of the model

In [13]:
# PATH = r"models\model_renet18_.pth"
# saved_model = models.resnet18()
# saved_model.load_state_dict(torch.load(PATH))