In [33]:
import copy
import os
import shutil
from datetime import datetime
from time import time

import torch
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score
from torch import nn, optim
from torch.utils.data import DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
from torchvision import models

## Check CUDA

In [34]:
# Choose the compute device once; later cells move the model and batches to `device`.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    # Enable the cuDNN benchmark flag and report which GPU is in use.
    cudnn.benchmark = True
    print(torch.cuda.get_device_name())
else:
    print("Use CPU")


Quadro RTX 3000 with Max-Q Design


## Load data

In [35]:
# Author's observation: with only ToTensor() and Normalize() on the training set the
# model overfits and validation does not converge, so random flips and a small
# rotation are added to the training pipeline only.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])
transform_valid = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Build the CIFAR-10 datasets, then keep only 100 samples in total (laptop-sized run):
# the first 70 images of the train split and images 70..99 of the test split.
cifar_train = torchvision.datasets.CIFAR10(
    root="./data", train=True, transform=transform_train, download=True)
cifar_test = torchvision.datasets.CIFAR10(
    root="./data", train=False, transform=transform_valid, download=True)
train_set = Subset(cifar_train, list(range(70)))
valid_set = Subset(cifar_test, list(range(70, 100)))

# Wrap both subsets in data loaders; only the training loader is shuffled.
BATCH = 5
train_loader = DataLoader(train_set, batch_size=BATCH, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=BATCH, shuffle=False)

print(f'## Training set has {len(train_set)} instances.')
print(f'## Validation set has {len(valid_set)} instances.')

Files already downloaded and verified
Files already downloaded and verified
## Training set has 70 instances.
## Validation set has 30 instances.


## Build model

In [36]:
# Start from ImageNet-pretrained weights (author's note: a pretrained model converges
# more easily), then swap the classification head for one sized to this dataset.
model = models.resnet18(weights="IMAGENET1K_V1")
num_fcin = model.fc.in_features  # number of input features of the fully connected layer
num_classes = len(train_set.dataset.classes)
model.fc = nn.Linear(num_fcin, num_classes)
model = model.to(device)


## Loss function(Criterion) & Optimizer

In [37]:
# Cross-entropy criterion, with Adam updating every parameter of `model`.
LEARNING_RATE = 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

## Training

In [38]:
def train_one_epoch(epoch_index, tb_writer):
    """Run one optimisation pass over ``train_loader`` and return epoch metrics.

    Relies on the notebook-level ``model``, ``optimizer``, ``loss_fn``,
    ``train_loader`` and ``device``. The caller is expected to have put the
    model in training mode beforehand.

    Args:
        epoch_index: Index of the current epoch. Accepted for call
            compatibility; not used inside the function.
        tb_writer: TensorBoard writer. Accepted for call compatibility; not
            used inside the function (the caller logs the returned metrics).

    Returns:
        A tuple ``(avg_loss, accuracy, elapsed_seconds)`` where ``avg_loss`` is
        the mean of the per-batch losses, ``accuracy`` is the fraction of
        correctly classified samples, and ``elapsed_seconds`` is the wall-clock
        duration of the pass. An empty loader yields ``(0.0, 0.0, elapsed)``
        instead of raising.
    """
    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    num_batches = 0

    start_time = time()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Gradients accumulate by default, so clear them for every batch.
        optimizer.zero_grad()
        outputs = model(inputs)

        # Compute the loss and its gradients, then update the weights.
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_correct += (outputs.argmax(dim=1) == labels).sum().item()
        total_samples += labels.size(0)
        num_batches += 1
    elapsed = time() - start_time

    # Guard against an empty loader: there is nothing to average over.
    if num_batches == 0 or total_samples == 0:
        return 0.0, 0.0, elapsed

    return total_loss / num_batches, total_correct / total_samples, elapsed

def reset_folder():
    """Recreate empty ``./weights`` and ``./runs`` directories.

    Existing content of either directory is deleted. A directory that does not
    exist yet is simply created, so the function also works on the very first
    run in a fresh working directory.
    """
    for folder in ("./weights", "./runs"):
        # rmtree raises on a missing path, so only remove what is there.
        if os.path.isdir(folder):
            shutil.rmtree(folder)
        os.makedirs(folder, exist_ok=True)

In [39]:
# --- Run configuration and bookkeeping ---
total_time = 0
EPOCHS = 20
best_vacc = 0.0
best_epoch = 0      # 0 means "no epoch has been evaluated yet"
best_model = None   # independent snapshot of the best model seen so far
epoch_number = 0

# Start from empty ./weights and ./runs folders (earlier runs are deleted).
reset_folder()
suffix = f'e{EPOCHS}_b{BATCH}_t{len(train_set)}_v{len(valid_set)}'
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')  # not used further in this cell
writer = SummaryWriter(f'runs/resnet18_trainer_{suffix}')
model_path = f'weights/Resnet18_{suffix}.pth'


for epoch in range(EPOCHS):
    print(f'EPOCH {epoch_number+1}: ', end="")

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss, avg_acc, train_time = train_one_epoch(epoch_number, writer)
    total_time += train_time

    # Set the model to evaluation mode
    model.eval()
    total_vloss = 0.0
    total_vcorrect = 0
    total_vsamples = 0
    num_vbatches = 0

    with torch.no_grad():
        for vdata in valid_loader:
            vinputs, vlabels = vdata[0].to(device), vdata[1].to(device)
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels)  # validation loss of the current batch
            total_vloss += vloss.item()
            total_vcorrect += (voutputs.argmax(dim=1) == vlabels).sum().item()
            total_vsamples += vlabels.size(0)
            num_vbatches += 1

    # Average per batch (loss) and per sample (accuracy); an empty loader gives 0.0.
    avg_vloss = total_vloss / num_vbatches if num_vbatches else 0.0
    avg_vacc = total_vcorrect / total_vsamples if total_vsamples else 0.0
    print(f'Train Loss: {avg_loss:.4f} / Valid Loss: {avg_vloss:.4f} / '
          f'Train Accuracy: {avg_acc:.4f} / Valid Accuracy: {avg_vacc:.4f} --- ({train_time:.4f} sec)')

    # Log the per-epoch averages for both training and validation
    writer.add_scalars('Training vs. Validation Loss',
                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                    epoch_number + 1)
    writer.add_scalars('Training vs. Validation Accuracy',
                    { 'Training' : avg_acc, 'Validation': avg_vacc},
                    epoch_number + 1)
    writer.flush() # immediately write into file

    # Track best performance. `model` keeps being trained in later epochs, so a
    # plain `best_model = model` would only alias it and end up holding the
    # final-epoch weights; take a deep copy to freeze the best state instead.
    # The first evaluated epoch always becomes the initial best, even at 0.0.
    if best_model is None or best_vacc < avg_vacc:
        best_model = copy.deepcopy(model)
        best_vacc = avg_vacc
        best_epoch = epoch_number + 1

    epoch_number += 1

print(f'== Total time: {total_time:.4f} sec ==')
if best_model is None:
    # Only reachable when no epoch was run (EPOCHS == 0).
    print('== no epoch was run, nothing to save ==')
else:
    print(f'== save epoch: {best_epoch}, best  vacc: {best_vacc:.4f} ==')
    torch.save(best_model, model_path)
writer.close()

EPOCH 1: Train Loss: 3.0277 / Valid Loss: 3.1211 / Train Accuracy: 0.1857 / Valid Accuracy: 0.0333 --- (0.6296 sec)
EPOCH 2: Train Loss: 2.8454 / Valid Loss: 3.4237 / Train Accuracy: 0.2143 / Valid Accuracy: 0.1333 --- (0.2010 sec)
EPOCH 3: Train Loss: 2.3935 / Valid Loss: 2.9840 / Train Accuracy: 0.2286 / Valid Accuracy: 0.2667 --- (0.2484 sec)
EPOCH 4: Train Loss: 2.4505 / Valid Loss: 3.0895 / Train Accuracy: 0.1857 / Valid Accuracy: 0.2000 --- (0.2213 sec)
EPOCH 5: Train Loss: 2.2243 / Valid Loss: 3.0876 / Train Accuracy: 0.2714 / Valid Accuracy: 0.2667 --- (0.2241 sec)
EPOCH 6: Train Loss: 2.7530 / Valid Loss: 3.2679 / Train Accuracy: 0.0857 / Valid Accuracy: 0.2000 --- (0.1957 sec)
EPOCH 7: Train Loss: 2.1652 / Valid Loss: 2.7607 / Train Accuracy: 0.2286 / Valid Accuracy: 0.1667 --- (0.1969 sec)
EPOCH 8: Train Loss: 2.1338 / Valid Loss: 3.1201 / Train Accuracy: 0.2714 / Valid Accuracy: 0.3333 --- (0.2286 sec)
EPOCH 9: Train Loss: 1.9204 / Valid Loss: 2.9641 / Train Accuracy: 0.300

In [40]:
# Inspect the weights of the first convolution layer (conv1) of `model`.
# NOTE(review): this prints the full parameter tensor; the recorded output below is long.
print(model.conv1.weight)

Parameter containing:
tensor([[[[-1.0019e-02,  9.4583e-04,  7.1766e-03,  ...,  6.6072e-02,
            3.0613e-02, -3.4041e-03],
          [ 2.1255e-02,  2.1835e-02, -8.9811e-02,  ..., -2.4965e-01,
           -1.1392e-01,  2.0792e-02],
          [ 2.1951e-03,  6.1775e-02,  3.0187e-01,  ...,  5.3524e-01,
            2.6762e-01,  7.6644e-02],
          ...,
          [-2.4845e-02,  6.1923e-03,  6.8051e-02,  ..., -3.3604e-01,
           -4.1920e-01, -2.5602e-01],
          [ 2.1462e-02,  2.6295e-02,  5.2298e-02,  ...,  4.1164e-01,
            3.9093e-01,  1.6546e-01],
          [-2.7134e-02, -2.0185e-02, -3.4457e-02,  ..., -1.5499e-01,
           -8.7559e-02, -1.3467e-02]],

         [[-1.7676e-02, -2.3922e-02, -2.9301e-02,  ...,  3.6535e-02,
            9.4494e-03, -2.4356e-02],
          [ 4.9798e-02,  4.1248e-02, -8.5938e-02,  ..., -2.9284e-01,
           -1.5073e-01,  9.8671e-03],
          [ 2.2459e-03,  9.4682e-02,  4.0387e-01,  ...,  7.2347e-01,
            3.7933e-01,  1.3507e-01]

## Load a saved version of the model

In [41]:
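# NOTE: the training cell saves the whole model object with
# torch.save(best_model, model_path) to weights/Resnet18_<suffix>.pth, and that
# model's fc head was replaced to match the CIFAR-10 classes. The snippet below
# instead expects a state_dict for a default resnet18 at a different path
# ("renet18" looks like a typo), so align the path, the fc layer and the save
# format before enabling it.
# PATH = r"models\model_renet18_.pth"
# saved_model = models.resnet18()
# saved_model.load_state_dict(torch.load(PATH))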