In [22]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn, optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, Subset
from torchvision import models

from datetime import datetime

## Check CUDA

In [14]:
if torch.__version__:
    print("PyTorch 安裝成功，版本:", torch.__version__)
    
    if torch.cuda.is_available():
        print("CUDA 可用")
        print(torch.cuda.get_device_name())
    else:
        print("CUDA 不可用")

PyTorch 安裝成功，版本: 1.12.1
CUDA 可用
Quadro RTX 3000 with Max-Q Design


## Load data

In [28]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, ), (0.5, ))])

# Create dataset(use 100 data for my laptop)
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
valid_set = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)
train_set = Subset(train_set, list(range(90)))
valid_set = Subset(valid_set, list(range(90, 100)))

# Create data loaders for our datasets
train_loader = DataLoader(train_set, batch_size=5, shuffle=False)
valid_loader = DataLoader(valid_set, batch_size=5, shuffle=False)

print(f'## Training set has {len(train_set)} instances.')
print(f'## Validation set has {len(valid_set)} instances.')

Files already downloaded and verified
Files already downloaded and verified
## Training set has 90 instances.
## Validation set has 10 instances.


## Build model

In [16]:
model = models.resnet18(weights="IMAGENET1K_V1", progress=True)

## Loss function(Criterion) & Optimizer

In [17]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

## Training

In [26]:
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0.0
    last_loss = 0.0

    for i, data in enumerate(train_loader):
        inputs, labels = data

        # Zero your gradients for every batch!
        optimizer.zero_grad()
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:
            last_loss = running_loss/10
            print(f'   Batch {i+1} loss: {last_loss}')
            tb_x = epoch_index * len(train_loader) + i + 1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0.0
    return last_loss

In [38]:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/resnet18_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = 5
best_vloss = 1_000_000.0

for epoch in range(EPOCHS):
    print(f'== EPOCH {epoch_number+1} ==')

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(epoch_number, writer)

    # Set the model to evaluation mode
    model.eval()
    running_vloss = 0.0

    with torch.no_grad():
        for i, vdata in enumerate(valid_loader):
            vinputs, vlabels = vdata
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss

    avg_vloss = running_vloss / (i + 1)
    print(f'== Train Loss: {avg_loss} / Valid Loss: {avg_vloss}')

    # Log the running loss averaged per batch
    # for both training and validation
    writer.add_scalars('Training vs. Validation Loss',
                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                    epoch_number + 1)
    writer.flush()

    # Track best performance, and save the model's state
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'models/model_renet18_{}_{}'.format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path)

    epoch_number += 1

== EPOCH 1 ==
   Batch 10 loss: 0.7376733273267746
== Train Loss: 0.7376733273267746 / Valid Loss: 3.8031139373779297
== EPOCH 2 ==
   Batch 10 loss: 0.6659890800714493
== Train Loss: 0.6659890800714493 / Valid Loss: 4.150353908538818
== EPOCH 3 ==
   Batch 10 loss: 0.46372426897287367
== Train Loss: 0.46372426897287367 / Valid Loss: 4.364117622375488
== EPOCH 4 ==
   Batch 10 loss: 0.46563472002744677
== Train Loss: 0.46563472002744677 / Valid Loss: 4.529588222503662
== EPOCH 5 ==
   Batch 10 loss: 0.9611490845680237
== Train Loss: 0.9611490845680237 / Valid Loss: 3.0714406967163086


In [None]:
# 打不開阿!!!
!tensorboard --logdir runs/resnet18_trainer_20241222_125827

^C


## Load a saved version of the model

In [31]:
PATH = r"C:\Users\user\Desktop\AI加速\code\weights\model_renet18_20241222_125827_4"
saved_model = models.resnet18()
saved_model.load_state_dict(torch.load(PATH))

<All keys matched successfully>