In [5]:
import torch
from torchvision import models
from torch.optim import Adam
from torch.cuda.amp import GradScaler, autocast  # changed part

# Instantiate ResNet-18 with torchvision's default arguments, then move it to the GPU.
model = models.resnet18()
model = model.cuda()

# Adam over every parameter of the model, learning rate 3e-4.
optimizer = Adam(params=model.parameters(), lr=3e-4)

In [None]:
# Minimal automatic-mixed-precision (AMP) training loop.
# NOTE(review): `data` and `loss_fn` are not defined in the visible cells; they
# must already exist in the kernel before this cell is run.
scaler = GradScaler()

for epoch in range(5):
    # Loop variables in a notebook cell are globals, so the batch is bound to
    # `inputs`/`targets` rather than `input`, which would shadow the builtin
    # `input()` for every later cell.
    for inputs, targets in data:
        optimizer.zero_grad()

        # Run the forward pass and the loss computation under autocast.
        with autocast():
            output = model(inputs)
            loss = loss_fn(output, targets)

        # Scale the loss and call backward() on it to produce scaled gradients.
        # backward() is deliberately outside the autocast block: running the
        # backward pass under autocast is not recommended, and backward ops
        # already use the dtype autocast chose for the matching forward ops.
        scaler.scale(loss).backward()

        # scaler.step() first unscales the gradients of the optimizer's params.
        # If they contain no infs/NaNs it calls optimizer.step(); otherwise the
        # optimizer step is skipped for this batch.
        scaler.step(optimizer)

        # Update the loss scale for the next iteration.
        scaler.update()

In [None]:
# AMP training loop with gradient-norm clipping.
# NOTE(review): `data`, `loss_fn`, `epochs` and `max_norm` are not defined in
# the visible cells; they must already exist in the kernel before this cell runs.
scaler = GradScaler()

# `epochs` is used as an integer count elsewhere in this notebook
# (`range(epochs)`, `len(train_dl) * epochs`), so iterate over range(epochs);
# iterating the integer directly would raise TypeError.
for epoch in range(epochs):
    # `inputs`/`targets` instead of `input`/`target`: cell-level loop variables
    # are globals and `input` would shadow the builtin for later cells.
    for inputs, targets in data:
        optimizer.zero_grad()
        with autocast():
            output = model(inputs)
            loss = loss_fn(output, targets)
        scaler.scale(loss).backward()

        # Unscale the gradients of the optimizer's params in place, so the
        # clipping threshold below applies to the true gradient magnitudes.
        scaler.unscale_(optimizer)

        # Gradients are now unscaled, so clip as usual.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)

        # The gradients were already unscaled above, so scaler.step() does not
        # unscale them again; it still skips optimizer.step() if they contain
        # infs or NaNs.
        scaler.step(optimizer)

        # Update the loss scale for the next iteration.
        scaler.update()

In [None]:
# https://pytorch.org/docs/stable/amp.html
# https://pytorch.org/docs/stable/notes/amp_examples.html
# https://discuss.pytorch.org/t/optimizer-step-before-lr-scheduler-step-error-using-gradscaler/92930

# Mixed-precision training with Adam and a one-cycle learning-rate schedule.
# NOTE(review): `train_dl`, `epochs`, `lr`, `model`, `criterion`, `move_to_dev`,
# `get_lr` and `tqdm` are not defined in this cell; they must already exist in
# the kernel before it is run.
steps = len(train_dl) * epochs  # batches per epoch times number of epochs
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, steps_per_epoch=len(train_dl), epochs=epochs)
avg_train_losses = []
avg_val_losses = []
avg_val_scores = []
# NOTE(review): `lr` is rebound here from the scalar learning rate used above to
# a list of per-step learning rates. After this cell has run once, re-running it
# passes that list to Adam/OneCycleLR unless `lr` is reset first; consider a
# separate name for the history.
lr = []
best_avg_val_score = -1000
scaler = torch.cuda.amp.GradScaler()  # mixed precision support

for epoch in tqdm(range(epochs), total=epochs):
    model.train()
    total_train_loss = 0.0
    for i, (x, y, image_tensor) in enumerate(train_dl):
        x, y, image_tensor = move_to_dev(x, y, image_tensor)
        model.zero_grad()
        with torch.cuda.amp.autocast():
            output = model(x, image_tensor)
            # NOTE(review): called as criterion(y, output); the built-in
            # torch.nn losses take (input, target) — confirm this order is
            # what `criterion` expects.
            loss = criterion(y, output)
        total_train_loss += loss.item()

        # Backward pass and optimization
        scaler.scale(loss).backward()
        scaler.step(optimizer)

        # GradScaler skips optimizer.step() when the gradients contain infs or
        # NaNs and then lowers the loss scale in update(). Stepping the scheduler
        # on such a batch triggers PyTorch's "lr_scheduler.step() before
        # optimizer.step()" warning and advances the schedule without a weight
        # update, so only step it when the scale did not decrease.
        scale_before = scaler.get_scale()
        scaler.update()
        optimizer_step_skipped = scaler.get_scale() < scale_before
        if not optimizer_step_skipped:
            scheduler.step()
        lr.append(get_lr(optimizer))