In [19]:
import torch
from torch.utils.data import DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as transforms
from tqdm import tqdm
from dataset_faster_rcnn import get_data_loaders
from torch.optim import Adam
import numpy as np

In [20]:
# cuDNN settings
# Keep the cuDNN backend enabled for CUDA operations.
torch.backends.cudnn.enabled = True
# Let cuDNN benchmark candidate convolution algorithms and reuse the fastest one.
# NOTE(review): the run recorded at the end of this notebook shows images of
# different widths in one batch; benchmark mode is usually most useful when
# input shapes are stable — confirm it is still a net win here.
torch.backends.cudnn.benchmark = True
# Do not restrict cuDNN to deterministic algorithms (speed over bit-exact reproducibility).
torch.backends.cudnn.deterministic = False

In [21]:

# Build the train / validation / test loaders with the project-local
# `dataset_faster_rcnn.get_data_loaders` helper (default arguments).
# NOTE(review): the training run recorded in this notebook stops with
# "stack expects each tensor to be equal size" ([3, 512, 682] vs [3, 512, 910]),
# so the loaders presumably stack images of different widths when batching —
# confirm in dataset_faster_rcnn (e.g. a collate_fn that keeps a list of images,
# or resizing/padding to a common size).
train_loader, val_loader, test_loader = get_data_loaders()

def get_model(num_classes):
    """Create a pretrained Faster R-CNN (ResNet-50 FPN) with a fresh box predictor.

    The detector is instantiated with ``pretrained=True`` and its ROI-head box
    predictor is replaced by a new ``FastRCNNPredictor`` sized for
    ``num_classes`` outputs.

    Args:
        num_classes: Number of outputs of the new classification head.
            (Presumably includes the background class, per the torchvision
            convention — confirm against the dataset labels.)

    Returns:
        The modified detection model.
    """
    detector = fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    # The replacement head must accept the same feature width as the stock one.
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return detector

In [22]:
def calculate_iou(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        boxA: Sequence ``[x1, y1, x2, y2]`` (indices 0-1 are the minimum
            corner, indices 2-3 the maximum corner).
        boxB: Sequence in the same layout as ``boxA``.

    Returns:
        The IoU as a float. ``0.0`` is returned when the union area is not
        positive (e.g. both boxes are zero-area), instead of dividing by zero.
    """
    # Corners of the intersection rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp each side at zero so disjoint boxes yield an empty intersection.
    interArea = max(0, xB - xA) * max(0, yB - yA)

    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    unionArea = boxAArea + boxBArea - interArea
    if unionArea <= 0:
        # Degenerate boxes: there is no area to overlap with.
        return 0.0

    return interArea / float(unionArea)
def _pairwise_iou(boxes_a, boxes_b):
    """Return the (len(boxes_a), len(boxes_b)) IoU matrix for [x1, y1, x2, y2] boxes."""
    boxes_a = boxes_a.float()
    boxes_b = boxes_b.float().to(boxes_a.device)

    # Broadcast every box in A against every box in B.
    top_left = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    bottom_right = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    inter = (bottom_right - top_left).clamp(min=0).prod(dim=2)

    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    union = area_a[:, None] + area_b[None, :] - inter

    # Degenerate pairs (non-positive union) get IoU 0 instead of NaN/inf.
    return torch.where(union > 0, inter / union.clamp(min=1e-12), torch.zeros_like(inter))


def evaluate_precision_recall(outputs, targets, iou_threshold=0.5):
    """Compute box-level precision and recall over a batch of images.

    Predictions are processed in the order given. Each one is matched to the
    still-unmatched ground-truth box with the highest IoU; the match counts as
    a true positive when that IoU is at least ``iou_threshold``, otherwise the
    prediction is a false positive. Ground-truth boxes left unmatched are false
    negatives. Matching is class-agnostic: only the ``'boxes'`` entries are
    read.

    Args:
        outputs: Iterable of dicts, one per image, each with a ``'boxes'``
            tensor of predicted ``[x1, y1, x2, y2]`` boxes.
        targets: Iterable of dicts, one per image, each with a ``'boxes'``
            tensor of ground-truth boxes in the same layout.
        iou_threshold: Minimum IoU for a prediction to count as a match.

    Returns:
        ``(precision, recall)``. Each value is ``0`` when its denominator is
        zero (no predictions / no ground-truth boxes).
    """
    TP = 0
    FP = 0
    FN = 0

    for output, target in zip(outputs, targets):
        pred_boxes = output['boxes']
        true_boxes = target['boxes']
        num_pred = len(pred_boxes)
        num_true = len(true_boxes)

        if num_pred == 0 or num_true == 0:
            # Nothing can match: every prediction is spurious, every truth is missed.
            FP += num_pred
            FN += num_true
            continue

        # One vectorised IoU computation and a single host transfer per image,
        # instead of converting every box pair individually.
        iou_rows = _pairwise_iou(pred_boxes, true_boxes).tolist()
        matched = [False] * num_true

        for row in iou_rows:
            best_idx = -1
            best_iou = -1.0
            for idx, iou in enumerate(row):
                if not matched[idx] and iou > best_iou:
                    best_idx = idx
                    best_iou = iou

            if best_idx >= 0 and best_iou >= iou_threshold:
                matched[best_idx] = True
                TP += 1
            else:
                FP += 1

        FN += matched.count(False)

    precision = TP / (TP + FP) if TP + FP > 0 else 0
    recall = TP / (TP + FN) if TP + FN > 0 else 0

    return precision, recall


In [23]:
def _images_to_device(images, device):
    """Move a batch of images to ``device``: one stacked tensor or a sequence of tensors."""
    if isinstance(images, torch.Tensor):
        return images.to(device)
    return [image.to(device) for image in images]


def train(model, train_loader, val_loader, optimizer, num_epochs, device):
    """Train a detection model with per-epoch validation and early stopping.

    Every epoch runs one pass over ``train_loader`` and one pass over
    ``val_loader``. The model weights are saved under ``./weight`` whenever the
    mean validation loss improves; training stops after 5 consecutive epochs
    without improvement.

    Args:
        model: Detection model that returns a dict of losses when called as
            ``model(images, targets)`` in training mode and a list of
            prediction dicts when called as ``model(images)`` in eval mode.
        train_loader: Iterable of batches; each batch is a mapping with an
            ``'image'`` entry and a ``'targets'`` entry (an iterable of
            per-image dicts of tensors).
        val_loader: Iterable of batches in the same layout.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Maximum number of epochs.
        device: Device the batches are moved to.
    """
    import os  # local import: only needed to create the checkpoint directory

    best_val_loss = float('inf')
    patience = 5
    patience_counter = 0

    # Make sure the checkpoint directory exists before any (long) epoch runs.
    os.makedirs('./weight', exist_ok=True)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
            images = _images_to_device(batch['image'], device)
            targets = [{k: v.to(device) for k, v in t.items()} for t in batch['targets']]

            optimizer.zero_grad()
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            losses.backward()
            optimizer.step()

            train_loss += losses.item()
        train_loss /= len(train_loader)

        val_loss = 0
        all_precisions = []
        all_recalls = []
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validating", leave=False):
                images = _images_to_device(batch['image'], device)
                targets = [{k: v.to(device) for k, v in t.items()} for t in batch['targets']]

                # Eval mode yields predictions, used for precision/recall.
                model.eval()
                outputs = model(images)

                # torchvision detection models only return the loss dict in
                # training mode; in eval mode they return detections even when
                # targets are passed. Switch modes under no_grad to get the
                # validation loss without updating any weights.
                # NOTE(review): assumes train mode does not alter the model's
                # state here (the pretrained torchvision backbone presumably
                # uses frozen batch-norm) — confirm if the model is swapped.
                model.train()
                loss_dict = model(images, targets)
                model.eval()

                losses = sum(loss for loss in loss_dict.values())
                val_loss += losses.item()

                precision, recall = evaluate_precision_recall(outputs, targets)
                all_precisions.append(precision)
                all_recalls.append(recall)

        val_loss /= len(val_loader)
        # Mean of the per-batch precision / recall values.
        mean_precision = np.mean(all_precisions)
        mean_recall = np.mean(all_recalls)

        print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Precision: {mean_precision:.4f}, Recall: {mean_recall:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), f'./weight/loss{val_loss}_precision{mean_precision}_Recall{mean_recall}.pth')
            print("Model improved and saved.")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # f-string so the actual patience value is printed.
                print(f"No improvement for {patience} consecutive epochs, stopping early.")
                break

In [24]:

# Prefer the second GPU (the original choice); fall back to the first GPU or
# the CPU so the notebook also runs on hosts without two CUDA devices.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Detector with a 6-output classification head, moved to the chosen device.
model = get_model(num_classes=6).to(device)

# NOTE(review): lr=0.005 looks high for Adam when fine-tuning a pretrained
# detector — confirm the value is intentional.
optimizer = Adam(model.parameters(), lr=0.005, weight_decay=0.0005)

# Train for at most 30 epochs (early stopping is handled inside train()).
train(model, train_loader, val_loader, optimizer, 30, device)

Epoch 1/30 - Training:   0%|          | 4/1053 [00:33<2:28:15,  8.48s/it]


RuntimeError: stack expects each tensor to be equal size, but got [3, 512, 682] at entry 0 and [3, 512, 910] at entry 1