colab에서만 실행

In [2]:
# Work inside the repository's data directory so the downloads and generated
# files from the following cells land there (absolute Colab path).
%cd /content/YOLOv1/data

/content/YOLOv1/data


In [3]:
# Download the Pascal VOC 2007 trainval and test archives into the current directory
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar

# Extract tar files (later cells read the extracted data from VOCdevkit/VOC2007/)
!tar xf VOCtrainval_06-Nov-2007.tar
!tar xf VOCtest_06-Nov-2007.tar

--2023-09-15 00:06:22--  http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
Resolving host.robots.ox.ac.uk (host.robots.ox.ac.uk)... 129.67.94.152
Connecting to host.robots.ox.ac.uk (host.robots.ox.ac.uk)|129.67.94.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 460032000 (439M) [application/x-tar]
Saving to: ‘VOCtrainval_06-Nov-2007.tar’


2023-09-15 00:06:50 (15.9 MB/s) - ‘VOCtrainval_06-Nov-2007.tar’ saved [460032000/460032000]

--2023-09-15 00:06:50--  http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
Resolving host.robots.ox.ac.uk (host.robots.ox.ac.uk)... 129.67.94.152
Connecting to host.robots.ox.ac.uk (host.robots.ox.ac.uk)|129.67.94.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 451020800 (430M) [application/x-tar]
Saving to: ‘VOCtest_06-Nov-2007.tar’


2023-09-15 00:07:17 (15.9 MB/s) - ‘VOCtest_06-Nov-2007.tar’ saved [451020800/451020800]



In [4]:
# Download voc_label.py, the script run in a later cell to clean up the data
# from the XML annotation files
!wget https://pjreddie.com/media/files/voc_label.py

--2023-09-15 00:07:27--  https://pjreddie.com/media/files/voc_label.py
Resolving pjreddie.com (pjreddie.com)... 128.208.4.108
Connecting to pjreddie.com (pjreddie.com)|128.208.4.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2042 (2.0K) [application/octet-stream]
Saving to: ‘voc_label.py’


2023-09-15 00:07:27 (1.25 GB/s) - ‘voc_label.py’ saved [2042/2042]



voc_label.py 파일 4번째 줄에 있는 2012 관련 내용 삭제

In [6]:
# Run voc_label.py to clean data from the XML files.
# NOTE(review): judging by the following cells it produces the 2007_*.txt list
# files and VOCdevkit/VOC2007/labels/*.txt -- confirm against the script.
!python voc_label.py

In [7]:
# Build the training list from the 2007 train+val splits only
# (the 2012 entries are removed from voc_label.py before it is run).
# Then we only test on the 2007 test set.
# It is unclear from the paper what they actually used as a dev set.
!cat 2007_train.txt 2007_val.txt > train.txt
!cp 2007_test.txt test.txt

# Move txt files we won't be using to clean up a little bit
!mkdir old_txt_files
!mv 2007* old_txt_files/

In [8]:
# Run the repository's generate_csv.py.
# NOTE(review): presumably this writes the train.csv / test.csv files that the
# training code later reads from data/ -- confirm against the script.
!python generate_csv.py

In [9]:
# Flatten the dataset into ./images and ./labels (i.e. data/images and
# data/labels, the directories the training code reads from).
# -p keeps the cell re-runnable: plain mkdir fails if the directory exists.
!mkdir -p images labels

# Moving is sufficient; copying the label files first and then moving the same
# files to the same destination only duplicated the I/O.
!mv VOCdevkit/VOC2007/JPEGImages/*.jpg images
!mv VOCdevkit/VOC2007/labels/*.txt labels

# Everything still needed has been moved out, so drop the extracted devkit
# and archive the intermediate list files.
!rm -rf VOCdevkit/
!mv test.txt old_txt_files/
!mv train.txt old_txt_files/

In [10]:
# Return to the repository root: the training code below uses paths relative
# to it (data/images, data/labels, data/train.csv, data/test.csv).
%cd ..

/content/YOLOv1


In [11]:
"""
Main file for training Yolo model on Pascal VOC dataset

"""

import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
)
from loss import YoloLoss

# Seed PyTorch's RNG so that runs are repeatable.
seed = 123
torch.manual_seed(seed)

# Hyperparameters etc.
LEARNING_RATE = 2e-5
# is_available must be *called*: the bare function object is always truthy,
# which would select "cuda" even on machines without a GPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0
EPOCHS = 1000
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False  # when True, main() restores model/optimizer from LOAD_MODEL_FILE
LOAD_MODEL_FILE = "overfit.pth.tar"
# Paths are relative to the repository root (the notebook's working directory).
IMG_DIR = "data/images"
LABEL_DIR = "data/labels"


class Compose(object):
    """Chain image transforms while carrying bounding boxes along untouched.

    Each callable in ``transforms`` is applied, in order, to the image only;
    the ``bboxes`` argument is returned exactly as it was passed in.
    """

    def __init__(self, transforms):
        # Sequence of single-argument callables: image -> image.
        self.transforms = transforms

    def __call__(self, img, bboxes):
        """Return ``(transformed_img, bboxes)``."""
        result = img
        for step in self.transforms:
            result = step(result)
        return result, bboxes


# Preprocessing handed to both datasets: resize every image to 448x448 and
# convert it to a tensor. Compose applies these to the image only, so the
# boxes are returned unchanged.
transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor(),])


def train_fn(train_loader, model, optimizer, loss_fn):
    """Run one pass over ``train_loader`` and print the mean batch loss.

    For every batch the inputs and targets are moved to ``DEVICE``, the loss
    is computed with ``loss_fn(model(x), y)`` and one optimizer step is taken.

    Args:
        train_loader: iterable yielding ``(x, y)`` batches whose elements
            support ``.to(device)``.
        model: callable mapping a batch ``x`` to predictions.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``.
        loss_fn: callable ``(predictions, targets) -> loss`` whose result
            supports ``.item()`` and ``.backward()``.

    Returns:
        None. The mean loss is printed; it is reported as ``nan`` when the
        loader yields no batches.
    """
    loop = tqdm(train_loader, leave=True)
    batch_losses = []

    for x, y in loop:
        x, y = x.to(DEVICE), y.to(DEVICE)
        out = model(x)
        loss = loss_fn(out, y)

        # Read the scalar once and reuse it for the average and the progress bar.
        loss_value = loss.item()
        batch_losses.append(loss_value)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # update progress bar
        loop.set_postfix(loss=loss_value)

    # An empty loader (e.g. drop_last=True with fewer samples than one batch)
    # would otherwise raise ZeroDivisionError here.
    if batch_losses:
        epoch_mean = sum(batch_losses) / len(batch_losses)
    else:
        epoch_mean = float("nan")
    print(f"Mean loss was {epoch_mean}")


def main():
    """Build model, optimizer, loss and data loaders, then train for EPOCHS epochs.

    Every epoch first computes and prints the mAP on the training loader
    (i.e. before that epoch's weight updates) and then runs one training pass
    with ``train_fn``. When ``LOAD_MODEL`` is set, model and optimizer state
    are restored from ``LOAD_MODEL_FILE`` before training starts.
    """
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        # map_location keeps a checkpoint that was saved on a GPU loadable on
        # a CPU-only machine (and vice versa).
        load_checkpoint(
            torch.load(LOAD_MODEL_FILE, map_location=DEVICE), model, optimizer
        )

    train_dataset = VOCDataset(
        "data/train.csv",
        transform=transform,
        img_dir=IMG_DIR,
        label_dir=LABEL_DIR,
    )

    test_dataset = VOCDataset(
        "data/test.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR,
    )

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    # NOTE(review): test_loader is not consumed by the loop below yet. It is
    # configured for evaluation: no shuffling and no dropped tail batch, so
    # every test sample would be scored exactly once.
    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=False,
        drop_last=False,
    )

    for epoch in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(
            train_loader, model, iou_threshold=0.5, threshold=0.4
        )

        mean_avg_prec = mean_average_precision(
            pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint"
        )
        print(f"Train mAP: {mean_avg_prec}")

        # Checkpointing is currently disabled; re-enable to save the model
        # once the training mAP is high enough:
        #if mean_avg_prec > 0.9:
        #    checkpoint = {
        #        "state_dict": model.state_dict(),
        #        "optimizer": optimizer.state_dict(),
        #    }
        #    save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)

        train_fn(train_loader, model, optimizer, loss_fn)

In [None]:
# Start training only when this code is executed directly (script or notebook
# cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Train mAP: 0.0


100%|██████████| 313/313 [00:21<00:00, 14.59it/s, loss=214]

Mean loss was 305.408904298045





Train mAP: 0.0012124788481742144


100%|██████████| 313/313 [00:22<00:00, 14.05it/s, loss=134]

Mean loss was 174.41064248374477





Train mAP: 0.0039656152948737144


100%|██████████| 313/313 [00:25<00:00, 12.30it/s, loss=189]

Mean loss was 163.82609441105168





Train mAP: 0.008449513465166092


100%|██████████| 313/313 [00:23<00:00, 13.13it/s, loss=149]

Mean loss was 154.7486830885037





Train mAP: 0.008871860802173615


100%|██████████| 313/313 [00:23<00:00, 13.21it/s, loss=127]

Mean loss was 145.17241142199825





Train mAP: 0.02357216365635395


100%|██████████| 313/313 [00:20<00:00, 15.06it/s, loss=130]

Mean loss was 134.02123209310415





Train mAP: 0.024297038093209267


100%|██████████| 313/313 [00:25<00:00, 12.50it/s, loss=148]

Mean loss was 121.09130669249514





Train mAP: 0.04028784856200218


100%|██████████| 313/313 [00:26<00:00, 11.97it/s, loss=81.9]


Mean loss was 112.0763483992019
Train mAP: 0.08394774794578552


100%|██████████| 313/313 [00:25<00:00, 12.28it/s, loss=141]

Mean loss was 106.62890841938055





Train mAP: 0.03251168504357338


 73%|███████▎  | 229/313 [00:18<00:06, 12.64it/s, loss=106] 