colab에서만 실행

In [3]:
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/vision/5_day/YOLO

Mounted at /content/drive
/content/drive/MyDrive/vision/5_day/YOLO


In [4]:
%cd data

/content/drive/MyDrive/vision/5_day/YOLO/data


In [5]:
%pwd

'/content/drive/MyDrive/vision/5_day/YOLO/data'

In [6]:
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 

# Extract tar files
!tar xf VOCtrainval_06-Nov-2007.tar
!tar xf VOCtest_06-Nov-2007.tar

# Need voc_label.py to clean up data from xml files
# !wget https://pjreddie.com/media/files/voc_label.py

--2023-05-25 18:47:08--  http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
Resolving host.robots.ox.ac.uk (host.robots.ox.ac.uk)... 129.67.94.152
Connecting to host.robots.ox.ac.uk (host.robots.ox.ac.uk)|129.67.94.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 460032000 (439M) [application/x-tar]
Saving to: ‘VOCtrainval_06-Nov-2007.tar’


2023-05-25 18:47:56 (9.44 MB/s) - ‘VOCtrainval_06-Nov-2007.tar’ saved [460032000/460032000]

--2023-05-25 18:47:56--  http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
Resolving host.robots.ox.ac.uk (host.robots.ox.ac.uk)... 129.67.94.152
Connecting to host.robots.ox.ac.uk (host.robots.ox.ac.uk)|129.67.94.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 451020800 (430M) [application/x-tar]
Saving to: ‘VOCtest_06-Nov-2007.tar’


2023-05-25 18:48:42 (9.43 MB/s) - ‘VOCtest_06-Nov-2007.tar’ saved [451020800/451020800]

--2023-05-25 18:50:58--  https://p

In [8]:
# Run python file to clean data from xml files
!python voc_label.py

In [9]:
# Get train by using train+val from 2007 and 2012
# Then we only test on 2007 test set
# Unclear from paper what they actually just as a dev set
!cat 2007_train.txt 2007_val.txt > train.txt
!cp 2007_test.txt test.txt

# Move txt files we won't be using to clean up a little bit
!mkdir old_txt_files
!mv 2007* old_txt_files/

In [10]:
!python generate_csv.py

In [11]:
!mkdir images
!mkdir labels

!cp VOCdevkit/VOC2007/labels/*.txt labels
                                                                    
!mv VOCdevkit/VOC2007/JPEGImages/*.jpg images                              
!mv VOCdevkit/VOC2007/labels/*.txt labels                                          

!rm -rf VOCdevkit/
!mv test.txt old_txt_files/
!mv train.txt old_txt_files/

cp: cannot stat 'VOCdevkit/*.jpg': No such file or directory


In [12]:
%cd ..

/content/drive/MyDrive/vision/5_day/YOLO


In [13]:
"""
Main file for training Yolo model on Pascal VOC dataset

"""

import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import Yolov1
from dataset import VOCDataset
from utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
)
from loss import YoloLoss

seed = 123
torch.manual_seed(seed)

# Hyperparameters etc. 
LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0
EPOCHS = 1000
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"
IMG_DIR = "data/images"
LABEL_DIR = "data/labels"


class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, bboxes):
        for t in self.transforms:
            img, bboxes = t(img), bboxes

        return img, bboxes


transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor(),])


def train_fn(train_loader, model, optimizer, loss_fn):
    loop = tqdm(train_loader, leave=True)
    mean_loss = []

    for batch_idx, (x, y) in enumerate(loop):
        x, y = x.to(DEVICE), y.to(DEVICE)
        out = model(x)
        loss = loss_fn(out, y)
        mean_loss.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # update progress bar
        loop.set_postfix(loss=loss.item())

    print(f"Mean loss was {sum(mean_loss)/len(mean_loss)}")


def main():
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)

    train_dataset = VOCDataset(
        "data/train.csv",
        transform=transform,
        img_dir=IMG_DIR,
        label_dir=LABEL_DIR,
    )

    test_dataset = VOCDataset(
        "data/test.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR,
    )

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    for epoch in range(EPOCHS):
        # for x, y in train_loader:
        #    x = x.to(DEVICE)
        #    for idx in range(8):
        #        bboxes = cellboxes_to_boxes(model(x))
        #        bboxes = non_max_suppression(bboxes[idx], iou_threshold=0.5, threshold=0.4, box_format="midpoint")
        #        plot_image(x[idx].permute(1,2,0).to("cpu"), bboxes)

        #    import sys
        #    sys.exit()

        pred_boxes, target_boxes = get_bboxes(
            train_loader, model, iou_threshold=0.5, threshold=0.4
        )

        mean_avg_prec = mean_average_precision(
            pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint"
        )
        print(f"Train mAP: {mean_avg_prec}")

        #if mean_avg_prec > 0.9:
        #    checkpoint = {
        #        "state_dict": model.state_dict(),
        #        "optimizer": optimizer.state_dict(),
        #    }
        #    save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)
        #    import time
        #    time.sleep(10)

        train_fn(train_loader, model, optimizer, loss_fn)

In [None]:
if __name__ == "__main__":
    main()

Train mAP: 0.0


100%|██████████| 313/313 [00:33<00:00,  9.47it/s, loss=224]

Mean loss was 312.5338512091591





Train mAP: 0.0011580729624256492


100%|██████████| 313/313 [00:32<00:00,  9.66it/s, loss=135]

Mean loss was 179.90254084675456





Train mAP: 0.002277806866914034


100%|██████████| 313/313 [00:32<00:00,  9.64it/s, loss=195]

Mean loss was 167.5273847244799





Train mAP: 0.00541496928781271


100%|██████████| 313/313 [00:32<00:00,  9.62it/s, loss=155]

Mean loss was 157.38924595646964





Train mAP: 0.010457867756485939


100%|██████████| 313/313 [00:32<00:00,  9.63it/s, loss=121]

Mean loss was 147.75714608579398





Train mAP: 0.018671153113245964


100%|██████████| 313/313 [00:32<00:00,  9.56it/s, loss=143]

Mean loss was 138.45119525068483





Train mAP: 0.02543400600552559


100%|██████████| 313/313 [00:32<00:00,  9.68it/s, loss=156]

Mean loss was 128.83644540317522





Train mAP: 0.04002484306693077


100%|██████████| 313/313 [00:32<00:00,  9.62it/s, loss=81.5]

Mean loss was 120.5226822630666





Train mAP: 0.07325279712677002


100%|██████████| 313/313 [00:32<00:00,  9.61it/s, loss=117]

Mean loss was 111.14865505961946





Train mAP: 0.08719789981842041


100%|██████████| 313/313 [00:33<00:00,  9.40it/s, loss=76.9]

Mean loss was 106.04699458405614





Train mAP: 0.10491375625133514


100%|██████████| 313/313 [00:32<00:00,  9.57it/s, loss=88]

Mean loss was 101.47475134755095





Train mAP: 0.18784397840499878


100%|██████████| 313/313 [00:32<00:00,  9.68it/s, loss=52]

Mean loss was 91.24358867608701





Train mAP: 0.2588445544242859


100%|██████████| 313/313 [00:32<00:00,  9.56it/s, loss=72.9]

Mean loss was 83.77141934233352





Train mAP: 0.2810685336589813


100%|██████████| 313/313 [00:32<00:00,  9.58it/s, loss=77.3]

Mean loss was 80.4448561378942





Train mAP: 0.335563600063324


100%|██████████| 313/313 [00:32<00:00,  9.64it/s, loss=60.2]

Mean loss was 75.12533610773544





Train mAP: 0.46266621351242065


100%|██████████| 313/313 [00:32<00:00,  9.60it/s, loss=83.4]

Mean loss was 71.450225001326





Train mAP: 0.5202569961547852


100%|██████████| 313/313 [00:32<00:00,  9.55it/s, loss=52.1]

Mean loss was 67.10668811249657





Train mAP: 0.5812641978263855


100%|██████████| 313/313 [00:33<00:00,  9.47it/s, loss=68.3]

Mean loss was 63.6854369922187





Train mAP: 0.6118864417076111


100%|██████████| 313/313 [00:32<00:00,  9.61it/s, loss=98.2]

Mean loss was 61.05508339366974





Train mAP: 0.5654925107955933


100%|██████████| 313/313 [00:32<00:00,  9.59it/s, loss=47.8]

Mean loss was 58.984733947168905





Train mAP: 0.6515854597091675


100%|██████████| 313/313 [00:32<00:00,  9.55it/s, loss=50.8]

Mean loss was 56.781015006117165





Train mAP: 0.6644946932792664


100%|██████████| 313/313 [00:32<00:00,  9.61it/s, loss=42.3]

Mean loss was 53.644149091677924





Train mAP: 0.6948345303535461


100%|██████████| 313/313 [00:32<00:00,  9.62it/s, loss=57.2]

Mean loss was 68.15825506216422





Train mAP: 0.6425919532775879


100%|██████████| 313/313 [00:32<00:00,  9.54it/s, loss=310]

Mean loss was 52.571430651143714





Train mAP: 0.7274281978607178


100%|██████████| 313/313 [00:32<00:00,  9.64it/s, loss=63.1]

Mean loss was 46.90596877271756





Train mAP: 0.7512044310569763


100%|██████████| 313/313 [00:32<00:00,  9.52it/s, loss=42.4]

Mean loss was 43.16860422615807





Train mAP: 0.7638799548149109


100%|██████████| 313/313 [00:32<00:00,  9.66it/s, loss=55.5]

Mean loss was 41.87674057445587





Train mAP: 0.7405403852462769


100%|██████████| 313/313 [00:32<00:00,  9.62it/s, loss=46.2]

Mean loss was 45.7601436334677





Train mAP: 0.7209747433662415


100%|██████████| 313/313 [00:32<00:00,  9.74it/s, loss=22.8]

Mean loss was 43.13201386326799





Train mAP: 0.7338429093360901


100%|██████████| 313/313 [00:32<00:00,  9.59it/s, loss=29.3]

Mean loss was 40.09106367921677





Train mAP: 0.7621299028396606


100%|██████████| 313/313 [00:32<00:00,  9.65it/s, loss=28.9]

Mean loss was 38.322157332691525





Train mAP: 0.773625910282135


100%|██████████| 313/313 [00:32<00:00,  9.58it/s, loss=41.5]

Mean loss was 37.437538866036995





Train mAP: 0.774791955947876


100%|██████████| 313/313 [00:34<00:00,  9.13it/s, loss=42.6]

Mean loss was 36.07607463678232





Train mAP: 0.7734755277633667


100%|██████████| 313/313 [00:34<00:00,  9.08it/s, loss=35.7]

Mean loss was 35.69287231250312





Train mAP: 0.7632765173912048


100%|██████████| 313/313 [00:34<00:00,  9.06it/s, loss=24.3]

Mean loss was 35.44606428786208





Train mAP: 0.7482253909111023


100%|██████████| 313/313 [00:34<00:00,  9.06it/s, loss=37]

Mean loss was 36.17398502727667





Train mAP: 0.758388876914978


100%|██████████| 313/313 [00:34<00:00,  9.14it/s, loss=73.7]

Mean loss was 35.637830783003054





Train mAP: 0.7751839756965637


100%|██████████| 313/313 [00:33<00:00,  9.32it/s, loss=22.1]

Mean loss was 33.13717707429831



