In [2]:
import numpy as np

# 3D array
arr_3d = np.array([[[1, 2, 3], [4, 5, 6]],
                   [[7, 8, 9], [10, 11, 12]],
                   [[13, 14, 15], [16, 17, 18]]])

# Selecting all elements along the first axis, and only the first element along the last axis
selected_3d = arr_3d[..., 0:1]

print(selected_3d)

[[[ 1]
  [ 4]]

 [[ 7]
  [10]]

 [[13]
  [16]]]


In [3]:
print(arr_3d[0:1])

[[[1 2 3]
  [4 5 6]]]


In [4]:
import torch

iou_b1 = torch.tensor([[21,10],[1,2]])
iou_b2 = torch.tensor([[18,9],[5,6]])
print(iou_b1)

ious = torch.cat([iou_b1.unsqueeze(0), iou_b2.unsqueeze(0)], dim=2)
print(ious)

# Take the box with highest IoU out of the two prediction
# Note that bestbox will be indices of 0, 1 for which bbox was best
iou_maxes, bestbox = torch.max(ious, dim=0)
print(iou_maxes)
print(bestbox)

tensor([[21, 10],
        [ 1,  2]])
tensor([[[21, 10, 18,  9],
         [ 1,  2,  5,  6]]])
tensor([[21, 10, 18,  9],
        [ 1,  2,  5,  6]])
tensor([[0, 0, 0, 0],
        [0, 0, 0, 0]])


In [5]:
import pandas as pd
import os
from PIL import Image

annotations = pd.read_csv("/Users/yohanabeysinghe/Mac/0. ML/Datasets/PascalVOC YOLO/test.csv", header=None)
img_dir = "/Users/yohanabeysinghe/Mac/0. ML/Datasets/PascalVOC YOLO/images"
label_dir = "/Users/yohanabeysinghe/Mac/0. ML/Datasets/PascalVOC YOLO/labels"
S= 7
B= 2
C= 20 

In [6]:
index = 0
label_path = os.path.join(label_dir, annotations.iloc[index, 1])
label_path


'/Users/yohanabeysinghe/Mac/0. ML/Datasets/PascalVOC YOLO/labels/000001.txt'

In [7]:
boxes = []

with open(label_path) as f:
    for label in f.readlines():
        class_label, x, y, width, height = [
            float(x) if float(x) != int(float(x)) else int(x)
            for x in label.replace("\n", "").split()
        ]

        boxes.append([class_label, x, y, width, height])

In [8]:
boxes

[[11, 0.34419263456090654, 0.611, 0.4164305949008499, 0.262],
 [14, 0.509915014164306, 0.51, 0.9745042492917847, 0.972]]

In [9]:
img_path = os.path.join(img_dir, annotations.iloc[index, 0])
image = Image.open(img_path)

In [10]:
import torch
boxes = torch.tensor(boxes)
boxes

tensor([[11.0000,  0.3442,  0.6110,  0.4164,  0.2620],
        [14.0000,  0.5099,  0.5100,  0.9745,  0.9720]])

In [11]:
label_matrix = torch.zeros((S, S, C + 5 * B))
for box in boxes:
    class_label, x, y, width, height = box.tolist()
    print(x,y)
    class_label = int(class_label)
    print(class_label)
    i, j = int(S * y), int(S * x)
    print(i,j)

0.34419262409210205 0.6110000014305115
11
4 2
0.5099149942398071 0.5099999904632568
14
3 3


In [12]:
x_cell, y_cell = S * x - j, S * y - i

print(x_cell, y_cell)

0.5694049596786499 0.5699999332427979


In [13]:
width_cell, height_cell = (width * S, height * S,)

print(width_cell, height_cell)

6.8215296268463135 6.804000020027161


In [15]:
label_matrix.shape

torch.Size([7, 7, 30])

In [16]:
if label_matrix[i, j, 20] == 0:
    # Set that there exists an object
    label_matrix[i, j, 20] = 1

    # Box coordinates
    box_coordinates = torch.tensor(
        [x_cell, y_cell, width_cell, height_cell]
    )

    label_matrix[i, j, 21:25] = box_coordinates

    # Set one hot encoding for class_label
    label_matrix[i, j, class_label] = 1

In [18]:
label_matrix[i,j]

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 1.0000, 0.5694, 0.5700, 6.8215, 6.8040, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000])

In [4]:
import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
from model import YOLOv1
from dataset import VOCDataset
from utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
)
from loss import YoloLoss


In [5]:

seed = 123
torch.manual_seed(seed)


LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available else "cpu"
BATCH_SIZE = 16 # 64 in original paper but I don't have that much vram, grad accum?
WEIGHT_DECAY = 0    #Just trying to overfit for a small dataset rather than replicating what they did
EPOCHS = 100
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"
IMG_DIR = "/Users/yohanabeysinghe/Mac/0. ML/Datasets/PascalVOC YOLO/images"
LABEL_DIR = "/Users/yohanabeysinghe/Mac/0. ML/Datasets/PascalVOC YOLO/labels"

In [6]:
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, bboxes):
        for t in self.transforms:
            img, bboxes = t(img), bboxes

        return img, bboxes


transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor(),])


def train_fn(train_loader, model, optimizer, loss_fn):
    loop = tqdm(train_loader, leave=True)
    mean_loss = []

    for batch_idx, (x, y) in enumerate(loop):
        x, y = x.to(DEVICE), y.to(DEVICE)
        out = model(x)
        loss = loss_fn(out, y)
        mean_loss.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # update progress bar
        loop.set_postfix(loss=loss.item())

    print(f"Mean loss was {sum(mean_loss)/len(mean_loss)}")

In [7]:
def main():
    model = YOLOv1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)

    train_dataset = VOCDataset(
        "data/100examples.csv",
        transform=transform,
        img_dir=IMG_DIR,
        label_dir=LABEL_DIR,
    )

    test_dataset = VOCDataset(
        "data/test.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR,
    )

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    for epoch in range(EPOCHS):

        pred_boxes, target_boxes = get_bboxes(
            train_loader, model, iou_threshold=0.5, threshold=0.4
        )

        mean_avg_prec = mean_average_precision(
            pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint"
        )
        print(f"Train mAP: {mean_avg_prec}")
        train_fn(train_loader, model, optimizer, loss_fn)

In [8]:
if __name__ == "__main__":
    main()

AssertionError: Torch not compiled with CUDA enabled