In [2]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision
from torch.utils import data

from from_scratch.dataset import Dataset, resize
from from_scratch.losses import ComputeLoss
from from_scratch.model import Yolo

In [3]:
# --- Load the training data and inspect one batch ---------------------------
# Dataset root: defaults to the original local checkout, but can be redirected
# with the CHANGE_DETECTION_DATASET environment variable so the notebook also
# runs on machines that do not have this exact home directory.
DATASET_DIR = os.environ.get(
    'CHANGE_DETECTION_DATASET',
    '/home/davide/Desktop/change_detection/dataset',
)
INPUT_SIZE = 640   # passed to Dataset as input_size
BATCH_SIZE = 32

dataset = Dataset(DATASET_DIR, input_size=INPUT_SIZE)
train_loader = data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
    pin_memory=True,
    collate_fn=Dataset.collate_fn,
)
print(f"Train_loader : {len(train_loader)} batches")

# Pull a single batch for inspection. batch[0] is the image tensor and
# batch[1] is a dict of targets (its keys are printed below).
batch = next(iter(train_loader))
print("All keys in batch      : ", batch[1].keys())
print("Input batch shape      : ", batch[0].shape)
print(f"Classification scores  : {batch[1]['cls'].shape}")
print(f"Box coordinates        : {batch[1]['box'].shape}")
print(f"Index identifier (which score belongs to which image): {batch[1]['idx'].shape}")


Train_loader : 4 batches
All keys in batch      :  dict_keys(['cls', 'box', 'idx'])
Input batch shape      :  torch.Size([32, 1, 640, 640])
Classification scores  : torch.Size([135, 1])
Box coordinates        : torch.Size([135, 4])
Index identifier (which score belongs to which image): torch.Size([135])


In [None]:
# --- Fit the model on one fixed batch ---------------------------------------
# This cell does not iterate over the DataLoader: it reuses the single `batch`
# fetched in an earlier cell, so every "epoch" below is one optimisation step
# on the same images and targets.
torch.manual_seed(42)  # seeded before the model is constructed

model = Yolo(version='n')
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params/1e6} million parameters")

criterion = ComputeLoss(model)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.5)

num_epochs = 40

imgs = batch[0].float()  # images cast to float before the forward pass
targets = batch[1]       # target dict consumed by the loss

model.train()
for epoch in range(num_epochs):
    # Clear gradients left over from the previous step before computing new ones.
    optimizer.zero_grad()

    outputs = model(imgs)
    # The criterion returns several loss terms; they are summed into one scalar.
    loss_terms = criterion(outputs, targets)
    loss = sum(loss_terms)

    loss.backward()
    optimizer.step()
    print(f"Epoch : {epoch + 1} | loss : {loss.item()}")


2.649366 million parameters


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch : 0 | loss : 208957.65625
Epoch : 1 | loss : 135740.4375
Epoch : 2 | loss : 82601.671875
Epoch : 3 | loss : 48735.58984375
Epoch : 4 | loss : 30216.58203125


In [None]:
# --- Run the model on one validation image ----------------------------------
TEST_IMG_PATH = '/home/davide/Desktop/change_detection/dataset/images/val/149.png'
INFER_SIZE = 640  # target size of the longer image side

model.eval()

test_img = cv2.imread(TEST_IMG_PATH)
if test_img is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None;
    # fail here with a clear message instead of a cryptic error in cvtColor.
    raise FileNotFoundError(f"Could not read image: {TEST_IMG_PATH}")
test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2GRAY)

# Rescale so that the longer side equals INFER_SIZE, keeping the aspect ratio.
h, w = test_img.shape
r = INFER_SIZE / max(h, w)
if r != 1:
    test_img = cv2.resize(test_img, dsize=(int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)

# Project helper from from_scratch.dataset; it also returns `ratio` and `pad`.
# NOTE(review): presumably these describe the scaling/padding applied and are
# needed to map predicted boxes back to the original image - confirm in dataset.py.
test_img, ratio, pad = resize(test_img, INFER_SIZE)

# Add batch and channel axes: (H, W) -> (1, 1, H, W), then convert to a tensor.
h, w = test_img.shape
test_img = test_img.reshape((1, 1, h, w))
test_img = np.ascontiguousarray(test_img)
test_img = torch.from_numpy(test_img)

# Forward pass without building the autograd graph; `out` is consumed by the
# post-processing cell.
with torch.no_grad():
    out = model(test_img.float())

Input shape: torch.Size([1, 1, 640, 640])
After conv_0: torch.Size([1, 16, 320, 320])
After conv_1: torch.Size([1, 32, 160, 160])
After c2f_2: torch.Size([1, 32, 160, 160])
After conv_3: torch.Size([1, 64, 80, 80])
After c2f_4: torch.Size([1, 64, 80, 80]) <----- out1 shape
After conv_5: torch.Size([1, 128, 40, 40])
After c2f_6: torch.Size([1, 128, 40, 40]) <----- out2 shape
After conv_7: torch.Size([1, 256, 20, 20])
After c2f_8: torch.Size([1, 256, 20, 20])
After sppf: torch.Size([1, 256, 20, 20])
Input shapes: x_res_1: torch.Size([1, 64, 80, 80]), x_res_2: torch.Size([1, 128, 40, 40]), x (res_1): torch.Size([1, 256, 20, 20])
After upsample: torch.Size([1, 256, 40, 40])
After concatenation with x_res_2: torch.Size([1, 384, 40, 40])
After c2f_1: torch.Size([1, 128, 40, 40]) <-- res_2 shape
After upsample: torch.Size([1, 128, 80, 80])
After concatenation with x_res_1: torch.Size([1, 192, 80, 80])
After c2f_2: torch.Size([1, 64, 80, 80]) <----- out_1 shape
After cv_1: torch.Size([1, 64, 4

In [63]:
# --- Post-process predictions: confidence filter + NMS ----------------------
CONF_THRESHOLD = 0.6
# torchvision.ops.nms discards every box whose IoU with a higher-scoring kept
# box is greater than this value.
# NOTE(review): with 0, any overlap at all causes suppression; detectors usually
# use something around 0.45-0.7 - confirm that 0 is intended.
IOU_THRESHOLD = 0

# Take the first (only) image of the batch and put one prediction per row.
# The result is bound to a new name rather than overwriting `out`, so this cell
# can be re-run without transposing an already-transposed tensor.
preds = out[0].T

# Column layout used below: 0-3 = box (cx, cy, w, h), 4 and 5 = per-class scores.
boxes = preds[:, :4]
confidences_0 = preds[:, 4]
confidences_1 = preds[:, 5]

# A prediction's score is its best class score; keep only confident ones.
scores = torch.maximum(confidences_0, confidences_1)
mask = scores > CONF_THRESHOLD
boxes = boxes[mask]
scores = scores[mask]
confidences_0 = confidences_0[mask]
confidences_1 = confidences_1[mask]

# Centre/size -> corner format (x1, y1, x2, y2), as required by torchvision NMS.
centers = boxes[:, :2]
half_sizes = boxes[:, 2:] / 2
boxes_xyxy = torch.cat([centers - half_sizes, centers + half_sizes], dim=1)
print(f"Boxes shape : {boxes_xyxy.shape}")
print(f"Scores shape : {scores.shape}")
print(f"Confidences 0 shape : {confidences_0.shape}")
print(f"Confidences 1 shape : {confidences_1.shape}")

# Class-agnostic NMS; `keep` holds the indices of the surviving boxes.
keep = torchvision.ops.nms(boxes_xyxy, scores, iou_threshold=IOU_THRESHOLD)
boxes_xyxy = boxes_xyxy[keep]
scores = scores[keep]
confidences_0 = confidences_0[keep]
confidences_1 = confidences_1[keep]

Boxes shape : torch.Size([6400, 4])
Scores shape : torch.Size([6400])
Confidences 0 shape : torch.Size([6400])
Confidences 1 shape : torch.Size([6400])


In [64]:
# Number of boxes that survived NMS (size of the index tensor returned by nms).
keep.size()

torch.Size([6400])

In [4]:
# Show the first raw sample exactly as the Dataset returns it (before collation).
dataset[0]

(tensor([[[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          ...,
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]]], dtype=torch.uint8),
 tensor([[1.],
         [0.]]),
 tensor([[0.4320, 0.1960, 0.0279, 0.0246],
         [0.5459, 0.8270, 0.0262, 0.0311]]),
 tensor([0., 0.]))