In [1]:
import torch, os, tqdm, json
import numpy as np
import torch.nn as nn


from loss import YOLOLoss
from models import ResNet
from data import YOLODataset
from utils import parse, get_metadata, intersection_over_union
from torch.utils.data import Dataset, DataLoader


device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
with open('red_examples.json', 'r') as f:
    red_examples = json.load(f)
    
train_red_book = red_examples['train_red_book']
val_red_book = red_examples['val_red_book']

In [3]:
image_names = [x for x in os.listdir("dataset/valid") if x[-3:] == "jpg"]
annotation_names = [x for x in os.listdir("dataset/valid") if x[-3:] == "xml"]

In [4]:
data = []

for name in ['test', 'train', 'valid']:
    file = []
    image_names = [x for x in os.listdir("dataset/" + name) if x[-3:] == "jpg"]
    annotation_names = [x for x in os.listdir("dataset/" + name) if x[-3:] == "xml"]
    metadata = get_metadata(name, annotation_names)
    
    for i in range(len(metadata)):
        row = []
        
        row.append(f"dataset/{name}/" + image_names[i])
        row.append(metadata[i])
        file.append(row)
    data.append(file)

100%|████████████████████████████████████████████████████████████████████████████████| 458/458 [00:05<00:00, 85.55it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2634/2634 [00:26<00:00, 99.70it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 966/966 [00:10<00:00, 95.50it/s]


In [5]:
d_indx = 0
for red_idx in train_red_book:
    data[1].pop(red_idx - d_indx)
    d_indx += 1 


d_indx = 0
for red_idx in val_red_book:
    del data[2][red_idx - d_indx]
    d_indx += 1

In [6]:
encoder = {
    "truck": 1,
    "car": 2,
    "bus": 3
}

encoder = {
    "truck": torch.tensor([1, 0, 0]),
    "car": torch.tensor([0, 1, 0]),
    "bus": torch.tensor([0, 0, 1]),
}

ANCHORS = torch.tensor([
    [104, 104, 120, 127], 
    [ 28, 109,  53, 144],
    # [100, 104, 125, 127]
]) / 320

anchors = ANCHORS[:, 2:].reshape(1, 2, 1, 1, 2).to(device)

In [8]:
train_dataset = YOLODataset(data[1], encoder, ANCHORS, train_transforms)
val_dataset = YOLODataset(data[2], encoder, ANCHORS, transform)

trainLoader = DataLoader(train_dataset, batch_size=64)
valLoader = DataLoader(val_dataset, batch_size=64)

In [9]:
model = ResNet(in_channels=3, num_classes=3, layers=[2, 2, 2]).to(device)
model.load_state_dict(torch.load("models/best_ModelTest.pt")['model'])

<All keys matched successfully>

In [10]:
criterion = YOLOLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer.load_state_dict(torch.load("models/best_ModelTest.pt")['optimizer'])

In [13]:
def save_model(model_name, epoch, prev_loss, current_loss, model, optimizer):
    if prev_loss > current_loss:
        torch.save({
            "epoch": epoch,
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "loss": current_loss,
        }, f"models/best_{model_name}.pt") # create models folder before! 
        print("The best model was saved!")
        prev_loss = current_loss
    
    torch.save({
            "epoch": epoch,
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "loss": current_loss,
        }, f"models/last_{model_name}.pt")
    return prev_loss


os.makedirs("models", exist_ok=True)

num_epochs, prev_loss = 100, -torch.inf
train_loss, val_loss = [], []

In [None]:
for epoch in tqdm.trange(num_epochs):
    model.train()
    loss_list, count = 0, 0
    for images, targets in trainLoader:
        images = images.to(device, dtype=torch.float) # .permute(0, 3, 1, 2)
        # targets = targets
        optimizer.zero_grad()
        
        out1, out2 = model(images)
        
        # loss = criterion(features, output, targets)
        loss = criterion(out1.reshape(-1, 2, 40, 40, 8), targets[0].to(device), anchors) + criterion(out2.reshape(-1, 2, 20, 20, 8), targets[1].to(device), anchors)
        loss.backward()
        optimizer.step()
        loss_list += loss.item()
        count += 1
    loss = loss_list/count
    train_loss.append(loss)
    
    model.eval()
    @torch.no_grad()
    def validation():
        val_loss, val_counter = 0, 0
        for images, targets in valLoader:
            images = images.permute(0, 3, 1, 2).to(device, dtype=torch.float) # .permute(0, 3, 1, 2)
            # targets = targets.to(device)
            
            out1, out2 = model(images)
            
            loss = criterion(out1.reshape(-1, 2, 40, 40, 8), targets[0].to(device), anchors) + criterion(out2.reshape(-1, 2, 20, 20, 8), targets[1].to(device), anchors)
            val_loss += loss.item()
            val_counter += 1
        return val_loss / val_counter
    v_loss = validation()
    val_loss.append(v_loss)
    prev_loss = save_model("ModelTest", epoch + 100, prev_loss, v_loss, model, optimizer)
    
    print(f"Epoch: {epoch} | Training loss {loss} | Validation loss {v_loss}")

  1%|▊                                                                              | 1/100 [02:09<3:34:11, 129.82s/it]

Epoch: 0 | Training loss 12.385187058221726 | Validation loss 11.739208936691284


  2%|█▌                                                                             | 2/100 [03:58<3:11:35, 117.30s/it]

Epoch: 1 | Training loss 12.355651128859748 | Validation loss 11.026854038238525


  3%|██▎                                                                            | 3/100 [05:49<3:05:05, 114.49s/it]

Epoch: 2 | Training loss 12.504978043692452 | Validation loss 12.357191622257233


  4%|███▏                                                                           | 4/100 [07:42<3:01:57, 113.72s/it]

Epoch: 3 | Training loss 12.764366831098284 | Validation loss 12.171160519123077


  5%|███▉                                                                           | 5/100 [09:28<2:56:00, 111.16s/it]

Epoch: 4 | Training loss 12.695029485793341 | Validation loss 12.584539532661438


  6%|████▋                                                                          | 6/100 [11:29<2:59:33, 114.61s/it]

Epoch: 5 | Training loss 13.338673023950486 | Validation loss 12.966343879699707


  7%|█████▌                                                                         | 7/100 [13:24<2:57:50, 114.74s/it]

Epoch: 6 | Training loss 12.838110560462589 | Validation loss 12.140791594982147


  8%|██████▎                                                                        | 8/100 [15:20<2:56:15, 114.95s/it]

Epoch: 7 | Training loss 12.424103328159877 | Validation loss 11.521124303340912


  9%|███████                                                                        | 9/100 [17:14<2:53:53, 114.65s/it]

Epoch: 8 | Training loss 12.619962873912993 | Validation loss 11.60217970609665


 10%|███████▊                                                                      | 10/100 [19:02<2:48:53, 112.60s/it]

Epoch: 9 | Training loss 12.64015143258231 | Validation loss 11.939994513988495


 11%|████████▌                                                                     | 11/100 [20:50<2:44:54, 111.18s/it]

Epoch: 10 | Training loss 12.390050751822335 | Validation loss 11.460948288440704


 12%|█████████▎                                                                    | 12/100 [22:38<2:41:51, 110.36s/it]

Epoch: 11 | Training loss 13.400397459665934 | Validation loss 11.827004134654999


In [16]:
out1.shape

torch.Size([10, 16, 40, 40])

In [17]:
targets[0].shape

torch.Size([10, 2, 40, 40, 8])

In [1]:
import cv2

In [None]:
cv2.imread()

In [15]:
images.shape

torch.Size([1, 3, 320, 320])

In [7]:
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

IMAGE_SIZE = 320
scale = 1.1

transform = A.Compose([
                A.Blur(blur_limit=50, p=0.1),
                A.MedianBlur(blur_limit=51, p=0.1),
                A.ToGray(p=0.3)],
                bbox_params=A.BboxParams(format='yolo', label_fields=[]))

train_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=int(IMAGE_SIZE * scale)),
        A.PadIfNeeded(
            min_height=int(IMAGE_SIZE * scale),
            min_width=int(IMAGE_SIZE * scale),
            border_mode=cv2.BORDER_CONSTANT,
        ),
        A.RandomCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
        A.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.6, p=0.4),
        A.OneOf(
            [
                A.ShiftScaleRotate(
                    rotate_limit=20, p=0.5, border_mode=cv2.BORDER_CONSTANT
                ),
                A.IAAAffine(shear=15, p=0.5, mode="constant"),
            ],
            p=1.0,
        ),
        A.HorizontalFlip(p=0.5),
        A.Blur(p=0.1),
        A.CLAHE(p=0.1),
        A.Posterize(p=0.1),
        A.ToGray(p=0.1),
        A.ChannelShuffle(p=0.05),
        # A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[],),
)
test_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=IMAGE_SIZE),
        A.PadIfNeeded(
            min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
        ),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
)




In [13]:
q = [(0.555230164527893, 0.378763222694397, 0.035061979293823264, 0.034236288070678744), (0.7444658160209656, 0.3889347195625305, 0.0419869661331177, 0.03695943355560305), (0.4715870380401611, 0.4050339698791504, 0.045640659332275346, 0.047342491149902355), (0.6916573882102967, 0.410683798789978, 0.04542219638824463, 0.03703227043151858), (0.7938147485256195, 0.4471988916397095, 0.05605548620223999, 0.05271601676940918), (0.2944324016571045, 0.4784142255783081, 0.06723499298095703, 0.0941751956939697), (0.36526331901550296, 0.49625308513641353, 0.10595073699951174, 0.138794469833374), (0.4546086072921753, 0.5768776655197143, 0.0877914905548095, 0.09203457832336426), (0.41252171993255615, 0.780198049545288, 0.16893115043640128, 0.19423360824584968), (0.8886498898267746, 0.7868667840957642, 0.17154697775840755, 0.15564322471618652)]

In [15]:
torch.tensor(q)[None]

tensor([[[0.5552, 0.3788, 0.0351, 0.0342],
         [0.7445, 0.3889, 0.0420, 0.0370],
         [0.4716, 0.4050, 0.0456, 0.0473],
         [0.6917, 0.4107, 0.0454, 0.0370],
         [0.7938, 0.4472, 0.0561, 0.0527],
         [0.2944, 0.4784, 0.0672, 0.0942],
         [0.3653, 0.4963, 0.1060, 0.1388],
         [0.4546, 0.5769, 0.0878, 0.0920],
         [0.4125, 0.7802, 0.1689, 0.1942],
         [0.8886, 0.7869, 0.1715, 0.1556]]])

In [None]:
Добрый вечер!
Почему albumentations ошибку показывает хотя bboxes в формате yolo and normalized? 
Кто-нибудь знает как решить эту проблему? 


аугментировать получается до определенного прямоугольника