In [1]:
import fiftyone as fo

from dataset import get_transforms, FiftyOneTorchDataset, collate_fn
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import engine

Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


In [2]:
batch_size = 5
num_epochs = 30
TRAIN_TEST_SPLIT = .9

DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
TRAINED_MODEL_FILENAME = "model/halo_FRCNN_model.pth"

# re load the dataset
if len(fo.list_datasets()) > 0:
    dataset = fo.load_dataset("halo-dataset")
else:
    name = "halo-dataset"
    data_path = "C:/Users/blain\Documents\Git\AutoAim/halo_data/"
    labels_path = "C:/Users/blain\Documents\Git\AutoAim/halo_data/labels/"

    classes = ["enemy"]

    # Import dataset by explicitly providing paths to the source media and labels
    dataset = fo.Dataset.from_dir(
        dataset_type=fo.types.YOLOv4Dataset,
        data_path=data_path,
        labels_path=labels_path,
        classes=classes,
        name=name,
    )

dataset.compute_metadata()
session = fo.launch_app(dataset)


In [3]:
train_transforms = get_transforms(train=True)
test_transforms = get_transforms(train=False)

# split the dataset in train and test set
train_view = dataset.take((len(dataset) * TRAIN_TEST_SPLIT), seed=51)
test_view = dataset.exclude([s.id for s in train_view])

print(f'Traning on {len(train_view)} samples')
print(f'Testing on {len(test_view)} samples')

# use our dataset and defined transformations
train_dataset = FiftyOneTorchDataset(train_view, train_transforms,)
evaluation_dataset = FiftyOneTorchDataset(test_view, test_transforms)

#session.view = train_view

Traning on 45 samples
Testing on 5 samples


In [4]:
train_data_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
    collate_fn=collate_fn,
    drop_last=False
)

valid_data_loader = torch.utils.data.DataLoader(
    evaluation_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
    collate_fn=collate_fn,
    drop_last=True
)

In [5]:
# load a pre-trained pre-trained FRCNN from torch
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one using our one enemy class.  Use 2 here because we need a background class
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)

# send the model to the training device
model.to(DEVICE)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

scaler = torch.cuda.amp.GradScaler()

In [6]:
for epoch in range(num_epochs):
    # training for one epoch
    engine.train_one_epoch(model, optimizer, train_data_loader, DEVICE, epoch,scaler=scaler, print_freq=1)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    engine.evaluate(model, train_data_loader, device=DEVICE)

Epoch: [0]  [0/9]  eta: 0:00:35  lr: 0.000629  loss: 0.7192 (0.7192)  loss_classifier: 0.5892 (0.5892)  loss_box_reg: 0.1035 (0.1035)  loss_objectness: 0.0246 (0.0246)  loss_rpn_box_reg: 0.0019 (0.0019)  time: 3.9783  data: 0.0950  max mem: 3465
Epoch: [0]  [1/9]  eta: 0:00:18  lr: 0.001254  loss: 0.6875 (0.7034)  loss_classifier: 0.5715 (0.5804)  loss_box_reg: 0.0573 (0.0804)  loss_objectness: 0.0246 (0.0336)  loss_rpn_box_reg: 0.0019 (0.0090)  time: 2.2717  data: 0.0915  max mem: 3789
Epoch: [0]  [2/9]  eta: 0:00:11  lr: 0.001878  loss: 0.6875 (0.6603)  loss_classifier: 0.5715 (0.5405)  loss_box_reg: 0.0884 (0.0831)  loss_objectness: 0.0246 (0.0295)  loss_rpn_box_reg: 0.0037 (0.0072)  time: 1.6928  data: 0.0870  max mem: 3789
Epoch: [0]  [3/9]  eta: 0:00:08  lr: 0.002503  loss: 0.5742 (0.5979)  loss_classifier: 0.4608 (0.4644)  loss_box_reg: 0.0884 (0.0862)  loss_objectness: 0.0246 (0.0380)  loss_rpn_box_reg: 0.0037 (0.0093)  time: 1.4033  data: 0.0838  max mem: 3789
Epoch: [0]  [4/9

KeyboardInterrupt: 

In [7]:
def apply_nms(orig_prediction, iou_thresh=0.3):

    # torchvision returns the indices of the bboxes to keep
    keep = torchvision.ops.nms(orig_prediction['boxes'], orig_prediction['scores'], iou_thresh)

    final_prediction = orig_prediction
    final_prediction['boxes'] = final_prediction['boxes'][keep]
    final_prediction['scores'] = final_prediction['scores'][keep]
    final_prediction['labels'] = final_prediction['labels'][keep]

    return final_prediction

# function to convert a torchtensor back to PIL image
def torch_to_pil(img):
    return torchvision.transforms.ToPILImage()(img).convert('RGB')

import tkinter
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('TkAgg')
import matplotlib.patches as patches

def plot_img_bbox(img, target):
    # plot the image and bboxes
    # Bounding boxes are defined as follows: x-min y-min width height
    fig, a = plt.subplots(1,1)
    fig.set_size_inches(5,5)
    a.imshow(img)
    for box in (target['boxes']):
        x, y, width, height  = box[0], box[1], box[2]-box[0], box[3]-box[1]
        rect = patches.Rectangle((x, y),
                                 width, height,
                                 linewidth = 2,
                                 edgecolor = 'r',
                                 facecolor = 'none')

        # Draw the bounding box on top of the image
        a.add_patch(rect)
    plt.show()


In [8]:
# pick one image from the test set
img, target = train_dataset[5]
# put the model in evaluation mode
model.eval()
with torch.no_grad():
    prediction = model([img.to(DEVICE)])[0]

print('predicted #boxes: ', len(prediction['labels']))
print('real #boxes: ', len(target['labels']))

predicted #boxes:  20
real #boxes:  1


In [10]:
print('EXPECTED OUTPUT')
plot_img_bbox(torch_to_pil(img), target)

EXPECTED OUTPUT


In [11]:
print('Predicted OUTPUT')
plot_img_bbox(torch_to_pil(img), prediction)

Predicted OUTPUT


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [12]:
prediction

{'boxes': tensor([[309.7555, 301.3320, 341.0692, 365.7558],
         [317.3797, 283.0357, 347.8021, 364.2500],
         [313.7378, 309.4354, 343.2237, 345.4171],
         [320.9241, 289.2632, 335.9792, 376.0393],
         [300.5381, 276.0068, 346.6392, 385.0882],
         [324.9810, 308.6666, 345.9605, 363.0152],
         [295.9156, 291.6388, 332.5501, 369.7255],
         [303.7346, 322.2299, 342.6017, 349.5093],
         [319.3935, 296.0606, 354.1078, 394.8098],
         [299.9713, 313.4511, 331.4622, 355.6393],
         [314.3451, 294.9984, 345.9138, 335.1835],
         [297.6116, 323.2365, 339.1385, 372.4258],
         [323.8223, 279.0023, 338.0670, 346.9181],
         [319.9921, 310.4760, 351.0009, 336.2351],
         [300.5486, 313.3826, 362.5050, 351.1314],
         [310.0913, 314.7109, 332.8177, 341.9038],
         [313.2756, 328.1349, 343.6857, 355.2656],
         [318.8855, 315.9940, 361.6632, 370.5016],
         [313.4791, 279.4824, 363.4555, 416.7647],
         [263.4851, 28

In [13]:
cpu_pred = {}
for key, value in prediction:
    cpu_pred[key] = value.cpu()

ValueError: too many values to unpack (expected 2)