In [None]:
# Installs all of the libraries present in the 'offline-pytorch-2-1-2' dataset
!pip install \
   --requirement /kaggle/input/offline-pytorch-2-1-2/requirements.txt \
   --no-index \
   --find-links file:///kaggle/input/offline-pytorch-2-1-2/wheels  \
--q

In [None]:
import os
# Download the torchvision detection reference helpers into the working
# directory so that `from engine import ...` resolves later in the notebook.
# NOTE(review): the files come from the moving `main` branch; consider pinning
# the URL to the release tag that matches the installed torchvision.
_REFERENCES_URL = "https://raw.githubusercontent.com/pytorch/vision/main/references/detection"
for _script in ("engine.py", "utils.py", "coco_utils.py", "coco_eval.py", "transforms.py"):
    # os.system returns the command's exit status. It used to be ignored, so a
    # failed download only showed up later as a confusing ImportError.
    if os.system(f"wget -q {_REFERENCES_URL}/{_script}") != 0:
        raise RuntimeError(f"failed to download {_script} from {_REFERENCES_URL}")

In [None]:
# Import each library and print its version so the run records which builds
# were actually picked up. The imports also bind `torch` and `torchvision`,
# which later cells use.
# NOTE(review): `koilerplate` is not referenced anywhere else in this notebook;
# presumably it ships with the offline wheel set installed above - confirm.
import koilerplate
print(f"koilerplate: {koilerplate.__version__}")

import torch
print(f"torch: {torch.__version__}")

import torchvision
print(f"torchvision: {torchvision.__version__}")

import torchaudio
print(f"torchaudio: {torchaudio.__version__}")

import torchdata
print(f"torchdata: {torchdata.__version__}")

import torchtext
print(f"torchtext: {torchtext.__version__}")

In [None]:
# Install pycocotools (unpinned, without --no-index, so this goes to the
# package index rather than the offline wheels used in the first cell).
# NOTE(review): presumably needed by the downloaded coco_utils.py / coco_eval.py
# helpers - confirm, and consider pinning a version.
!pip install pycocotools --quiet

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed

import pandas as pd # data processing
import numpy as np
# Data visualization libraries
import matplotlib.pyplot as plt 
import seaborn as sns 
from torchvision.io import read_image
from torchvision.tv_tensors import BoundingBoxes, Image
from torchvision.transforms.v2 import functional as F

In [None]:
# Directory that is scanned below for `.png` images and their same-named
# `.txt` annotation files.
files_dir = '/kaggle/input/bts-members-detection/images'

In [None]:
# Build the two tables the rest of the notebook works from:
#   train_img_df : one row per annotated image   -> columns `images`, `id`
#   train_df     : one row per annotation line   -> columns `images`, `id`,
#                  `image_id`, `bboxes`, `area`
# An image is kept only if it is a `.png` that has a non-empty `.txt` file with
# the same stem next to it. Every annotation line is parsed as whitespace
# separated floats; downstream code reads them as [class, cx, cy, w, h].
#
# Changes compared with the previous version of this cell:
#   * the annotation file is derived from the image name, so an unrelated
#     non-empty .txt in the folder can no longer shift the image/annotation
#     pairing (the two lists used to be built independently and zipped by
#     position);
#   * the directory is listed once instead of once per entry;
#   * every annotation file is read once instead of twice;
#   * images are no longer decoded just to be thrown away;
#   * blank annotation lines are skipped instead of producing a short box.

dir_entries = set(os.listdir(files_dir))

temp1 = []  # '/<name>.png' of every usable image, sorted by file name
temp2 = []  # '/<name>.txt' annotation file that belongs to temp1[i]
for entry in sorted(dir_entries):
    if entry[-4:] != '.png':
        continue
    annot = entry[:-4] + '.txt'
    if annot in dir_entries and os.path.getsize(files_dir + '/' + annot) != 0:
        temp1.append('/' + entry)
        temp2.append('/' + annot)

# Per-image table: `id` is simply the position of the image in temp1.
train_img_df = pd.concat(
    [pd.Series(temp1, name='images'), pd.Series(range(len(temp1)), name='id')],
    axis=1,
)

# Per-box records: one entry per non-blank annotation line.
images, image_id, bboxes = [], [], []
for img_idx, (img_name, annot_name) in enumerate(zip(temp1, temp2)):
    with open(files_dir + annot_name, 'r') as annot_file:
        for line in annot_file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing empty lines
            images.append(img_name)
            image_id.append(img_idx)
            bboxes.append([float(value) for value in fields])

# Product of the 4th and 5th annotation fields (box width * height, in the
# same units as the annotation file).
area = [box[3] * box[4] for box in bboxes]

images = pd.Series(images, name='images')
bboxes = pd.Series(bboxes, name='bboxes')
image_id = pd.Series(image_id, name='image_id')
ind = pd.Series(range(len(images)), name='id')
df = pd.concat([images, ind, image_id, bboxes], axis=1)
train_df = pd.concat([df, pd.Series(area, name='area')], axis=1)

In [None]:
# Preview the first rows of the per-annotation table.
train_df.head()

In [None]:
# Compare sizes: train_df has one row per annotation line, train_img_df one
# row per image.
train_df.shape, train_img_df.shape

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# 80/20 train/validation split, decided per image.
#
# The previous one-liner hard-coded the row count (809) and sliced the per-box
# table by the same row position as the per-image table. train_df has one row
# per box, not per image, so images and their boxes could end up in different
# splits. Here the image table is sliced first and every box follows the image
# it belongs to.
TRAIN_FRACTION = 0.8
n_train_images = int(len(train_img_df) * TRAIN_FRACTION)

train_img = train_img_df.iloc[:n_train_images, :]
valid_img = train_img_df.iloc[n_train_images:, :]

# train_df.image_id refers to train_img_df.id, so membership decides the split.
is_train_box = train_df['image_id'].isin(train_img['id'])
train = train_df[is_train_box]
valid = train_df[~is_train_box]

# 0 - Dataset

In [None]:
import os
import torch

class BTSDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by an image table and a box table.

    Parameters
    ----------
    root : str
        Prefix that is concatenated with an entry of the `images` column to
        form the image path.
    images_dataset : DataFrame
        One row per image, with an `images` column (path suffix) and an `id`
        column (image identifier).
    boxes_dataset : DataFrame
        One row per box, with an `image_id` column referring to
        `images_dataset.id` and a `bboxes` column holding
        `[class, cx, cy, w, h]`, the geometry being relative to the image size.
    transforms : callable or None
        Called as `transforms(img, target)` and expected to return the
        transformed pair; skipped when None.
    """

    def __init__(self, root, images_dataset, boxes_dataset, transforms):
        self.root = root
        self.transforms = transforms
        self.dataset = images_dataset
        self.boxes_dataset = boxes_dataset

    @staticmethod
    def _scale_boxes(boxes_list, height, width):
        """Turn relative `[cx, cy, w, h]` rows into a float (N, 4) pixel tensor.

        An empty list yields a (0, 4) tensor so that images without any box
        still flow through the box conversion and area computation.
        """
        boxes = torch.tensor(boxes_list, dtype=torch.float32).reshape(len(boxes_list), 4)
        # x-like columns scale with the width, y-like columns with the height
        return boxes * boxes.new_tensor([width, height, width, height])

    def __getitem__(self, idx):
        """Return `(img, target)` for the image at position `idx`.

        `target` holds `boxes` (XYXY, pixels), `labels` (class + 1, leaving 0
        free), `image_id`, `area` and `iscrowd` (all zeros).
        """
        # Positional lookup: works for any slice of the image table. The old
        # `id == idx` lookup broke whenever ids did not start at 0, which is
        # the case for a validation split cut from the end of the table.
        row = self.dataset.iloc[idx]
        img_path = self.root + row["images"]
        annotations = self.boxes_dataset[self.boxes_dataset["image_id"] == row["id"]]["bboxes"]

        img = read_image(img_path)
        height, width = F.get_size(img)  # get_size returns [height, width]

        boxes = self._scale_boxes([entry[1:] for entry in annotations], height, width)
        boxes = torchvision.ops.box_convert(boxes, 'cxcywh', 'xyxy')
        labels = torch.tensor([entry[0] + 1 for entry in annotations], dtype=torch.int64)
        image_id = idx
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        # Wrap sample and targets into torchvision tv_tensors:
        img = Image(img)

        target = {}
        target["boxes"] = BoundingBoxes(boxes, format="XYXY", canvas_size=(height, width))
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Number of images (rows of the image table)."""
        return len(self.dataset)

# 2 - Transforms, model setup and training

In [None]:
from torchvision.transforms import v2 as T


def get_transform(train):
    """Build the preprocessing pipeline applied to each sample.

    The pipeline converts to float with value scaling and then strips the
    tv_tensor wrappers. `train` is accepted but not consulted: the same two
    steps are returned for training and evaluation.
    """
    pipeline = [
        T.ToDtype(torch.float, scale=True),
        T.ToPureTensor(),
    ]
    return T.Compose(pipeline)

In [None]:
import torch.utils as utils
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from engine import train_one_epoch, evaluate

def my_collate(batch):
    """Split a batch of `(image, target)` samples into two parallel lists.

    Returns `[images, targets]` without stacking anything, so samples of
    different sizes can share a batch.
    """
    data, target = [], []
    for sample in batch:
        data.append(sample[0])
        target.append(sample[1])
    return [data, target]

# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Faster R-CNN with a MobileNetV3-Large FPN backbone, loaded with its default
# pre-trained weights.
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")

# 7 object classes + 1 background class (BTSDataset shifts every class id by
# +1, leaving label 0 free).
num_classes = 7+1

# Replace the pre-trained box predictor with one sized for our classes.
# `num_classes` used to be computed but never applied, so the model kept the
# class count of the pre-trained head.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# use our dataset and defined transformations
images_dataset = train_img
boxes_dataset = train
dataset = BTSDataset(root='/kaggle/input/bts-members-detection/images',
                     images_dataset=images_dataset,
                     boxes_dataset=boxes_dataset,
                     transforms=get_transform(train=True))
dataset_test = BTSDataset(root='/kaggle/input/bts-members-detection/images',
                     images_dataset=valid_img,
                     boxes_dataset=valid,
                     transforms=get_transform(train=False))



# Training loader: shuffled batches of 16 samples.
data_loader = torch.utils.data.DataLoader(
    dataset=dataset, batch_size=16, shuffle=True, collate_fn=my_collate
)

# Validation loader: one sample at a time, in dataset order.
data_loader_test = torch.utils.data.DataLoader(
    dataset=dataset_test, batch_size=1, shuffle=False, collate_fn=my_collate
)


# Put the model on the selected device before the optimizer collects its
# parameters.
model.to(device)

# Adam over the trainable parameters only.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(params, lr=1e-3, weight_decay=0.0005)

# Step schedule: the learning rate is multiplied by 0.1 every 5 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# number of passes over the training data
num_epochs = 50

for epoch in range(num_epochs):
    # train for one epoch; print_freq=100 is handed to the reference helper as
    # its logging interval
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=100)
    # update the learning rate (the scheduler is stepped once per epoch)
    lr_scheduler.step()
    # evaluation on the validation loader is currently disabled
    #evaluate(model, data_loader_test, device=device)

print("That's it!")

In [None]:
from torchvision.ops import nms

# Inference sanity check on a single image: print the raw prediction, then the
# prediction that survives class-agnostic NMS.
path_img = "/kaggle/input/bee-detection-dataset/test/images/20220708_084534_mp4-40_jpg.rf.796cb1b5e172d63df1c6116c2380cf19.jpg"
image = read_image(path_img)
eval_transform = get_transform(train=False)
model.eval()
with torch.no_grad():
    x = eval_transform(image)
    # keep the first three channels only (drops an alpha channel if present),
    # matching the other inference cells, then move to the model's device
    x = x[:3, ...].to(device)

    predictions = model([x, ])
    pred = predictions[0]
    # raw output for the single input image (printed once; it used to be
    # printed twice, as `predictions` and again as `predictions[0]`)
    print(pred)
    # nms returns the indices of the boxes to keep
    result = nms(pred['boxes'], pred['scores'], iou_threshold=0.1).to('cpu')
    pred = {'labels': pred['labels'][result],
            'scores': pred['scores'][result],
            'boxes': pred['boxes'][result, :]}
print(pred)

In [None]:
import matplotlib.pyplot as plt

from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks

# Run the detector on one image and draw the boxes that survive NMS.
path_img = "/kaggle/input/bee-detection-dataset/test/images/20220708_084534_mp4-40_jpg.rf.796cb1b5e172d63df1c6116c2380cf19.jpg"
image = read_image(path_img)
eval_transform = get_transform(train=False)

model.eval()
with torch.no_grad():
    x = eval_transform(image)
    # convert RGBA -> RGB and move to device
    x = x[:3, ...].to(device)
    predictions = model([x, ])
    pred = predictions[0]
    # The detector already returns XYXY corner boxes in pixel coordinates, so
    # they are used as-is. An earlier version shifted every box by half its
    # width and height here, which drew the boxes off-target and disagreed
    # with the other inference cells.
    result = nms(pred['boxes'], pred['scores'], iou_threshold=0.01).to('cpu')
    pred = {'labels': pred['labels'][result],
            'scores': pred['scores'][result],
            'boxes': pred['boxes'][result, :]}

# keep the first three channels only before drawing, as the other cells do
image = image[:3, ...]
pred_labels = [f"{label}: {score:.3f}" for label, score in zip(pred["labels"], pred["scores"])]
pred_boxes = pred["boxes"].long()
output_image = draw_bounding_boxes(image, pred_boxes, pred_labels, colors="red",width=6, \
                                   font = '../input/synth-indic-custom-resources/SYNTH_INDIC/fonts/english/English.ttf',\
                                   font_size = 60)


plt.figure(figsize=(12, 12))
# draw_bounding_boxes returns channels-first; imshow wants channels-last
plt.imshow(output_image.permute(1, 2, 0))

In [None]:
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks

# Run the detector on one image and draw the boxes that survive NMS.
path_img = "/kaggle/input/bee-detection-dataset/test/images/20220708_084920_mp4-131_jpg.rf.ff5859b06e7f5a4e5e3a57aa3b5b436d.jpg"
image = read_image(path_img)
eval_transform = get_transform(train=False)

model.eval()
with torch.no_grad():
    # first three channels only (RGBA -> RGB), then onto the model's device
    x = eval_transform(image)[:3, ...].to(device)
    predictions = model([x])
    pred = predictions[0]
    # indices of the boxes kept by NMS
    result = nms(pred['boxes'], pred['scores'], iou_threshold=0.01).to('cpu')
    pred = {key: pred[key][result] for key in ('labels', 'scores', 'boxes')}

# min-max stretch to the 0..255 range as uint8, then keep three channels
image = (255.0 * (image - image.min()) / (image.max() - image.min())).to(torch.uint8)
image = image[:3, ...]
pred_labels = [f"{label}: {score:.3f}" for label, score in zip(pred["labels"], pred["scores"])]
pred_boxes = pred["boxes"].long()
output_image = draw_bounding_boxes(
    image,
    pred_boxes,
    pred_labels,
    colors="red",
    width=6,
    font='../input/synth-indic-custom-resources/SYNTH_INDIC/fonts/english/English.ttf',
    font_size=60,
)

plt.figure(figsize=(12, 12))
# channels-first -> channels-last for imshow
plt.imshow(output_image.permute(1, 2, 0))

In [None]:
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks

# Run the detector on one image and draw the boxes that survive NMS.
path_img = "/kaggle/input/bee-detection-dataset/test/images/BDD810C5-1B2A-4A40-91B9-B872748EABFB_mov-34_jpg.rf.86f65ad3a1d0661c9d7b0f7a077cf4cd.jpg"
image = read_image(path_img)
eval_transform = get_transform(train=False)

model.eval()
with torch.no_grad():
    # first three channels only (RGBA -> RGB), then onto the model's device
    x = eval_transform(image)[:3, ...].to(device)
    predictions = model([x])
    pred = predictions[0]
    # indices of the boxes kept by NMS
    result = nms(pred['boxes'], pred['scores'], iou_threshold=0.01).to('cpu')
    pred = {key: pred[key][result] for key in ('labels', 'scores', 'boxes')}

# min-max stretch to the 0..255 range as uint8, then keep three channels
image = (255.0 * (image - image.min()) / (image.max() - image.min())).to(torch.uint8)
image = image[:3, ...]
pred_labels = [f"{label}: {score:.3f}" for label, score in zip(pred["labels"], pred["scores"])]
pred_boxes = pred["boxes"].long()
output_image = draw_bounding_boxes(
    image,
    pred_boxes,
    pred_labels,
    colors="red",
    width=6,
    font='../input/synth-indic-custom-resources/SYNTH_INDIC/fonts/english/English.ttf',
    font_size=60,
)

plt.figure(figsize=(12, 12))
# channels-first -> channels-last for imshow
plt.imshow(output_image.permute(1, 2, 0))