# Prepare Dependencies

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the training
# and model-loading cells below read and write checkpoints under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/transforms.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/engine.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/utils.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_eval.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_utils.py
!pip install fiftyone

--2023-06-10 14:40:50--  https://raw.githubusercontent.com/pytorch/vision/main/references/detection/transforms.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 23337 (23K) [text/plain]
Saving to: ‘transforms.py’


2023-06-10 14:40:50 (10.1 MB/s) - ‘transforms.py’ saved [23337/23337]

--2023-06-10 14:40:50--  https://raw.githubusercontent.com/pytorch/vision/main/references/detection/engine.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4032 (3.9K) [text/plain]
Saving to: ‘engine.py’


2023-06-10 14:40:51 (68.2 MB/s) - ‘engine

# Import Dependencies

In [None]:
import torch
import torchvision
import torch.utils.data
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.utils.coco as fouc

import utils
import transforms as T
from engine import train_one_epoch, evaluate

from PIL import Image

Migrating database to v0.21.0


INFO:fiftyone.migrations.runner:Migrating database to v0.21.0


# Prepare Dataset

In [None]:
# Pull person-only subsets of COCO-2017 from the FiftyOne Dataset Zoo
# (the files are downloaded first if they are not already on disk).
train_dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="train",
    label_types=["detections"],
    classes=["person"],
    max_samples=3000,
)
test_dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    label_types=["detections"],
    classes=["person"],
    max_samples=500,
)

# Show the summary of the training split
print(train_dataset)

Downloading split 'train' to '/root/fiftyone/coco-2017/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/coco-2017/train' if necessary


Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


INFO:fiftyone.utils.coco:Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


 100% |██████|    1.9Gb/1.9Gb [16.6s elapsed, 0s remaining, 131.0Mb/s]      


INFO:eta.core.utils: 100% |██████|    1.9Gb/1.9Gb [16.6s elapsed, 0s remaining, 131.0Mb/s]      


Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_train2017.json'


INFO:fiftyone.utils.coco:Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_train2017.json'


Downloading 3000 images


INFO:fiftyone.utils.coco:Downloading 3000 images


 100% |████████████████| 3000/3000 [25.5m elapsed, 0s remaining, 2.0 images/s]      


INFO:eta.core.utils: 100% |████████████████| 3000/3000 [25.5m elapsed, 0s remaining, 2.0 images/s]      


Writing annotations for 3000 downloaded samples to '/root/fiftyone/coco-2017/train/labels.json'


INFO:fiftyone.utils.coco:Writing annotations for 3000 downloaded samples to '/root/fiftyone/coco-2017/train/labels.json'


Dataset info written to '/root/fiftyone/coco-2017/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/coco-2017/info.json'


Loading 'coco-2017' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'coco-2017' split 'train'


 100% |███████████████| 3000/3000 [31.8s elapsed, 0s remaining, 78.9 samples/s]       


INFO:eta.core.utils: 100% |███████████████| 3000/3000 [31.8s elapsed, 0s remaining, 78.9 samples/s]       


Dataset 'coco-2017-train-3000' created


INFO:fiftyone.zoo.datasets:Dataset 'coco-2017-train-3000' created


Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


Found annotations at '/root/fiftyone/coco-2017/raw/instances_val2017.json'


INFO:fiftyone.utils.coco:Found annotations at '/root/fiftyone/coco-2017/raw/instances_val2017.json'


Downloading 500 images


INFO:fiftyone.utils.coco:Downloading 500 images


 100% |██████████████████| 500/500 [4.2m elapsed, 0s remaining, 1.9 images/s]      


INFO:eta.core.utils: 100% |██████████████████| 500/500 [4.2m elapsed, 0s remaining, 1.9 images/s]      


Writing annotations for 500 downloaded samples to '/root/fiftyone/coco-2017/validation/labels.json'


INFO:fiftyone.utils.coco:Writing annotations for 500 downloaded samples to '/root/fiftyone/coco-2017/validation/labels.json'


Dataset info written to '/root/fiftyone/coco-2017/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/coco-2017/info.json'


Loading 'coco-2017' split 'validation'


INFO:fiftyone.zoo.datasets:Loading 'coco-2017' split 'validation'


 100% |█████████████████| 500/500 [7.2s elapsed, 0s remaining, 113.8 samples/s]     


INFO:eta.core.utils: 100% |█████████████████| 500/500 [7.2s elapsed, 0s remaining, 113.8 samples/s]     


Dataset 'coco-2017-validation-500' created


INFO:fiftyone.zoo.datasets:Dataset 'coco-2017-validation-500' created


Name:        coco-2017-train-3000
Media type:  image
Num samples: 3000
Persistent:  False
Tags:        []
Sample fields:
    id:           fiftyone.core.fields.ObjectIdField
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)


In [None]:
# Strip every non-"person" box from the training samples and persist the result.
for sample in train_dataset:
    # Rebuild the detection list with only the "person" entries
    detections = list(
        filter(lambda det: det.label == "person", sample.ground_truth.detections)
    )
    sample.ground_truth.detections = detections
    # Write the edited sample back to the dataset
    sample.save()

In [None]:
# Strip every non-"person" box from the validation samples and persist the result.
for sample in test_dataset:
    # Rebuild the detection list with only the "person" entries
    detections = list(
        filter(lambda det: det.label == "person", sample.ground_truth.detections)
    )
    sample.ground_truth.detections = detections
    # Write the edited sample back to the dataset
    sample.save()

## Object Dataset

In [None]:
class ObjectDataset(torch.utils.data.Dataset):
  """Torch dataset that serves images and detection targets from a FiftyOne collection.

  Each item is an ``(image, target)`` pair. ``target`` is a dict with the keys
  ``boxes`` (float32, ``[x1, y1, x2, y2]`` per row), ``labels`` (int64),
  ``image_id``, ``area`` (float32) and ``iscrowd`` (int64), built from the
  sample's ``ground_truth`` detections.

  Args:
    root: FiftyOne dataset or view; must support ``values``, ``distinct`` and
      lookup of a sample by its file path.
    transforms: Optional callable applied as ``transforms(img, target)`` that
      returns the transformed pair.
  """

  def __init__(self, root, transforms=None):
    self.root = root
    self.transforms = transforms

    self.img_paths = self.root.values("filepath")

    # Index 0 is reserved for "background". The emptiness check keeps a
    # collection without any labels from failing on classes[0].
    classes = list(self.root.distinct("ground_truth.detections.label"))
    if not classes or classes[0] != "background":
      classes = ["background"] + classes
    self.classes = classes

    # Class name -> integer id used in target["labels"]
    self.labels_map_rev = {c: i for i, c in enumerate(self.classes)}

  def __getitem__(self, idx):
    """Load the image at position ``idx`` and build its detection target."""
    img_path = self.img_paths[idx]
    sample_img = self.root[img_path]
    metadata = sample_img.metadata

    img = Image.open(img_path).convert('RGB')

    boxes = []
    labels = []
    area = []
    iscrowd = []

    # A sample may carry no ground truth at all; treat that as "no objects".
    ground_truth = sample_img["ground_truth"]
    detections = ground_truth.detections if ground_truth is not None else []

    for det in detections:
      category_id = self.labels_map_rev[det.label]
      coco_obj = fouc.COCOObject.from_label(
          det, metadata, category_id=category_id,
      )
      # Convert the [x, y, width, height] box to corner format [x1, y1, x2, y2]
      x, y, w, h = coco_obj.bbox
      boxes.append([x, y, x + w, y + h])
      labels.append(coco_obj.category_id)
      area.append(coco_obj.area)
      iscrowd.append(coco_obj.iscrowd)

    target = {}
    # reshape keeps the (N, 4) layout even when N == 0; without it an image
    # with no boxes would produce a flat, zero-length tensor.
    target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
    target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
    target["image_id"] = torch.as_tensor([idx])
    target["area"] = torch.as_tensor(area, dtype=torch.float32)
    target["iscrowd"] = torch.as_tensor(iscrowd, dtype=torch.int64)

    # Preprocessing and augmentation
    if self.transforms is not None:
      img, target = self.transforms(img, target)

    return img, target

  def __len__(self):
    """Number of images in the collection."""
    return len(self.img_paths)


## Data Transforms

In [None]:
def get_transform(train):
    """Build the image/target preprocessing pipeline.

    The pipeline always converts the PIL image to a float tensor; when
    ``train`` is truthy a random horizontal flip (probability 0.5) is appended.

    Args:
        train: Whether to include the training-time augmentation.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    steps = [T.PILToTensor(), T.ConvertImageDtype(torch.float)]
    if train:
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

## Apply Transformations and Dataloader

In [None]:
batch_size = 4

# Wrap the FiftyOne datasets; only the training split gets augmentation
train_data = ObjectDataset(train_dataset, transforms=get_transform(train=True))
test_data = ObjectDataset(test_dataset, transforms=get_transform(train=False))

# Settings shared by both loaders; only the training loader shuffles
loader_options = dict(batch_size=batch_size, num_workers=2, collate_fn=utils.collate_fn)

train_dataloader = torch.utils.data.DataLoader(train_data, shuffle=True, **loader_options)
test_dataloader = torch.utils.data.DataLoader(test_data, shuffle=False, **loader_options)


# Build Model

## Resnet50_FPN

In [None]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Size of the replacement box-predictor head
num_classes = 2

# Faster R-CNN with a ResNet-50 FPN backbone, loaded with its default pre-trained weights
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Swap the pre-trained box predictor for a fresh head sized for num_classes,
# reusing the input width of the existing classification layer
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

## ResNet18

In [None]:
# ResNet-18 with its default pre-trained weights; the last two child modules are
# dropped so that only the feature extractor remains
resnet = torchvision.models.resnet18(weights="DEFAULT")
backbone = torch.nn.Sequential(*list(resnet.children())[:-2])

# Channel count of the backbone's output feature map
backbone.out_channels = 512

# Pool regions from the single feature map, which is named '0'
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2,
)

# Anchor generator with its default configuration
anchor_generator = AnchorGenerator()

resnet_model = FasterRCNN(
    backbone,
    num_classes=2,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 86.4MB/s]


## GoogleNet

In [None]:
# GoogLeNet backbone with its default pre-trained weights
googlenet = torchvision.models.googlenet(weights="DEFAULT")

# Drop the last three child modules, removing the fully connected layer and
# the global average pool
backbone = torch.nn.Sequential(*list(googlenet.children())[:-3])

# Channel count of the backbone's output feature map
backbone.out_channels = 1024

# Pool regions from the single feature map, which is named '0'
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2,
)

# Anchor generator with its default configuration
anchor_generator = AnchorGenerator()

# From here on `googlenet` refers to the detector, not the classification network
googlenet = FasterRCNN(
    backbone,
    num_classes=2,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:00<00:00, 133MB/s]


## VGG16

In [None]:
# VGG-16 with its default pre-trained weights; only the `features` part of the
# network is used as the detector backbone
vgg16 = torchvision.models.vgg16(weights="DEFAULT")
backbone = vgg16.features

# Channel count of the backbone's output feature map
backbone.out_channels = 512

# Pool regions from the single feature map, which is named '0'
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2,
)

# Anchor generator with its default configuration
anchor_generator = AnchorGenerator()

vgg16_model = FasterRCNN(
    backbone,
    num_classes=2,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:06<00:00, 86.6MB/s]


## Train and Evaluate Model

In [None]:
def train(model, train_dataloader, test_dataloader, num_epochs=5,
          checkpoint_path="/content/drive/MyDrive/last_model.pt", print_freq=150):
    """Train a detection model, checkpointing and evaluating after every epoch.

    Args:
        model: Detection model to optimise. It is moved to the GPU when one is
            available, otherwise to the CPU.
        train_dataloader: Loader handed to ``train_one_epoch``.
        test_dataloader: Loader handed to ``evaluate``.
        num_epochs: Number of passes over ``train_dataloader``.
        checkpoint_path: File that is overwritten with the latest checkpoint
            after every epoch. Pass a distinct path per model so that training
            several backbones does not clobber one shared file.
        print_freq: Logging interval (in iterations) forwarded to
            ``train_one_epoch``.
    """
    import os  # local import: only needed to prepare the checkpoint directory

    # Train on the GPU, or on the CPU if no GPU is available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # SGD over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    # Multiply the learning rate by 0.1 every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    # Move the model to the selected device
    model.to(device)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a full epoch of training has already been spent
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    for epoch in range(num_epochs):
        # Train for one epoch, logging every `print_freq` iterations
        train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=print_freq)

        # Update the learning rate
        lr_scheduler.step()

        # Save the state reached after this epoch; the scheduler state is
        # included so that training can be resumed with the right learning rate
        torch.save({
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'lr_scheduler_state_dict': lr_scheduler.state_dict(),
        }, checkpoint_path)

        # Evaluate on the test dataset
        evaluate(model, test_dataloader, device=device)

    print("That's it!")

In [None]:
# Train the VGG16-backed detector for 20 epochs
train(vgg16_model, train_dataloader, test_dataloader, num_epochs=20)

# Load Model

In [None]:
import os

# Folder on the mounted Drive that holds one sub-folder of checkpoints per backbone
root_dir = "/content/drive/MyDrive/Person_Tracking"

# Checkpoint file for each detector, resolved relative to root_dir
googlenet_dir, vgg16_dir, resnet18_dir = (
    os.path.join(root_dir, relative_path)
    for relative_path in (
        "googlenet/last_model.pt",
        "vgg16/last_model.pt",
        "resnet18/last_model.pt",
    )
)

In [None]:
# Sanity check: show the resolved GoogLeNet checkpoint path
print(googlenet_dir)

/content/drive/MyDrive/Person_Tracking/googlenet/last_model.pt


In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Show which device the models will be loaded onto
print(device)

cpu


### Load ResNet18 Model

In [None]:
# Restore the trained ResNet-18 detector weights from its checkpoint.
# map_location remaps tensors that were saved from a GPU, so the checkpoint
# also loads when this runtime only has a CPU.
resnet_model.to(device)
checkpoint_resnet18 = torch.load(resnet18_dir, map_location=device)
resnet_model.load_state_dict(checkpoint_resnet18['model_state_dict'])

### Load VGG16 Model

In [None]:
# Restore the trained VGG16 detector weights from its checkpoint.
# map_location remaps tensors that were saved from a GPU, so the checkpoint
# also loads when this runtime only has a CPU.
vgg16_model.to(device)
checkpoint_vgg16 = torch.load(vgg16_dir, map_location=device)
vgg16_model.load_state_dict(checkpoint_vgg16['model_state_dict'])

### Load Googlenet Model

In [None]:
# Restore the trained GoogLeNet detector weights from its checkpoint.
# map_location remaps tensors that were saved from a GPU, so the checkpoint
# also loads when this runtime only has a CPU.
googlenet.to(device)
checkpoint_googlenet = torch.load(googlenet_dir, map_location=device)
googlenet.load_state_dict(checkpoint_googlenet['model_state_dict'])

# Test Detection

In [None]:
# Display names indexed by predicted label id; detect() uses this list to turn
# the model's integer labels into strings.
class_names = ["background", "person"]

In [None]:
import cv2
import matplotlib.pyplot as plt
import time
import random

def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    """Draw one bounding box, optionally with a filled label tag, onto ``img`` in place.

    Args:
        x: Pair of corner points ``(x[0], x[1])`` of the rectangle.
        img: Image array that is drawn on directly.
        color: Drawing colour; a random three-channel colour is picked when omitted.
        label: Optional text rendered in a filled tag at the first corner.
        line_thickness: Outline thickness; a falsy value derives one from the
            image size instead.
    """
    # Line/font thickness
    thickness = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    corner_a, corner_b = x[0], x[1]
    cv2.rectangle(img, corner_a, corner_b, color, thickness=thickness, lineType=cv2.LINE_AA)

    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # Filled tag sized to the text, extending up and to the right of the first corner
    tag_corner = corner_a[0] + text_w, corner_a[1] - text_h - 3
    cv2.rectangle(img, corner_a, tag_corner, color, -1, cv2.LINE_AA)
    cv2.putText(
        img,
        label,
        (corner_a[0], corner_a[1] - 2),
        0,
        font_scale,
        [225, 255, 255],
        thickness=font_thickness,
        lineType=cv2.LINE_AA,
    )

def detect(img, model, threshold=0.5):
  """Run a detection model on one image and keep the confident predictions.

  Args:
    img: Image as an array accepted by ``Image.fromarray``.
    model: Detection model; it is switched to eval mode before inference.
    threshold: Scores must be strictly greater than this value to count as a hit.

  Returns:
    A tuple ``(pred_boxes, pred_class, pred_score)`` of equally long lists.
    Each box is ``[(x1, y1), (x2, y2)]`` with integer coordinates, each class
    is a name from ``class_names``. All three lists are empty when no score
    exceeds ``threshold``.
  """
  model.eval()
  # Run inference
  t0 = time.time()

  with torch.no_grad():
    img = Image.fromarray(img)
    # Each transform's result is indexed with [0] to get the image tensor
    # (presumably an (image, target) pair - confirm against transforms.py)
    img = T.PILToTensor()(img)
    img = T.ConvertImageDtype(torch.float)(img[0])
    img = img[0].unsqueeze(0).to(device)
    pred = model(img)

  print(f'[INFO]: Inference Time ({time.time() - t0:.3f}s)')

  labels = pred[0]['labels'].cpu().numpy()
  boxes = pred[0]['boxes'].cpu().detach().numpy()
  scores = pred[0]['scores'].cpu().detach().numpy()

  # Keep every prediction up to and including the last one whose score beats
  # the threshold. Working with positions (not list.index) handles repeated
  # scores correctly, and no hit at all gives keep == 0 instead of an IndexError.
  hits = [i for i, score in enumerate(scores) if score > threshold]
  keep = hits[-1] + 1 if hits else 0

  pred_boxes = [[(int(b[0]), int(b[1])), (int(b[2]), int(b[3]))] for b in boxes[:keep]]
  pred_class = [class_names[i] for i in labels[:keep]]
  pred_score = list(scores[:keep])
  return pred_boxes, pred_class, pred_score


In [None]:
# Detect people in a single image with the ResNet-18 detector and show the result
image_path = 'image2.jpg'
img = cv2.imread(image_path)
# cv2.imread signals a missing or unreadable file by returning None rather than
# raising, so fail here with a clear message instead of inside cvtColor
if img is None:
  raise FileNotFoundError(f"Could not read image file: {image_path}")
# OpenCV loads images as BGR; convert to RGB for the model and for matplotlib
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
boxes, classes, scores = detect(img, resnet_model, 0.8)
print('[INFO]: Plotting image...')
for box, class_name, score in zip(boxes, classes, scores):
  plot_one_box(box, img, label="{} {:.2f}".format(class_name, score))
plt.figure(figsize=(5,10))
plt.imshow(img)
plt.xticks([])
plt.yticks([])
plt.show()

In [None]:
# Run the ResNet-18 detector on every frame of a video and write an annotated copy
cap = cv2.VideoCapture("tes.mp4")
if not cap.isOpened():
    raise FileNotFoundError("Could not open video file: tes.mp4")

# Reuse the source frame rate and frame size for the output video
fps = cap.get(cv2.CAP_PROP_FPS)
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

vid_writer = cv2.VideoWriter(
    'output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

try:
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # The detector is fed RGB, while OpenCV reads and writes BGR frames
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes, classes, scores = detect(img, resnet_model, 0.8)
        # Draw on the untouched BGR frame so the written video keeps the
        # original colours instead of having red and blue swapped
        for box, class_name, score in zip(boxes, classes, scores):
            plot_one_box(box, frame, label="{} {:.2f}".format(class_name, score))
        vid_writer.write(frame)
finally:
    # Release both handles even if detection fails part-way through the video
    vid_writer.release()
    cap.release()