In [8]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [9]:
# extract it in the current folder
!unzip gdrive/My\ Drive/project_data.zip

[1;30;43mStreaming af output blev afkortet til de sidste 5000 linjer.[0m
  inflating: project_data/ImagesVideo_2/frame_000253.PNG  
  inflating: project_data/ImagesVideo_2/frame_000254.PNG  
  inflating: project_data/ImagesVideo_2/frame_000255.PNG  
  inflating: project_data/ImagesVideo_2/frame_000256.PNG  
  inflating: project_data/ImagesVideo_2/frame_000257.PNG  
  inflating: project_data/ImagesVideo_2/frame_000258.PNG  
  inflating: project_data/ImagesVideo_2/frame_000259.PNG  
  inflating: project_data/ImagesVideo_2/frame_000260.PNG  
  inflating: project_data/ImagesVideo_2/frame_000261.PNG  
  inflating: project_data/ImagesVideo_2/frame_000262.PNG  
  inflating: project_data/ImagesVideo_2/frame_000263.PNG  
  inflating: project_data/ImagesVideo_2/frame_000264.PNG  
  inflating: project_data/ImagesVideo_2/frame_000265.PNG  
  inflating: project_data/ImagesVideo_2/frame_000266.PNG  
  inflating: project_data/ImagesVideo_2/frame_000267.PNG  
  inflating: project_data/ImagesVideo_2/

In [10]:
import xml.etree.cElementTree as et
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image

#Define dataset class
class CanDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.annos = list(sorted(os.listdir(os.path.join(root, "Annotations"))))

    def __getitem__(self, idx):
        # load images
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        ann_path = os.path.join(self.root, "Annotations", self.annos[idx])
        img = Image.open(img_path).convert("RGB")
        
        tree0 = et.ElementTree(file=ann_path)
        root0 = tree0.getroot()
        obj_ids = []
        bbox_xmin = []; bbox_xmax = []; bbox_ymin = []; bbox_ymax = []

        # get object id's
        for obj in root0.iter('name'):
          if obj.text == 'beer':
            obj_ids.append(1)
          elif obj.text == 'cola':
            obj_ids.append(2)
        
        # get bounding box coordinates
        for obj in root0.iter('xmin'):
          bbox_xmin.append(int(float(obj.text)))
        for obj in root0.iter('ymin'):
          bbox_ymin.append(int(float(obj.text)))
        for obj in root0.iter('xmax'):
          bbox_xmax.append(int(float(obj.text)))
        for obj in root0.iter('ymax'):
          bbox_ymax.append(int(float(obj.text)))
        
        boxes = []
        num_objs = len(obj_ids)
        for i in range(num_objs):
            boxes.append([bbox_xmin[i], bbox_ymin[i], 
                          bbox_xmax[i], bbox_ymax[i]])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(obj_ids, dtype=torch.int64)
        image_id = torch.tensor([idx])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        if num_objs == 0:
          target['boxes'] = torch.zeros((0,4),dtype=torch.float32)
          target["labels"] = torch.zeros((0,4), dtype=torch.int64)
          target["image_id"] = image_id
          target["area"] = torch.zeros((0,4),dtype=torch.float32)
          target["iscrowd"] = torch.zeros((0,4), dtype=torch.int64)
        else:
          target["boxes"] = boxes
          target["labels"] = labels
          target["image_id"] = image_id
          area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
          target["area"] = area
          target["iscrowd"] = iscrowd


        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)

In [11]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_instance_segmentation_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [12]:
%%shell
# Download TorchVision repo to use some files from
# references/detection
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.8.2

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

fatal: destination path 'vision' already exists and is not an empty directory.
HEAD is now at 2f40a483d [v0.8.X] .circleci: Add Python 3.9 to CI (#3063)




In [13]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# transforms for training
def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [14]:
# use our dataset and defined transformations
# training done on video_1, test on video_2 (can easily be changed here)
dataset = CanDataset('project_data/video_1', get_transform(train=True))
dataset_test = CanDataset('project_data/video_2', get_transform(train=False))

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

In [15]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has three classes (beer, cola and background)
num_classes = 3

# get the model using our helper function
model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [16]:
# let's train it for 10 epochs
from torch.optim.lr_scheduler import StepLR
num_epochs = 30

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [   0/1089]  eta: 0:18:40  lr: 0.000010  loss: 1.2909 (1.2909)  loss_classifier: 1.0371 (1.0371)  loss_box_reg: 0.2513 (0.2513)  loss_objectness: 0.0015 (0.0015)  loss_rpn_box_reg: 0.0010 (0.0010)  time: 1.0291  data: 0.2993  max mem: 2115
Epoch: [0]  [  10/1089]  eta: 0:06:32  lr: 0.000060  loss: 1.0341 (1.0453)  loss_classifier: 0.9643 (0.8950)  loss_box_reg: 0.1394 (0.1466)  loss_objectness: 0.0003 (0.0021)  loss_rpn_box_reg: 0.0010 (0.0016)  time: 0.3638  data: 0.0316  max mem: 2379
Epoch: [0]  [  20/1089]  eta: 0:05:54  lr: 0.000110  loss: 0.7969 (0.7772)  loss_classifier: 0.5595 (0.6245)  loss_box_reg: 0.1116 (0.1472)  loss_objectness: 0.0013 (0.0038)  loss_rpn_box_reg: 0.0010 (0.0016)  time: 0.2968  data: 0.0050  max mem: 2379
Epoch: [0]  [  30/1089]  eta: 0:05:39  lr: 0.000160  loss: 0.3576 (0.6068)  loss_classifier: 0.1707 (0.4602)  loss_box_reg: 0.1116 (0.1423)  loss_objectness: 0.0007 (0.0029)  loss_rpn_box_reg: 0.0010 (0.0014)  time: 0.2966  data: 0.0055  max me

In [17]:
from scipy.spatial import distance as dist
from collections import OrderedDict

#Centroid tracking
class CentroidTracker():
	def __init__(self, maxDisappeared=50):
		# initialize the next unique object ID along with two ordered
		# dictionaries used to keep track of mapping a given object
		# ID to its centroid and number of consecutive frames it has
		# been marked as "disappeared", respectively
		self.nextObjectID = 0
		self.objects = OrderedDict()
		self.disappeared = OrderedDict()

		# store the number of maximum consecutive frames a given
		# object is allowed to be marked as "disappeared" until we
		# need to deregister the object from tracking
		self.maxDisappeared = maxDisappeared

	def register(self, centroid):
		# when registering an object we use the next available object
		# ID to store the centroid
		self.objects[self.nextObjectID] = centroid
		self.disappeared[self.nextObjectID] = 0
		self.nextObjectID += 1

	def deregister(self, objectID):
		# to deregister an object ID we delete the object ID from
		# both of our respective dictionaries
		del self.objects[objectID]
		del self.disappeared[objectID]

	def update(self, rects):
		# check to see if the list of input bounding box rectangles
		# is empty
		if len(rects) == 0:
			# loop over any existing tracked objects and mark them
			# as disappeared
			for objectID in list(self.disappeared.keys()):
				self.disappeared[objectID] += 1

				# if we have reached a maximum number of consecutive
				# frames where a given object has been marked as
				# missing, deregister it
				if self.disappeared[objectID] > self.maxDisappeared:
					self.deregister(objectID)

			# return early as there are no centroids or tracking info
			# to update
			return self.objects

		# initialize an array of input centroids for the current frame
		inputCentroids = np.zeros((len(rects), 2), dtype="int")

		# loop over the bounding box rectangles
		for (i, (startX, startY, endX, endY)) in enumerate(rects):
			# use the bounding box coordinates to derive the centroid
			cX = int((startX + endX) / 2.0)
			cY = int((startY + endY) / 2.0)
			inputCentroids[i] = (cX, cY)

		# if we are currently not tracking any objects take the input
		# centroids and register each of them
		if len(self.objects) == 0:
			for i in range(0, len(inputCentroids)):
				self.register(inputCentroids[i])

		# otherwise, are are currently tracking objects so we need to
		# try to match the input centroids to existing object
		# centroids
		else:
			# grab the set of object IDs and corresponding centroids
			objectIDs = list(self.objects.keys())
			objectCentroids = list(self.objects.values())

			# compute the distance between each pair of object
			# centroids and input centroids, respectively -- our
			# goal will be to match an input centroid to an existing
			# object centroid
			D = dist.cdist(np.array(objectCentroids), inputCentroids)

			# in order to perform this matching we must (1) find the
			# smallest value in each row and then (2) sort the row
			# indexes based on their minimum values so that the row
			# with the smallest value as at the *front* of the index
			# list
			rows = D.min(axis=1).argsort()

			# next, we perform a similar process on the columns by
			# finding the smallest value in each column and then
			# sorting using the previously computed row index list
			cols = D.argmin(axis=1)[rows]

			# in order to determine if we need to update, register,
			# or deregister an object we need to keep track of which
			# of the rows and column indexes we have already examined
			usedRows = set()
			usedCols = set()

			# loop over the combination of the (row, column) index
			# tuples
			for (row, col) in zip(rows, cols):
				# if we have already examined either the row or
				# column value before, ignore it
				# val
				if row in usedRows or col in usedCols:
					continue

				# otherwise, grab the object ID for the current row,
				# set its new centroid, and reset the disappeared
				# counter
				objectID = objectIDs[row]
				self.objects[objectID] = inputCentroids[col]
				self.disappeared[objectID] = 0

				# indicate that we have examined each of the row and
				# column indexes, respectively
				usedRows.add(row)
				usedCols.add(col)

			# compute both the row and column index we have NOT yet
			# examined
			unusedRows = set(range(0, D.shape[0])).difference(usedRows)
			unusedCols = set(range(0, D.shape[1])).difference(usedCols)

			# in the event that the number of object centroids is
			# equal or greater than the number of input centroids
			# we need to check and see if some of these objects have
			# potentially disappeared
			if D.shape[0] >= D.shape[1]:
				# loop over the unused row indexes
				for row in unusedRows:
					# grab the object ID for the corresponding row
					# index and increment the disappeared counter
					objectID = objectIDs[row]
					self.disappeared[objectID] += 1

					# check to see if the number of consecutive
					# frames the object has been marked "disappeared"
					# for warrants deregistering the object
					if self.disappeared[objectID] > self.maxDisappeared:
						self.deregister(objectID)

			# otherwise, if the number of input centroids is greater
			# than the number of existing object centroids we need to
			# register each new input centroid as a trackable object
			else:
				for col in unusedCols:
					self.register(inputCentroids[col])

		# return the set of trackable objects
		return self.objects

In [18]:
import torchvision.transforms as transforms
import cv2

trans = transforms.Compose([transforms.ToTensor()])

#Read videos (frames)
Vid = []
Vidcv = []
directory = 'project_data/ImagesVideo_2'

for file in sorted(os.listdir(directory)):
  filename = os.fsdecode(file)
  path = os.path.join(directory, filename)
  frame = Image.open(path).convert("RGB")
  Vid.append(frame)
  # transform frame to work with cv2
  frame = Image.fromarray(trans(frame).mul(255).permute(1, 2, 0).byte().numpy())
  frame = cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR)
  Vidcv.append(frame)

In [19]:
import time

# initialize our centroid tracker and frame dimensions
ct = CentroidTracker()
#Set model to evaluation mode
model.eval()

# initialize the video loop
print("[INFO] starting video...")
Video_prediction = []
prevT = time.perf_counter()

# loop over the frames from the video
for n in range(len(os.listdir(directory))):
	# read the next frame from the video
  frame = Vid[n]
  #Find detected boundary boxes for frame
  with torch.no_grad():
    detections = model([trans(frame).to(device)])
  rects = []
  boxes = detections[0]['boxes'].cpu().numpy()
  IDs = detections[0]['labels'].cpu().numpy()

  # use other frame to work with cv2
  frame = Vidcv[n]

	# loop over the detections
  for i in range(0, detections[0]['boxes'].shape[0]):
    # compute the (x, y)-coordinates of the bounding box for
		# the object, then update the bounding box rectangles list
    box = boxes[i]
    rects.append(box.astype("int"))
		# draw a bounding box surrounding the object so we can
		# visualize it
    (startX, startY, endX, endY) = box.astype("int")
    if IDs[i] == 1:
      cv2.rectangle(frame, (startX, startY), (endX, endY),
                    (0, 255, 0), 2)
      cv2.putText(frame, 'beer', (startX+3, endY-3), 
                  cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2, cv2.LINE_AA)
    elif IDs[i] == 2:
      cv2.rectangle(frame, (startX, startY), (endX, endY),
                    (0, 0, 255), 2)
      cv2.putText(frame, 'cola', (startX+3, endY-3), 
                  cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA)

	# update our centroid tracker using the computed set of bounding
	# box rectangles
  objects = ct.update(rects)

	# loop over the tracked objects
  for (objectID, centroid) in objects.items():
		# draw both the ID of the object and the centroid of the
		# object on the output frame
    text = "ID {}".format(objectID)
    cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
    cv2.circle(frame, (centroid[0], centroid[1]), 4, (255, 255, 255), -1)
  
  # save the output frame
  Video_prediction.append(frame)

  # compute and print fps at every 100'th frame
  if n%100 == 0 and n != 0:
    T = time.perf_counter()-prevT
    print('Running at:', 100/T, 'fps')
    print('At frame number:', n)
    prevT = time.perf_counter()



# save the videofile
out = cv2.VideoWriter('video_predivtion.avi',cv2.VideoWriter_fourcc(*'DIVX'), 15, (640, 480))
for i in range(len(Video_prediction)):
    out.write(Video_prediction[i])
out.release()

[INFO] starting video...
Running at: 13.629308358606252 fps
At frame number: 100
Running at: 13.851028562443515 fps
At frame number: 200
Running at: 13.842360244256778 fps
At frame number: 300


In [20]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Mon Jan  3 23:07:15 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   70C    P0    54W / 250W |   4087MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces