<a href="https://colab.research.google.com/github/YogJoshi14/opencv_task/blob/main/CV_infer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Model link:

[Faster R-CNN with ResNet-50 FPN backbone](https://drive.google.com/file/d/1ExpHnZaDPCLXrX00QNkYavxjAlcSeK09/view?usp=sharing)

In [None]:
from PIL import Image
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import cv2
import numpy as np
def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a new box head.

    The torchvision model is created with ``pretrained=True`` and its
    box predictor is then swapped for a fresh ``FastRCNNPredictor`` that
    outputs ``num_classes`` classes.

    Args:
        num_classes: number of outputs of the new classification head.

    Returns:
        The torchvision detection model with the replaced box predictor.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # The new head must accept the same feature size the old one consumed.
    roi_heads = detector.roi_heads
    head_in_features = roi_heads.box_predictor.cls_score.in_features

    # Swap the pre-trained predictor for one sized to our own classes.
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return detector



# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Names of the detected classes; the detection code looks a prediction up
# as CLASSES[label - 1].
CLASSES = ['laptop','lights']
# One random colour per class, used for that class's box and label text.
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# load the trained detector weights from disk

model_path = '/content/drive/MyDrive/fcnnresnet50.pth' #path to model
# One more output than len(CLASSES) -- presumably the background class that
# torchvision detectors reserve at label 0; confirm against the training code.
model = get_model(len(CLASSES)+1)
# map_location lets a checkpoint that was saved on a GPU load on a CPU-only
# machine; without it torch.load tries to restore the tensors onto CUDA.
model.load_state_dict(torch.load(model_path, map_location=DEVICE))
model.eval()
# Same effect as the former cuda()/cpu() branch, driven by the one DEVICE.
model.to(DEVICE)


def detection_on_video(model,input,output):
    """Run the detector on every frame of a video and save an annotated copy.

    Each frame is read with OpenCV, converted from BGR to an RGB,
    channels-first float tensor scaled to [0, 1], and passed through
    ``model``. Every detection whose score is above 0.6 is printed and drawn
    (bounding box plus "<class>: <score>%" label) on a copy of the frame,
    which is then appended to the output video.

    Args:
        model: detector called as ``model(batch)[0]``; the result must
            provide the keys "boxes", "scores" and "labels". Frames are
            moved to ``DEVICE``, so the model is expected to live there too.
        input: path of the video to read. (The name shadows the builtin but
            is kept so existing callers keep working.)
        output: path of the annotated video to write.

    Returns:
        None. If no frame can be read, no output file is written.
    """
    vs = cv2.VideoCapture(input)
    writer = None
    try:
        # Reuse the source frame rate so the result plays at the original
        # speed; fall back to the previous fixed 30 fps when the backend
        # cannot report one (it then returns 0 or NaN).
        fps = vs.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30

        while True:
            grabbed, frame = vs.read()
            # read() signals the end of the stream (or a failed open) by
            # returning no frame.
            if not grabbed or frame is None:
                break
            orig = frame.copy()

            # BGR -> RGB, channels last -> channels first, add the batch
            # dimension and scale the pixel values to [0, 1].
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            chw = rgb.transpose((2, 0, 1))
            batch = torch.FloatTensor(np.expand_dims(chw, axis=0) / 255.0)
            batch = batch.to(DEVICE)

            # Inference only: skip autograd bookkeeping to save memory.
            with torch.no_grad():
                detections = model(batch)[0]

            for box, score, class_id in zip(detections["boxes"],
                                            detections["scores"],
                                            detections["labels"]):
                confidence = float(score)
                # filter out weak detections
                if confidence <= 0.6:
                    continue

                # Labels are offset by one relative to CLASSES.
                idx = int(class_id) - 1
                # Plain Python ints/floats are accepted by every OpenCV
                # build, unlike numpy scalars and arrays.
                startX, startY, endX, endY = (
                    box.cpu().numpy().astype("int").tolist())
                color = COLORS[idx].tolist()

                label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
                print("[INFO] {}".format(label))

                cv2.rectangle(orig, (startX, startY), (endX, endY), color, 2)
                # Put the text above the box unless that would leave the frame.
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.putText(orig, label, (startX, y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # The writer needs the frame size, so create it lazily once the
            # first frame is known.
            if writer is None:
                height, width = frame.shape[:2]
                # NOTE(review): MJPG inside an .mp4 container may not be
                # supported by every OpenCV backend -- confirm for the
                # target environment.
                fourcc = cv2.VideoWriter_fourcc(*"MJPG")
                writer = cv2.VideoWriter(output, fourcc, fps,
                                         (width, height), True)
            writer.write(orig)
    finally:
        # Always free the capture and finalise the output file, even when a
        # frame fails half-way through.
        vs.release()
        if writer is not None:
            writer.release()

In [None]:
# Named *_path so the notebook session does not rebind the builtin input().
input_path = '/content/drive/MyDrive/lap.mp4' #input video
output_path = '/content/drive/MyDrive/output_lap.mp4' #output video
detection_on_video(model,input_path,output_path)