This notebook takes in a salmon video stream and a keypoint rcnn model,

This notebook generates the following:
1. An annotated video of the detections of keypoint rcnn.
2. Annotated frames of the detections of keypoint rcnn.
3. Keypoint aligned fish toraxes.
4. trackers of the fish detections.

All data is saved to disk.

In [1]:
# Imports
import sys  
sys.path.insert(0, 'C:\\Users\\espebh\\Documents\\Thesis\\code_ver2')
import cv2
import numpy as np
import torch
import torchvision
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as T
import math
import pickle
import os
import pandas as pd
import math
import datetime
import colorsys
from scipy.special import softmax
import albumentations as alb
import json
from helpers.formatting_functions import cv2_to_keyrcnn_in
from helpers import m
from helpers.video_monitor import VideoMonitor

In [5]:
# Specify the paths where the different outputs should be saved
vid_in_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\videos for clf training\\tank 3\\280922_t3_c4_1t_s1.mp4'

vid_out_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\semiautomatically generated data\\unlabelled data\\tank_9_and_3\\videos'
ann_imgs_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\semiautomatically generated data\\unlabelled data\\tank_9_and_3\\images\\annotated_frames'
tor_imgs_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\semiautomatically generated data\\unlabelled data\\tank_9_and_3\\images\\torax'
tracker_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\semiautomatically generated data\\unlabelled data\\tank_9_and_3\\trackers_unlabelled'

keyrcnn_man_label_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\keyrcnn_200_dataset\\manually annotated data'


In [6]:
# Load model
model = torchvision.models.detection.keypointrcnn_resnet50_fpn(weights=None,
                                                               weights_backbone=torchvision.models.ResNet50_Weights.IMAGENET1K_V1,
                                                               num_keypoints=m.NUM_KEYPOINTS,
                                                               num_classes = m.NUM_CLASSES_T9)
model.load_state_dict(torch.load(r"C:\\Users\\espebh\\Documents\\Thesis\\data\\models\\keyrcnn\\13_epoch_6000_dataset", map_location=m.DEVICE))
model.to(m.DEVICE).eval()

KeypointRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(640, 672, 704, 736, 768, 800), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, 

In [7]:
# Define section of movie that will be used
start_frame = 4500
num_frames_in_video = 500
flip = False

# Load input video
video_in = cv2.VideoCapture(vid_in_path)
print("Number of frames in input video: " + str(video_in.get(cv2.CAP_PROP_FRAME_COUNT)))
video_in.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

# Generate movie name
vid_name = vid_in_path.split('\\')[-1].split('.')[0]
mov_txt = vid_name + 'f_' + str(start_frame) + '_' + str(start_frame + num_frames_in_video) + ' '
date = datetime.datetime.now()
ID = str(date.year) + str(date.month) + str(date.day) + str(date.hour) + str(date.minute)
movie_name = mov_txt + ID

# Open output video
video_out = cv2.VideoWriter(os.path.join(vid_out_path, movie_name) + '.avi', cv2.VideoWriter_fourcc(*'mp4v'), 100.0, (1920, 1080), True)
print('Saving movie as: ' + movie_name)

# Create directories for storing images
os.mkdir(os.path.join(ann_imgs_path, movie_name))
os.mkdir(os.path.join(tor_imgs_path, movie_name))

VM = VideoMonitor(None)

Number of frames in input video: 42420.0
Saving movie as: 280922_t3_c4_1t_s1f_4500_5000 2023591054


In [8]:
# Create a video with keypoint and bbox annotations
while True:
    suc, img = video_in.read() # Read video frame
    if suc:
        # cv2 to tensor
        img_cv2 = img.copy()
        if flip: 
            img_cv2 =  cv2.rotate(img_cv2, cv2.ROTATE_180)
        img_keyrcnn_in = cv2_to_keyrcnn_in(img_cv2.copy())
        
        # Pass frame through neural network
        output = model([img_keyrcnn_in])
        VM.update_trackers(output[0])

        # Track fish
        img_ann = VM.draw_active_tracker(img_keyrcnn_in, classes_map=m.CLASSES_MAP_T3)

        # Store image to video and disk
        video_out.write(np.uint8(img_ann))
        frame = str(video_in.get(cv2.CAP_PROP_POS_FRAMES)).split('.')[0]
        cv2.imwrite(os.path.join(ann_imgs_path, movie_name, frame + '.jpeg'),img_ann.copy())

        # Generate salmon torax image
        try:
            imgs, IDs, _, _ = VM.warp_active_tracker_fish(img_cv2.copy(), VM.base)
        except:
            print('Couldnt warp images')

        # Store salmon torax image
        try:
            for i in range(len(imgs)):
                cv2.imwrite(os.path.join(tor_imgs_path, movie_name, frame + '.' + str(IDs[i]) + '.jpeg'), imgs[i]) 
        except:
            print('Empty annotation array')
    else:
        # If the capture object cannot read a frame, exit loop
        break

    # Print number of frames
    if int(video_in.get(cv2.CAP_PROP_POS_FRAMES)) % 10 == 0:
        print("Current frame number: " + str(video_in.get(cv2.CAP_PROP_POS_FRAMES)))

    # Exit loop if we have enough frames
    if video_in.get(cv2.CAP_PROP_POS_FRAMES) > start_frame + num_frames_in_video:
        break

# Release capture object and windows
video_in.release()
video_out.release()
cv2.destroyAllWindows()
print("The video was successfully saved")

Current frame number: 4510.0
Current frame number: 4520.0
Current frame number: 4530.0
Current frame number: 4540.0
Current frame number: 4550.0
Current frame number: 4560.0
Current frame number: 4570.0
Current frame number: 4580.0
Current frame number: 4590.0
Current frame number: 4600.0
Current frame number: 4610.0
Current frame number: 4620.0
Current frame number: 4630.0
Current frame number: 4640.0
Current frame number: 4650.0
Current frame number: 4660.0
Current frame number: 4670.0
Current frame number: 4680.0
Current frame number: 4690.0
Current frame number: 4700.0
Current frame number: 4710.0
Current frame number: 4720.0
Current frame number: 4730.0
Current frame number: 4740.0
Current frame number: 4750.0
Current frame number: 4760.0
Current frame number: 4770.0
Current frame number: 4780.0
Current frame number: 4790.0
Current frame number: 4800.0
Current frame number: 4810.0
Current frame number: 4820.0
Current frame number: 4830.0
Current frame number: 4840.0
Current frame 

In [9]:
# Save trackers
log_file = open(os.path.join(tracker_path, movie_name + '.pkl'), 'wb')
pickle.dump(VM.log_trackers+VM.active_trackers, log_file)
log_file.close()