In [10]:
# Imports
import sys  
sys.path.insert(0, 'C:\\Users\\espebh\\Documents\\Thesis\\code')
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as T
import math
import pickle
import os
import pandas as pd
import math
import datetime
from scipy.signal import butter, lfilter
import colorsys
from scipy.special import softmax
from helpers.track_utils import bb_intersection_over_union, update_trackers
from helpers import m
from helpers.draw_utils import draw_label_on_single_img, draw_active_tracker, draw_cross
from helpers.formatting_functions import cv2_to_keyrcnn_in
from helpers.torax_transform_utils import extract_target_transformation
from helpers.video_monitor import VideoMonitor
from helpers.prob_utils import prob_random_clsf


In [15]:
# Specify the paths where the different outputs should be saved
vid_in_path = r'C:\\Users\\espebh\\Documents\\Thesis\\videos for keyrcnn labelling'
vid_name = '2t tank 9.mp4'
vid_out_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\Temporary workspace'
tracker_out_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\Temporary workspace'

keyrcnn_man_label_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\keyrcnn_200_dataset\\manually annotated data'
keyrcnn_model_path = r'C:\\Users\\espebh\\Documents\\Thesis\\data\\models\\keyrcnn\\13_epoch_6000_dataset'

In [16]:
# Load models
keyrcnnmodel = torchvision.models.detection.keypointrcnn_resnet50_fpn(weights=None,
                                                               weights_backbone=torchvision.models.ResNet50_Weights.IMAGENET1K_V1,
                                                               num_keypoints=m.NUM_KEYPOINTS,
                                                               num_classes = m.NUM_CLASSES)
keyrcnnmodel.load_state_dict(torch.load(keyrcnn_model_path, map_location=m.DEVICE))
keyrcnnmodel.to(m.DEVICE).eval()

toraxIDmodel = torchvision.models.resnet101(pretrained=True)
toraxIDmodel.fc = nn.Linear(in_features=2048, out_features=m.NUM_CLASSES)
toraxIDmodel.load_state_dict(torch.load('C:\\Users\\espebh\\Documents\\Thesis\\data\\models\\torax_ID_t9\\1000_dataset_10_epoch_RGB_resnet101', map_location=m.DEVICE))
toraxIDmodel.to(m.DEVICE).eval()



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [17]:
# Specify the video bounds
start_frame = 10
num_frames_in_video = 10

In [18]:
video_in = cv2.VideoCapture(os.path.join(vid_in_path, vid_name))
print("Number of frames in input video: " + str(video_in.get(cv2.CAP_PROP_FRAME_COUNT)))
video_in.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

# Generate movie name
mov_txt = vid_name.split('.')[0] + '_f_' + str(start_frame) + '_' + str(start_frame + num_frames_in_video) + '_'
date = datetime.datetime.now()
ID = str(date.year) + str(date.month) + str(date.day) + str(date.hour) + str(date.minute)
movie_name = mov_txt + ID

video_out = cv2.VideoWriter(os.path.join(vid_out_path, movie_name + '.avi'), cv2.VideoWriter_fourcc(*'mp4v'), 100.0, (1920, 1080), True)
print('Saving movie as: ' + movie_name)

VM = VideoMonitor(toraxIDmodel)

# Create a video with keypoint and bbox annotations
while True:
    suc, img = video_in.read() # Read video frame
    if suc:
        # cv2 to tensor
        img_cv2 = img.copy()
        img_keyrcnn_in = cv2_to_keyrcnn_in(img_cv2.copy())
        
        # Pass frame through neural network
        output = keyrcnnmodel([img_keyrcnn_in])
        VM.update_trackers(output[0])
        VM.update_classes_and_dists(img_cv2.copy(), video_in.get(cv2.CAP_PROP_POS_FRAMES))
        img_ann = VM.draw_active_tracker(img_keyrcnn_in)

        # Store image to video and disk
        video_out.write(np.uint8(img_ann))
        frame = str(video_in.get(cv2.CAP_PROP_POS_FRAMES)).split('.')[0]
    else:
        # If the capture object cannot read a frame, exit loop
        break

    # Print number of frames
    if int(video_in.get(cv2.CAP_PROP_POS_FRAMES)) % 10 == 0:
        print("Number of frames: " + str(video_in.get(cv2.CAP_PROP_POS_FRAMES)))

    # Exit loop if we have enough frames
    if video_in.get(cv2.CAP_PROP_POS_FRAMES) > start_frame + num_frames_in_video:
        break

# Release capture object and windows
video_in.release()
video_out.release()

log_file_trackers = open(os.path.join(tracker_out_path, movie_name + '.pkl'), 'wb')
pickle.dump(VM.log_trackers+VM.active_trackers, log_file_trackers)
log_file_trackers.close()

print('The video ', movie_name, ' was successfully saved')


Number of frames in input video: 42420.0
Saving movie as: GH020071_f_10_20_20233221610
Number of frames: 20.0
The video  GH020071_f_10_20_20233221610  was successfully saved
