## Image Pairs dataset

This notebook samples random image pairs from videos and saves them as a new dataset. An image pair consists of two frames that are either directly consecutive or only a few frames apart.
The number of image pairs taken from a video depends on the estimated number of monkeys in the video.

In [2]:
import os
import re
import numpy as np
import cv2
import torch

import sys
if '/usr/users/vogg/.conda/envs/fairmot/lib/python3.8/site-packages' not in sys.path:
    sys.path.insert(0, '/usr/users/vogg/.conda/envs/fairmot/lib/python3.8/site-packages')
    sys.path.insert(0, '/usr/users/vogg/FairMOT/src')
    sys.path.insert(0, '/usr/users/vogg/FairMOT/src/lib')

from models.model import create_model, load_model
from models.decode import mot_decode, _nms, _topk
from utils.post_process import ctdet_post_process
from utils.image import get_affine_transform
from models.utils import _tranpose_and_gather_feat
from datasets.jde import letterbox

In [4]:
#This function estimates the number of monkeys for a given frame

def estimate_monkeys(frame_num, conf_thres=0.4, max_dets=50):
    """Estimate the number of monkeys visible in one frame of the open video.

    Seeks the module-level ``cap`` (a ``cv2.VideoCapture``) to ``frame_num``,
    runs the module-level ``model`` on the letterboxed frame and counts the
    decoded detections whose score is above ``conf_thres``.

    Parameters
    ----------
    frame_num : int
        Index of the frame to analyse.
    conf_thres : float, optional
        Detections with a score at or below this value are ignored.
    max_dets : int, optional
        Passed to ``mot_decode`` as ``K`` (upper bound on decoded detections).

    Returns
    -------
    int
        Number of detections above the threshold, or 0 if the frame could not
        be read (e.g. ``frame_num`` is past the end of the video).
    """
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    ret, img0 = cap.read()

    # A failed read yields (False, None); passing None on would crash letterbox.
    if not ret or img0 is None:
        return 0

    # Scale and fill grey borders
    img_box, _, _, _ = letterbox(img0, height=608, width=1088)

    # Reverse the channel axis (OpenCV frames are BGR), move channels first
    # and rescale the pixel values to [0, 1].
    img = img_box[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img, dtype=np.float32)
    img /= 255.0

    # Add the batch dimension; the model lives on the GPU.
    im_blob = torch.from_numpy(img).cuda().unsqueeze(0)

    with torch.no_grad():
        output = model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg']

    dets, _ = mot_decode(hm, wh, reg=reg, ltrb=True, K=max_dets)

    # Keep only detections above the threshold
    # (index 4 of the last axis is presumably the score -- confirm against mot_decode).
    remain_inds = dets[:, :, 4] > conf_thres

    return int(remain_inds.sum().item())

In [111]:
#!mkdir /usr/users/agecker/datasets/macaque_images

In [5]:
# Input folder with the source videos and output folder for the extracted
# frames. Both keep their trailing slash because file names are appended to
# them by plain string concatenation.
path, output_path = (
    "/usr/users/agecker/datasets/macaque_videos_vogg/",
    "/usr/users/agecker/datasets/macaque_images/",
)

In [96]:
# Videos which are in the validation set and must not end up in this dataset
rm_list = ['VID_20210223_123630.mp4', 'VID_20210223_123817.mp4', 'VID_20210223_123854.mp4', 
        'VID_20210224_115455.mp4', 'VID_20210224_114038.mp4', 'VID_20210224_115729.mp4',
          'VID_20210227_133251.mp4', 'VID_20210227_133440.mp4', 'VID_20210228_153846.mp4']
rm_set = set(rm_list)

# Collect every .mp4 in `path`, minus the validation videos. Filtering the
# sorted listing (instead of subtracting sets) keeps a stable order, so the
# running image numbers assigned later are the same on every run.
mp4_list = [
    item
    for item in sorted(os.listdir(path))
    if item.endswith(".mp4") and item not in rm_set
]

In [98]:
# Build the 'dla_34' network with the four output heads, restore the
# checkpoint weights, then move the model to the GPU and put it in eval mode.
model = load_model(
    create_model(
        'dla_34',
        heads={'hm': 1, 'wh': 4, 'id': 128, 'reg': 2},
        head_conv=256,
    ),
    '../models/mcqcp/model_150.pth',
)
model = model.to(torch.device('cuda'))
model.eval()
# Last statement of the cell: prints an empty line (presumably so the cell
# does not end on the model expression).
print("")

loaded ../models/mcqcp/model_150.pth, epoch 150



In [159]:
# Extract image pairs from every video in `mp4_list`.
#
# For each video the number of monkeys is estimated on roughly one frame per
# 300 frames; the maximum estimate decides how many pairs are taken. The pairs
# start at evenly spaced frames and the second frame of a pair follows 1, 2, 5,
# 10 or 20 frames later. Pairs are written as img_<count>_0.jpg / _1.jpg.

# cv2.imwrite does not create missing directories; it just fails to write.
os.makedirs(output_path, exist_ok=True)

count = 0
for n, video in enumerate(mp4_list):

    # `cap` has to stay a module-level name: estimate_monkeys() reads it.
    cap = cv2.VideoCapture(path + video)

    try:
        #Total number of frames
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Skip files that cannot be opened or report no frames.
        if cap.isOpened() and frame_count > 0:
            breaks = max(1, frame_count // 300)
            steps = frame_count // breaks

            est_list = [estimate_monkeys(i * steps) for i in range(breaks)]

            #max number of monkeys detected
            max_monkeys = int(np.max(est_list))

            #we will pull max_monkeys frames out of the video
            samples = max(1, max_monkeys)
            steps = frame_count // samples

            for i in range(samples):

                #decide if the pair is 1,2,5,10 or 20 frames apart
                rand_step = int(np.random.choice([1,2,5,10,20], p=[0.6,0.2,0.14,0.05,0.01]))

                cap.set(cv2.CAP_PROP_POS_FRAMES, i * steps)
                ret0, img0 = cap.read()
                cap.set(cv2.CAP_PROP_POS_FRAMES, i * steps + rand_step)
                ret1, img1 = cap.read()

                # The second index can lie past the end of the video; drop
                # the pair instead of handing None to cv2.imwrite.
                if not (ret0 and ret1):
                    continue

                cv2.imwrite(output_path + "img_%s_0.jpg" % str(count).zfill(5), img0)
                cv2.imwrite(output_path + "img_%s_1.jpg" % str(count).zfill(5), img1)
                count = count + 1
    finally:
        # Free the decoder/file handle before opening the next video.
        cap.release()

    if n%20 == 0:
        print(n)

0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
