# Installations & Util

## Install Requirements

In [None]:
!python --version

In [None]:
!pip install --upgrade pip

In [None]:
!pip install -r requirements.txt

In [None]:
!pip install torch==1.1.0 torchvision==0.2.1 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
!pip list --format=freeze

In [None]:
!pip freeze | grep torch

## Initialize Notebook

In [None]:
import ipywidgets as widgets
from IPython.display import Javascript

outputInit = widgets.Output()

btnInit = widgets.Button(
    description='Initialize Notebook',
    disabled=False,
    button_style='info',
)

def runInit(state):
    """Execute every notebook cell below this one and report it in ``outputInit``.

    ``state`` is whatever the click callback passes in; it is not used.
    """
    run_cells_below = Javascript('IPython.notebook.execute_cells_below()')
    display(run_cells_below)
    with outputInit:
        print("Running notebook initialization..")

# btnInit.observe(runInit)
btnInit.on_click(runInit)
# ========== END TOGGLE BTN END ==========
display(btnInit, outputInit)

## Code Hiding

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML
# ========== TOGGLE BTN ==========
javascript_functions = {False: "hide()", True: "show()"}
button_descriptions  = {False: "Show code", True: "Hide code"}

def toggle_code(state):
    """
    Emit a <script> element that calls show() or hide() on the div.input elements.

    ``state`` is looked up in ``javascript_functions``: True maps to show(),
    False maps to hide().
    """
    js_call = javascript_functions[state]
    script_tag = "<script>$(\"div.input\").{}</script>".format(js_call)
    display(HTML(script_tag))

def button_action(value):
    """
    Observer for the toggle button: applies the new state to the code cells
    and relabels the button that fired the change.
    """
    pressed = value.new
    toggle_code(pressed)
    # value.owner is the widget whose trait changed
    value.owner.description = button_descriptions[pressed]
    
state = False
toggle_code(state)

button = widgets.ToggleButton(state, description = button_descriptions[state])
button.observe(button_action, "value")
# ========== END TOGGLE BTN END ==========
display(button)

# TSU Pipeline
This section of the notebook is dedicated to the TSU pipeline.

## Data Exploration
This section in the notebook allows the user to load and display video data from the Toyota Smarthome (TSU) project (https://project.inria.fr/toyotasmarthome/).

### File Upload

In [None]:
import ipywidgets as widgets
from ipywidgets import Button
from IPython.display import display, HTML
from ipyfilechooser import FileChooser
import zipfile as zf
import shutil

# https://thispointer.com/python-how-to-unzip-a-file-extract-single-multiple-or-all-files-from-a-zip-archive/#:~:text=Extract%20all%20files%20from%20a%20zip%20file%20to%20different%20directory,can%20be%20relative%20or%20absolute.&text=It%20will%20extract%20all%20the%20files%20in%20%27sample,zip%27%20in%20temp%20folder.

# ===================== FileChooser =====================
# Create the FileChooser widget, starting at the filesystem root
fc = FileChooser('/')

# === Change defaults and reset the dialog ===
# fc.default_path = './'
# fc.reset()

# === Sandbox navigation ('/' leaves the whole filesystem reachable) ===
fc.sandbox_path = '/'

# === Do not list hidden files ===
fc.show_hidden = False

# === List files as well as folders (True would switch to folder-only mode) ===
fc.show_only_dirs = False

# === Set multiple file filter patterns (uses https://docs.python.org/3/library/fnmatch.html) ===
# '*.txt' is listed so the filter agrees with the title below and with the
# '.txt' handling in the upload callback; without it .txt files cannot be picked.
fc.filter_pattern = ['*.csv', '*.txt', '*.mp4', '*.zip', '*.json']
# fc.filter_pattern = ['*.zip']

# === Change the title (use '' to hide) ===
fc.title = '<p>Select File (*.csv, *.txt, *.mp4, *.json, *.zip)<br>Use *.zip for multiple file uploads.<br></p><hr>'

# === Callback function ===
# def change_display_selected(chooser):
#     print(fc.selected_filename, end='\r')

# === Register callback function ===
# fc.register_callback(change_display_selected)
# ================ END FileChooser END ================
# ================ BTN CLICK UPLOAD ================
def _upload_destination(file_name, route_images=True):
    """Return the Data_Folder sub-directory for ``file_name``, chosen by extension.

    ``route_images`` is False for zip members: archives never sorted images
    into ``Images`` (they fall through to ``Data_Folder``), while single-file
    uploads do.
    """
    import os  # local import: this cell does not import os at the top

    if route_images and file_name.endswith(('.png', '.jpg', '.JPG')):
        return os.path.join('Data_Folder', 'Images')
    if file_name.endswith('.mp4'):
        return os.path.join('Data_Folder', 'Videos')
    if file_name.endswith(('.txt', '.csv', '.xlsx', '.json')):
        return os.path.join('Data_Folder', 'Dataset')
    return 'Data_Folder'


def on_button_clicked_upload(b):
    """Copy (or, for a .zip, extract) the file selected in ``fc`` into Data_Folder.

    ``b`` is the argument supplied by the button click callback; it is not used.
    Paths are built with ``os.path.join`` so the handler works on any OS, not
    only where the backslash is the path separator. The outcome is printed
    into ``uploadOutput``.
    """
    import os  # local import: this cell does not import os at the top

    fn = fc.selected_filename
    if not fn:
        # Nothing chosen yet: report it instead of failing on a missing name
        with uploadOutput:
            print("No file selected. Please choose a file first.")
        return

    source = os.path.join(fc.selected_path, fn)
    try:
        if fn.endswith('.zip'):
            report = [fn.replace('.zip', '') + " has been extracted and uploaded to:"]
            with zf.ZipFile(source, 'r') as zipObj:
                # Route every archive member by its own extension
                for fileName in zipObj.namelist():
                    destination = _upload_destination(fileName, route_images=False)
                    zipObj.extract(fileName, destination)
                    report.append(fileName + " >> " + destination)
            outputPath = "\n".join(report)
        else:
            destination = _upload_destination(fn)
            # copyfile does not create missing directories
            os.makedirs(destination, exist_ok=True)
            shutil.copyfile(source, os.path.join(destination, fn))
            outputPath = fn + " has been uploaded to >> " + destination
    except (OSError, zf.BadZipFile) as err:
        with uploadOutput:
            print("***FAILED*** \n" + str(err))
        return

    with uploadOutput:
        print("***SUCCESS*** \n" + outputPath)
# ================ BTN CLICK UPLOAD END ================
# =============== CONFIRM BTN & OUTPUT ===============
confirmBtn = widgets.Button(
    description='Confirm Upload',
    button_style='info', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Confirm',
)
# Named uploadOutput rather than ``output``: the Inference cell defines a
# function called ``output``, which would rebind the name and break the
# ``with`` statements in the upload handler.
uploadOutput = widgets.Output()
confirmBtn.on_click(on_button_clicked_upload)
# =============== CONFIRM BTN & OUTPUT END ===============
display(fc)
display(confirmBtn, uploadOutput)

### Video Preview

In [None]:
from ipyfilechooser import FileChooser
from IPython.display import display, HTML, Video
import ipywidgets as widgets


output_test = widgets.Output()
# ========== FileChooser ==========
# Create and display a FileChooser widget rooted at the uploaded-videos folder
fc_test = FileChooser('./Data_Folder/Videos')
display(fc_test)

# Restrict navigation to the uploaded-videos folder
fc_test.sandbox_path = './Data_Folder/Videos'

# Do not list hidden files
fc_test.show_hidden = False

# List files as well as folders (True would switch to folder-only mode)
fc_test.show_only_dirs = False

# Change the title (use '' to hide)
fc_test.title = '<b>Select input video</b>'
# Sample callback function
def change_display_selected(chooser):
    """Show a playable preview of the video selected in ``fc_test``.

    ``chooser`` is the argument passed by the FileChooser callback; the
    selection is read from ``fc_test`` directly. The preview is rendered into
    ``output_test``.
    """
    import os  # local import: this cell does not import os at the top

    with output_test:
        selected = fc_test.selected
        if not selected:
            return

        # Path to video: use the file that was actually selected (the chooser
        # is sandboxed to ./Data_Folder/Videos), expressed relative to the
        # working directory with forward slashes so it is usable as a URL.
        video_path = os.path.relpath(selected).replace(os.sep, '/')

        # Video playback for preview
        display(HTML('<video controls src="' + video_path + '" type="video/mp4" width="100%"></video>'))
    
# Register callback function
fc_test.register_callback(change_display_selected)
# ========== END FileChooser END ==========
display(output_test)

## Inference
This section in the notebook allows the user to perform inference using a pretrained HOI ML
model based on the TSU project.

In [None]:
import cv2
import pandas as pd
import ipywidgets as widgets
import zipfile as zf
import numpy as np
from IPython import display
from IPython.display import display, HTML, IFrame, clear_output
from ipyfilechooser import FileChooser
from moviepy.editor import VideoFileClip
import torch
from torch.autograd import Variable
import torch.nn.functional as F

# import importlib.util

# spec = importlib.util.spec_from_file_location("train", "./Training/train.py")
# train = importlib.util.module_from_spec(spec)
# spec.loader.exec_module(train)
import re
import csv
import json

# Load the label text file into a list
def load_labels():
    """Read ./Testing/data/all_labels.txt and return one label per line.

    Every run of digits is removed from a line before surrounding whitespace
    is stripped.
    """
    with open("./Testing/data/all_labels.txt") as label_file:
        return [re.sub(r'\d+', '', raw_line).strip() for raw_line in label_file]

# Turn ground truth/annotation to csv
def json_to_csv(vid_name, split_mode):
    """Write the ground-truth actions of one video to an annotation CSV.

    Args:
        vid_name: Video file name including its extension.
        split_mode: Split setting used to pick
            ``./Testing/data/smarthome_<split_mode>_51.json``.

    Returns:
        Path of the CSV that was written, or ``''`` when the video's
        ``subset`` in the JSON file is not ``'testing'``.

    Raises:
        KeyError: if the video has no entry in the JSON file.
    """
    import os  # local import: this cell does not import os at the top

    # Load event label text file provided
    event_list = load_labels()

    # Remove the extension from the name, whatever its length
    video_name = os.path.splitext(vid_name)[0]

    # Open json file
    with open('./Testing/data/smarthome_'+split_mode+'_51.json') as json_file:
        data = json.load(json_file)

    if data[video_name]['subset'] != 'testing':
        return ''

    # Header for csv
    fields = ['annotation', 'start_frame', 'end_frame']

    # Swap each numeric label for its name from the label list; the remaining
    # columns of every action are kept as they are.
    label_list = [
        [event_list[int(action[0])]] + list(action[1:])
        for action in data[video_name]['actions']
    ]

    annotation_dir = './Data_Folder/Annotations'
    os.makedirs(annotation_dir, exist_ok=True)
    annotation_path = annotation_dir + '/annotation_' + video_name + '.csv'

    # newline='' is what the csv module requires; without it blank rows appear
    # between records on Windows. Mode 'w' already empties an existing file.
    with open(annotation_path, 'w', newline='') as data_file:
        csv_writer = csv.writer(data_file)
        csv_writer.writerow(fields)
        csv_writer.writerows(label_list)

    return annotation_path

# Function to check dataframe of caption from testing model and ground truth
def check_df_caption_ground(df_caption, df_ground):
    """Pair every predicted caption with a 'T'/'F' match flag.

    A row is flagged 'T' only when the ground-truth frame at the same index
    exists and carries the same label; caption rows beyond the end of the
    ground truth are flagged 'F'.

    Returns the ``iterrows()`` iterator of a DataFrame with the columns
    ``captions`` and ``match``.
    """
    ground_rows = len(df_ground.index)
    labels = []
    flags = []

    for row in range(len(df_caption.index)):
        predicted = df_caption['captions'][row]
        labels.append(predicted)
        is_match = row < ground_rows and predicted == df_ground['annotation'][row]
        flags.append('T' if is_match else 'F')

    return pd.DataFrame({'captions': labels, 'match': flags}).iterrows()
    

# Check if caption is for video
def check_csv_vidname(vid_name, caption):
    """Return True when the caption file name belongs to the video file name.

    The last four characters are cut from both names and the first eight
    characters are cut from the caption name before the two are compared.
    """
    video_stem = vid_name[0:-4]
    caption_stem = caption[8:-4]
    return video_stem == caption_stem
    
# To parse caption/annotation csv into dataframe
def parseCSV(caption, type):
    """Expand a start_frame/end_frame CSV into one label per frame.

    ``type`` picks the label column: "c" reads ``captions``, "g" reads
    ``annotation``. Frames before a row's ``start_frame`` repeat the previous
    label (an empty string before the first row); the last row is extended up
    to its ``end_frame``.

    Returns a single-column DataFrame named after the label column.
    """
    table = pd.read_csv(caption)
    if type == "c":
        header = 'captions'
    elif type == "g":
        header = 'annotation'
    per_frame = {header: []}
    labels = per_frame[header]
    current_label = ""
    cursor = 0
    row_count = len(table.index)

    for row in range(row_count):
        # Pad up to this row's start frame with the label that is still active
        gap = int(table['start_frame'][row]) - cursor
        labels.extend([current_label] * gap)
        current_label = str(table[header][row])
        cursor += gap
        if row == row_count - 1:
            # Only the final row is extended to its end frame
            tail = int(table['end_frame'][row]) - cursor
            labels.extend([current_label] * tail)

    return pd.DataFrame(per_frame)

# Insert text into video
def output(frame):
    """Draw the next caption from the global ``df_new`` iterator onto ``frame``.

    The text is green when the row's ``match`` value is "T" and red otherwise,
    and is placed at the global ``position``. Once the iterator is exhausted
    the frame is returned without any text.
    """
    try:
        # Advance the shared iterator exactly once per call
        _, row = next(df_new)
    except StopIteration:
        return frame

    color = (0, 255, 0) if str(row.match) == "T" else (255, 0, 0)
    cv2.putText(frame, "Action: " + str(row.captions), position,
                cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)
    return frame

def input(video, cap_csv, ground_csv, outputFilename):
    """Burn per-frame captions into a video and write the result to a file.

    Note that this definition shadows the builtin ``input`` in the notebook.

    Args:
        video: Path to the source video.
        cap_csv: Path to the caption CSV (read with ``parseCSV(..., 'c')``).
        ground_csv: Path to the ground-truth CSV (read with ``parseCSV(..., 'g')``).
        outputFilename: Path the captioned video is written to.

    Returns:
        The string "Added captions to video".

    Sets the globals ``position`` and ``df_new``, which ``output`` reads for
    every frame.
    """
    global position, df_new

    # Read the frame size, then release the capture so the handle is not leaked
    cap = cv2.VideoCapture(video)
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        cap.release()

    # Set position for text with offset of text size
    position = ((int((frame_width/4)-(268/2))), int(((frame_height/1.15)-(36/2))))

    # Caption rows paired with a match flag against the ground truth
    df = parseCSV(cap_csv, 'c')
    df_ground = parseCSV(ground_csv, 'g')
    df_new = check_df_caption_ground(df, df_ground)

    source_clip = VideoFileClip(video)
    try:
        # Modifies the images of a clip by replacing the frame
        out_video = source_clip.fl_image(output)
        try:
            # Write edited video to file
            out_video.write_videofile(outputFilename, audio=True)
        finally:
            out_video.close()
    finally:
        # Close the source clip even when writing fails
        source_clip.close()

    return "Added captions to video"



### Select Model for Inference
The code below lets the user select a trained model for inference from the Trained_models folder. After selection, the directory path and file name are passed to the Run step for the selected model.

In [None]:
# Install the following packages for HTML interactive widgets for Jupyter notebook
import ipywidgets as widgets
from IPython.display import display, IFrame, Video, clear_output
from ipyfilechooser import FileChooser

selectOutput = widgets.Output()

# (Split Setting Dropdown) for user to select
splitSettingInfer = widgets.Dropdown(
    options=['CS', 'CV'],
    value='CS',
    description='Split Setting Used on model:',
    disabled=False,
)
display(splitSettingInfer)


# ========== csvSelect ==========
# Create and display a FileChooser widget
csvSelect = FileChooser('./Data_Folder/Captions')
display(csvSelect)

# Restrict navigation to /Users
csvSelect.sandbox_path = './Data_Folder/Captions'

# Change hidden files
csvSelect.show_hidden = False

# Switch to folder-only mode
csvSelect.show_only_dirs = False

# Change the title (use '' to hide)
csvSelect.title = '<b>Select CSV file to use for ground truth</b>'

# Sample callback function
def change_display_selected_csvSelect(chooser):
    """Echo the file name currently selected in ``csvSelect`` into ``selectOutput``."""
    with selectOutput:
        chosen_name = csvSelect.selected_filename
        print(chosen_name, end='\r')

# Register callback function
csvSelect.register_callback(change_display_selected_csvSelect)
# ========== END csvSelect END ==========



# ========== vidSelect ==========
# Create and display a FileChooser widget
vidSelect = FileChooser('./Data_Folder/Videos')
display(vidSelect)

# Restrict navigation to /Users
vidSelect.sandbox_path = './Data_Folder/Videos'

# Change hidden files
vidSelect.show_hidden = False

# Switch to folder-only mode
vidSelect.show_only_dirs = False

# Change the title (use '' to hide)
vidSelect.title = '<b>Select Video File</b>'

# Sample callback function
def change_display_selected_vidSelect(chooser):
    """Echo the file name currently selected in ``vidSelect`` into ``selectOutput``."""
    with selectOutput:
        chosen_name = vidSelect.selected_filename
        print(chosen_name, end='\r')

# Register callback function
vidSelect.register_callback(change_display_selected_vidSelect)
# ========== END vidSelect END ==========



# ========== modelSelect ==========
# Create and display a FileChooser widget
modelSelect = FileChooser('./Trained_models/PDAN/')
display(modelSelect)

# Restrict navigation to /Users
modelSelect.sandbox_path = '../'

# Change hidden files
modelSelect.show_hidden = False

# Switch to folder-only mode
modelSelect.show_only_dirs = False

# Change the title (use '' to hide)
modelSelect.title = '<b>Select model file to use</b>'

# Sample callback function
def change_display_selected_modelSelect(chooser):
    """Echo the file name currently selected in ``modelSelect`` into ``selectOutput``."""
    with selectOutput:
        chosen_name = modelSelect.selected_filename
        print(chosen_name, end='\r')

# Register callback function
modelSelect.register_callback(change_display_selected_modelSelect)
# ========== END modelSelect END ==========



trainButton = widgets.Button(description='Run',
                             layout=widgets.Layout(margin='20px 0px 0px 0px'))
btnOutput = widgets.Output()

def on_button_clicked(_):
    # "linking function with output"
    with btnOutput:
        # what happens when we press the button
        clear_output()
        try:
            # Get annotate file path
            annotate_path = json_to_csv(vidSelect.selected_filename, splitSettingInfer.value)
            if check_csv_vidname(vidSelect.selected_filename, csvSelect.selected_filename):
                vidPath = vidSelect.selected_path + "/" + vidSelect.selected_filename
                csvPath = csvSelect.selected_path + "/" + csvSelect.selected_filename
                modelPath = modelSelect.selected_path + "/" + modelSelect.selected_filename
                %run -i ./Inferencing/inferencing_cpu.py -dataset TSU -mode rgb -split_setting $splitSettingInfer.value -model PDAN -test True -num_channel 512 -lr 0.0002 -kernelsize 2 -APtype map -epoch 1 -batch_size 1 -comp_info TSU_CS_RGB_PDAN -load_model $modelPath

        #         outputFilename = vidSelect.selected_filename[:-4] + "_output.mp4"
                outputFilename = "output.mp4"
                input(vidPath, csvPath, annotate_path, outputFilename)
                # print(vidSelect.selected_filename)
                display(HTML("""
                    <video controls src="output.mp4" type="video/mp4" width=100%>
                    <video/>
                    """
                ))
            else:
                print("Video name and csv do not match please try again")
        except:
            print('Something has gone wrong please try again')
        
        
# linking button and function together using a button's method
trainButton.on_click(on_button_clicked)
# displaying button and its output together
widgets.VBox([trainButton, btnOutput])


# STEP Pipeline
This section of the notebook is dedicated to the STEP pipeline.

## To be added for STEP pipeline stuff..

In [None]:
!python setup.py build develop

In [None]:
"""
Copyright (C) 2019 NVIDIA Corporation.  All rights reserved.
Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
"""

import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision
import numpy as np
from collections import OrderedDict
import time
from datetime import datetime
#from tensorboardX import SummaryWriter
import glob

from config import parse_config
from models import BaseNet, ROINet, TwoBranchNet, ContextNet
from external.maskrcnn_benchmark.roi_layers import nms
from utils.utils import inference, train_select, AverageMeter, get_gpu_memory
from utils.tube_utils import flatten_tubes, valid_tubes, compute_box_iou
from utils.vis_utils import overlay_image
from data.customize import CustomizedDataset, detection_collate, WIDTH, HEIGHT
from data.augmentations import BaseTransform



def main():
    """Run STEP inference over the frames in ``args.data_root``.

    Loads the checkpoint at ``./pretrained/ava_step.pth`` (its ``cfg`` entry
    supplies ``args``), builds the networks on CUDA, and for every batch:
    applies per-class NMS, greedily merges boxes whose IoU exceeds
    ``global_thresh``, writes an overlay image and appends the detections to
    ``results.txt`` under ``args.save_root``.

    Raises:
        ValueError: if the checkpoint file does not exist.
        NotImplementedError: if ``args.det_net`` is not "two_branch".
    """

    ################## Customize your configuratons here ###################

    checkpoint_path = './pretrained/ava_step.pth'
    if os.path.isfile(checkpoint_path):
        print ("Loading pretrain model from %s" % checkpoint_path)
        map_location = 'cuda:0'
        checkpoint = torch.load(checkpoint_path, map_location=map_location)
        args = checkpoint['cfg']
    else:
        raise ValueError("Pretrain model not found!", checkpoint_path)

    # TODO: Set data_root to the customized input dataset
    args.data_root = './datasets/frames'
    args.save_root = os.path.join(os.path.dirname(args.data_root), 'results/')
    if not os.path.isdir(args.save_root):
        os.makedirs(args.save_root)

    # TODO: modify this setting according to the actual frame rate and file name
    source_fps = 30
    im_format = 'frame%04d.jpg'
    conf_thresh = 0.4
    global_thresh = 0.8    # used for cross-class NMS
    
    ################ Define models #################

    gpu_count = torch.cuda.device_count()
    nets = OrderedDict()
    # backbone network
    nets['base_net'] = BaseNet(args)
    # ROI pooling
    nets['roi_net'] = ROINet(args.pool_mode, args.pool_size)

    # detection network
    for i in range(args.max_iter):
        if args.det_net == "two_branch":
            nets['det_net%d' % i] = TwoBranchNet(args)
        else:
            raise NotImplementedError
    if not args.no_context:
        # context branch
        nets['context_net'] = ContextNet(args)

    for key in nets:
        nets[key] = nets[key].cuda()

    nets['base_net'] = torch.nn.DataParallel(nets['base_net'])
    if not args.no_context:
        nets['context_net'] = torch.nn.DataParallel(nets['context_net'])
    # Detection nets are spread over the available GPUs round-robin
    for i in range(args.max_iter):
        nets['det_net%d' % i].to('cuda:%d' % ((i+1)%gpu_count))
        nets['det_net%d' % i].set_device('cuda:%d' % ((i+1)%gpu_count))

    # load pretrained model 
    nets['base_net'].load_state_dict(checkpoint['base_net'])
    if not args.no_context and 'context_net' in checkpoint:
        nets['context_net'].load_state_dict(checkpoint['context_net'])
    for i in range(args.max_iter):
        pretrained_dict = checkpoint['det_net%d' % i]
        nets['det_net%d' % i].load_state_dict(pretrained_dict)

    
    ################ DataLoader setup #################

    args.batch_size = 4

    dataset = CustomizedDataset(args.data_root, args.T, args.NUM_CHUNKS[args.max_iter], source_fps, args.fps, BaseTransform(args.image_size, args.means, args.stds,args.scale_norm), anchor_mode=args.anchor_mode, im_format=im_format)
    dataloader = torch.utils.data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers,
                                  shuffle=False, collate_fn=detection_collate, pin_memory=True)

    ################ Inference #################

    for _, net in nets.items():
        net.eval()

    torch.cuda.synchronize()
    t0 = time.time()
    # The results file is managed by the with-statement so it is closed even
    # when a batch raises part-way through.
    with open(os.path.join(args.save_root, 'results.txt'), 'w') as fout, torch.no_grad():
        for _, (images, tubes, infos) in enumerate(dataloader):

            _, _, channels, height, width = images.size()
            images = images.cuda()

            # get conv features
            conv_feat = nets['base_net'](images)
            context_feat = None
            if not args.no_context:
                context_feat = nets['context_net'](conv_feat)

            history, _ = inference(args, conv_feat, context_feat, nets, args.max_iter, tubes)

            # collect result of the last step
            pred_prob = history[-1]['pred_prob'].cpu()
            pred_prob = pred_prob[:,int(pred_prob.shape[1]/2)]
            pred_tubes = history[-1]['pred_loc'].cpu()
            pred_tubes = pred_tubes[:,int(pred_tubes.shape[1]/2)]
            tubes_nums = history[-1]['tubes_nums']

            # loop for each batch
            tubes_count = 0
            for b in range(len(tubes_nums)):
                info = infos[b]
                seq_start = tubes_count
                tubes_count = tubes_count + tubes_nums[b]

                cur_pred_prob = pred_prob[seq_start:seq_start+tubes_nums[b]]
                cur_pred_tubes = pred_tubes[seq_start:seq_start+tubes_nums[b]]

                # do NMS first
                all_scores = []
                all_boxes = []
                all_idx = []
                for cl_ind in range(args.num_classes):
                    scores = cur_pred_prob[:, cl_ind].squeeze()
                    c_mask = scores.gt(conf_thresh) # greater than a threshold
                    scores = scores[c_mask]
                    idx = np.where(c_mask.numpy())[0]
                    if len(scores) == 0:
                        all_scores.append([])
                        all_boxes.append([])
                        continue
                    boxes = cur_pred_tubes.clone()
                    l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                    boxes = boxes[l_mask].view(-1, 4)

                    boxes = valid_tubes(boxes.view(-1,1,4)).view(-1,4)
                    keep = nms(boxes, scores, args.nms_thresh)
                    boxes = boxes[keep].numpy()
                    scores = scores[keep].numpy()
                    idx = idx[keep]

                    # normalise box coordinates by the image width/height
                    boxes[:, ::2] /= width
                    boxes[:, 1::2] /= height
                    all_scores.append(scores)
                    all_boxes.append(boxes)
                    all_idx.append(idx)

                # get the top scores
                scores_list = [(s,cl_ind,j) for cl_ind,scores in enumerate(all_scores) for j,s in enumerate(scores)]
                if args.evaluate_topk > 0:
                    scores_list.sort(key=lambda x: x[0])
                    scores_list = scores_list[::-1]
                    scores_list = scores_list[:args.topk]
                
                # merge high overlapping boxes (a simple greedy method)
                merged_result = {}
                flag = [1 for _ in range(len(scores_list))]
                for i in range(len(scores_list)):
                    if flag[i]:
                        s, cl_ind, j = scores_list[i]
                        box = all_boxes[cl_ind][j]
                        temp = ([box], [args.label_dict[cl_ind]], [s])

                        # find all high IoU boxes
                        for ii in range(i+1, len(scores_list)):
                            if flag[ii]:
                                s2, cl_ind2, j2 = scores_list[ii]
                                box2 = all_boxes[cl_ind2][j2]
                                if compute_box_iou(box, box2) > global_thresh:
                                    flag[ii] = 0
                                    temp[0].append(box2)
                                    temp[1].append(args.label_dict[cl_ind2])
                                    temp[2].append(s2)
                        
                        # the merged box is the mean of the grouped boxes; its
                        # comma-joined coordinates serve as the dictionary key
                        merged_box = np.mean(np.concatenate(temp[0], axis=0).reshape(-1,4), axis=0)
                        key = ','.join(merged_box.astype(str).tolist())
                        merged_result[key] = [(l, s) for l,s in zip(temp[1], temp[2])]

                # visualize results
                if not os.path.isdir(os.path.join(args.save_root, info['video_name'])):
                    os.makedirs(os.path.join(args.save_root, info['video_name']))
                print ("info", info)
                overlay_image(os.path.join(args.data_root, info['video_name'], im_format % info['fid']),
                              os.path.join(args.save_root, info['video_name'], im_format % info['fid']),
                              pred_boxes = merged_result,
                              id2class = args.id2class)

                # write to files
                for key in merged_result:
                    box = np.asarray(key.split(','), dtype=np.float32)
                    for l, s in merged_result[key]:
                        fout.write('{0},{1:04},{2:.4},{3:.4},{4:.4},{5:.4},{6},{7:.4}\n'.format(
                                                info['video_name'],
                                                info['fid'],
                                                box[0],box[1],box[2],box[3],
                                                l, s))
            torch.cuda.synchronize()
            t1 = time.time()
            print ("Batch time: ", t1-t0)

            torch.cuda.synchronize()
            t0 = time.time()

if __name__ == "__main__":
    main()
