This notebook is a scratchpad for prototyping functionality that will later be moved into dedicated functions and classes.

In [2]:
import pickle
import os
import numpy as np
import pandas as pd
import copy

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import cv2

from lxml import etree
import re
import tqdm

In [3]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import sys
import importlib  
# Make the project's `tools` directory importable before `utils` is imported
# below (presumably that is where `utils` lives -- TODO confirm).
# NOTE(review): `importlib` is not used in the visible cells, and the path is a
# hard-coded absolute local path that will not exist on another machine.
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '/Users/guillaumekugener/Documents/USC/USC_docs/ml/surgical-training-project/tools')

import utils

In [200]:
# Project root; the CSV exports further down are written beneath it
# (joined with 'data/<name>.csv').
# NOTE(review): hard-coded absolute local path -- not portable across machines.
main_output_directory = '/Users/guillaumekugener/Documents/USC/USC_docs/ml/surgical-training-project/'

In [5]:
# Directory holding one '<video_id>_stats.pkl' file per video (unpickled in the
# evaluation loop and passed to utils.make_df_of_detected_objects).
data_dir = '/Users/guillaumekugener/Documents/USC/USC_docs/ml/datasets/surgical-auto-labelled-videos/'
# Directory of annotation files; files are selected per video by matching the
# video id at the start of the file name.
ANNOTATION_DIR = '/Users/guillaumekugener/Documents/USC/USC_docs/ml/datasets/large-clean-surgical-ds/Annotations'
# Tab-separated, headerless file that is read into `classes_map`.
# NOTE(review): this points at 'clean-surgical-ds' while the annotations come
# from 'large-clean-surgical-ds' -- confirm the two share the same class list.
classes_file = '/Users/guillaumekugener/Documents/USC/USC_docs/ml/datasets/clean-surgical-ds/classes.name'

In [6]:
# Image directory of the large dataset ('JPEGImages' per the path).
# NOTE(review): not referenced by any of the visible cells -- confirm it is still needed.
images_main_directory = '/Users/guillaumekugener/Documents/USC/USC_docs/ml/datasets/large-clean-surgical-ds/JPEGImages/'

In [7]:
# Load the class list as a DataFrame. The file is tab-separated and has no
# header row, so pandas labels the columns with integers (0, 1, ...).
classes_map = pd.read_csv(classes_file, sep='\t',header=None)

In [191]:
# Videos to evaluate, keyed by video id. Each value is handed to
# utils.make_df_of_detected_objects as `video_start_id`; presumably it is the id
# of the video's first frame within the dataset -- TODO confirm.
# The insertion order here is the order in which the videos are processed.
dict_video_data_start_id = {
    'S306T1': 1730,
    'S306T2': 15140,
    'S611T1': 23990, # This is the one used for validation
    'S609T2': 20330
}


In [211]:
# Evaluate every video listed in `dict_video_data_start_id`:
#   1. count the ground-truth objects in the video's annotation files,
#   2. load the pickled per-video stats and turn them into a table of detections,
#   3. add precision/recall information to that table,
#   4. print the per-video mAP.
# Results exposed to later cells:
#   total_positives -- video_id -> value returned by
#                      utils.total_objects_to_detect_in_video (indexed by class/tool below)
#   complete_df     -- the per-video tables concatenated in processing order
#                      (None if there were no videos)
complete_df = None
total_positives = {}

# Per-video tables are collected here and concatenated once after the loop,
# instead of re-copying a growing DataFrame on every iteration.
per_video_tables = []

# The directory listing does not depend on the video, so read it only once.
annotation_file_names = os.listdir(ANNOTATION_DIR)

for video_id, video_start_id in dict_video_data_start_id.items():
    # First get the number of positives in this dataset.
    # A plain prefix test is used so the video id is never interpreted as a regex.
    all_frames = [
        os.path.join(ANNOTATION_DIR, a)
        for a in annotation_file_names
        if a.startswith(video_id)
    ]
    current_positives = utils.total_objects_to_detect_in_video(all_frames, classes_map)
    total_positives[video_id] = current_positives

    # NOTE(security): pickle.load can execute arbitrary code while loading.
    # Only open stats files that were produced by a trusted run of this project.
    with open(os.path.join(data_dir, video_id + '_stats.pkl'), 'rb') as stats_array:
        data = pickle.load(stats_array)

    # All the processing we have to do before we can calculate the mAP
    pandas_detected_objects = utils.make_df_of_detected_objects(
        data,
        video_id,
        video_start_id,
        classes_map,
        min_score=0.01)

    pandas_detected_objects = utils.calculate_precision_recall(
        pandas_detected_objects,
        ANNOTATION_DIR,
        classes_map,
        video_id
    )

    per_video_tables.append(pandas_detected_objects)

    # The second argument is the total number of ground-truth objects in this
    # video, summed over every class key of `current_positives`.
    map_value = utils.calculate_map(
        pandas_detected_objects,
        sum(current_positives[tool] for tool in current_positives))
    print(f"Map for {video_id}: {map_value}")

# pd.concat raises on an empty list, so keep the original "None when nothing
# was processed" outcome for that case.
if per_video_tables:
    complete_df = pd.concat(per_video_tables)

100%|██████████| 9371/9371 [00:03<00:00, 2507.37it/s]
9371it [00:25, 360.67it/s]
100%|██████████| 937100/937100 [03:15<00:00, 4802.70it/s]
  0%|          | 0/5921 [00:00<?, ?it/s]

Map for S306T1: 0.3368886251378527


100%|██████████| 5921/5921 [00:02<00:00, 2500.05it/s]
5921it [00:15, 372.93it/s]
100%|██████████| 592100/592100 [01:59<00:00, 4940.66it/s]
  5%|▍         | 226/4811 [00:00<00:02, 2255.65it/s]

Map for S306T2: 0.6916805271698753


100%|██████████| 4811/4811 [00:02<00:00, 2311.72it/s]
4811it [00:12, 389.84it/s]
100%|██████████| 481100/481100 [01:36<00:00, 4967.07it/s]
  7%|▋         | 192/2561 [00:00<00:01, 1905.54it/s]

Map for S611T1: 0.6615636972616987


100%|██████████| 2561/2561 [00:01<00:00, 2046.62it/s]
2561it [00:07, 353.92it/s]
100%|██████████| 256100/256100 [00:52<00:00, 4850.28it/s]


Map for S609T2: 0.7732341278689759


In [212]:
# Write the combined per-video tables (the concatenated output of
# utils.calculate_precision_recall) to a CSV under the output directory.
metrics_csv_path = os.path.join(main_output_directory, 'data/yolov3_complete_metrics.csv')
complete_df.to_csv(metrics_csv_path)

In [213]:
# Flatten the nested `total_positives` mapping (video id -> tool -> count) into
# one (video_id, tool, total) triple per video/tool pair, keeping the original
# iteration order.
ground_truth_rows = [
    (video, tool, per_tool_totals[tool])
    for video, per_tool_totals in total_positives.items()
    for tool in per_tool_totals
]

# Build the long-format table column by column and save it next to the other
# exported CSVs.
total_positives_df = pd.DataFrame({
    'video_id': [video for video, _, _ in ground_truth_rows],
    'tool': [tool for _, tool, _ in ground_truth_rows],
    'total': [total for _, _, total in ground_truth_rows],
})
ground_truth_csv_path = os.path.join(main_output_directory, 'data/ground_truth_totals.csv')
total_positives_df.to_csv(ground_truth_csv_path)