In [1]:
# import all we need to compute the mAP and produce some plots
# implementing mAP as documented in The PASCALVisual Object Classes (VOC) Challenge

import numpy as np
import pickle
import os
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# grab all computed results 
# (.pkl format from "darknet_evaluation_post_inference.py")

input_path = "C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS"
input_files = []

for file in os.listdir(input_path):
    if file.endswith(".pkl"):
        input_files.append(os.path.join(input_path,file))
        
input_files.sort()
print("Found {} evaluation files.".format(len(input_files)))
for d, dataset in enumerate(input_files):
    print(d, dataset)
    
use_state = 0

Found 87 evaluation files.
0 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs10001_RESULTS.pkl
1 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs10002_RESULTS.pkl
2 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs10003_RESULTS.pkl
3 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs10004_RESULTS.pkl
4 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs10005_RESULTS.pkl
5 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs1001_RESULTS.pkl
6 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs1002_RESULTS.pkl
7 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs1003_RESULTS.pkl
8 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs1004_RESULTS.pkl
9 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs1005_RESULTS.pkl
10 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs101_RESULTS.pkl
11 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs102_RESULTS.pkl
12 C:/Users/Legos/Documents/PhD/FARTS/BENCHMARK/RESULTS\bs103_RESULTS.pkl
13 C:/

In [3]:
with open(input_files[use_state], 'rb') as f:
    data = pickle.load(f)

"""
- data[0][0]
- training_data_&_training_state

- - data[0][1][0]
- - threshold (for first dataset)
  
- - - data[0][1][1][0 1  2    3   4   5                  6] 
- - - dataset_name,   GT, TP, FN, FP, Average Precision, Recall
"""
#examples:

all_training_states = []

for elem in data:
    all_training_states.append(int(elem[0].split(".")[0].split("_")[-1]))

all_training_states.sort()
print(all_training_states)

[1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000]


The goal is to retrieve the mean Average Precision (mAP) over 13 confidence thresholds ranging from 0.2 to 0.8, classifying a correct detection centre as being within 10% (of the image width) euclidean distance to a ground truth detection, disregarding multiple detections of the same object as they would be suppressed by non-maxmimum suppresion at run-time. We use this adjusted metric from the original, as the actual intersection over union is secondary to the agreement of centres, as different methods have been used to assign bounding boxes. Synthetically generated bounding boxes are defined as the smallest retangle including all projected 2D keypoints in the rendered images, whereas hand annotated bounding boxes are fixed, square detections, as a custom written centre tracking tool (BlenderMotionExport) was used to semi-automatically produce these datasets.

As an example we will plot the precision over recall for these 13 thresholds for the first snapshot of the imported data, and compute the mAP, as in the official [scikit learn implementation](https://github.com/scikit-learn/scikit-learn/blob/baf0ea25d/sklearn/metrics/_ranking.py#L111)

(m)AP summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold, with the increase in recall from the previous threshold used as the weight:

$${AP} = \sum_n (R_n - R_{n-1}) P_n$$
    
where `P_n` and `R_n` are the precision and recall at the nth threshold. Using decreasing threshold values, the Recall $R_{n-1}$ at the first threshold is set to 0 as when the threshold is maximal, no detections are returned. Therefore, with no positives returned, the precision $P_{n-1}$ is by definition equal to 1.

*This implementation is not interpolated and is different from computing the area under the precision-recall curve with the trapezoidal rule, which uses linear interpolation and can be too optimistic.*

**Note:** this implementation is restricted to the binary classification task or multilabel classification task.

In [4]:
def clean_dataset_name(file_name,verbose=False):
    """
    return the name of the dataset wihtout the split extension
    """
    base_name = os.path.basename(file_name)
    dataset_name = base_name.split("_")[0][:-1]
    if verbose:
        print(dataset_name)
    return dataset_name

print("Found {} evaluation files.".format(len(input_files)))
    
all_nets_all_APs = []

prev_dataset = clean_dataset_name(input_files[0])
current_AP_group = []
training_datasets = [prev_dataset]

for use_state in range(len(input_files)):
    with open(input_files[use_state], 'rb') as f:
        data = pickle.load(f)

    all_training_states = []

    for elem in data:
        all_training_states.append(int(elem[0].split(".")[0].split("_")[-1]))

    all_training_states.sort()

    final_AP = []
    #print("\n",input_files[use_state])
    
    #all_nets_all_APs.append([input_files[use_state]])
    
    for dataset_idx in range(1,6):
        all_AP = []

        for model in data:
            #print("\nProducing AP plot for {}\n".format(model[0]))
            #print("With dataset {}\n".format(model[1][dataset_idx][0]))
            curve_coords = np.zeros([len(model[1:]),2])
            AP = 0
            R_n = 0
            for e, elem in reversed(list(enumerate(model[1:]))):
                curve_coords[e] = [elem[dataset_idx][6],elem[dataset_idx][5]]
                #print("thresh {}   Precision {}    Recall {}".format(elem[0],round(elem[dataset_idx][5],3),round(elem[dataset_idx][6],3)))
                AP += (elem[dataset_idx][6] - R_n) * elem[dataset_idx][5]
                R_n = elem[dataset_idx][6]

            #plt.plot(curve_coords[:,0],curve_coords[:,1])
            #print("AP: {}\n".format(AP))
            all_AP.append([model[1][dataset_idx][0],int(model[0].split(".")[0].split("_")[-1]),AP])

        #plt.show()

        all_AP.sort()
        #print(all_AP[-1])
        final_AP.append(all_AP[-1][-1])

    current_dataset = clean_dataset_name(input_files[use_state])
    if use_state == len(input_files)-1:
        current_AP_group.append(final_AP)
        all_nets_all_APs.append(current_AP_group)
        
    elif current_dataset != prev_dataset:
        prev_dataset = current_dataset
        all_nets_all_APs.append(current_AP_group)
        current_AP_group = []
        training_datasets.append(current_dataset)
        
    else:
        current_AP_group.append(final_AP)
    
        
"""
For the output shape we will produce the mean and standard deviation for each AP value for every model / dataset combination

        |                                 dataset
        |   base   base  bright bright  close  close  dark   drak  noisy  noisy
model   |   mean   std   mean   std     mean   std    mean   std   mean   std
____________________________________________________________________________________
bs1000  |
bs100   |
bs10    |
rb      |
ra      |
...     |

"""
output_AP = np.zeros([len(training_datasets),10])

dat = 0
for dataset_name, APs in zip(training_datasets,all_nets_all_APs):
    
    base_AP_mean = np.mean(np.array(APs)[:,0])
    bright_AP_mean = np.mean(np.array(APs)[:,1])
    close_AP_mean = np.mean(np.array(APs)[:,2])
    dark_AP_mean = np.mean(np.array(APs)[:,3])
    noisy_AP_mean = np.mean(np.array(APs)[:,4])
    
    base_AP_std = np.std(np.array(APs)[:,0])
    bright_AP_std = np.std(np.array(APs)[:,1])
    close_AP_std = np.std(np.array(APs)[:,2])
    dark_AP_std = np.std(np.array(APs)[:,3])
    noisy_AP_std = np.std(np.array(APs)[:,4])
    
    output_AP[dat] = [base_AP_mean, base_AP_std,
                    bright_AP_mean, bright_AP_std,
                    close_AP_mean, close_AP_std,
                    dark_AP_mean, dark_AP_std,
                    noisy_AP_mean, noisy_AP_std]
    
    dat += 1
    # COMBINE WITH DATASET NAMES FOR QUICK OVERVIEW
    # NOW THROW IT ALL INTO ONE PANDAS DATAFRAME

Found 87 evaluation files.


In [5]:
categories = [["base","base","bright","bright","close","close","dark","dark","noisy","noisy"],
              ["mean","std","mean","std","mean","std","mean","std","mean","std"]]

categories_tuples = list(zip(*categories))
columns = pd.MultiIndex.from_tuples(categories_tuples, names=["dataset","score"])
    
final_dataframe = pd.DataFrame(output_AP, index = training_datasets, columns=columns)

In [6]:
final_dataframe

dataset,base,base,bright,bright,close,close,dark,dark,noisy,noisy
score,mean,std,mean,std,mean,std,mean,std,mean,std
bs1000,0.998899,0.000663,0.954139,0.007751,0.764137,0.099988,0.952879,0.01836,0.920278,0.025788
bs100,0.999985,1e-05,0.971896,0.005423,0.533096,0.236077,0.951132,0.004832,0.925608,0.007299
bs10,0.999982,6e-06,0.982924,0.003758,0.844239,0.031983,0.939186,0.01135,0.87733,0.06542
ra,0.999978,3e-05,0.999968,1.4e-05,0.853503,0.003422,0.999947,3.4e-05,0.999975,2.2e-05
rb,0.999989,6e-06,0.987332,0.002325,0.661097,0.029698,0.77009,0.08817,0.831668,0.098753
rba,0.992458,0.001207,0.932712,0.008522,0.727274,0.070794,0.937097,0.018632,0.83578,0.052312
rbr,0.962785,0.004721,0.999991,6e-06,0.301432,0.007992,0.252127,0.07808,0.534953,0.103124
rc,0.842172,0.021024,0.454658,0.033011,0.833703,0.000275,0.402483,0.027076,0.636576,0.038203
rd,0.902126,0.007785,0.861027,0.065426,0.814589,0.012015,0.999952,1.7e-05,0.837906,0.019967
rn,0.955683,0.01328,0.961295,0.008348,0.690445,0.067912,0.906245,0.013539,0.999985,1.7e-05


In [7]:
custom_name = "results_5_percent"

final_dataframe.to_csv(os.path.join(os.path.dirname(input_path),custom_name) + ".csv")

# IF the function below fails, this is likely due to exceeding the number of columns supported by HDF5 files!
# Restrict the number of simulated animals to < 20 if the goal is to train a DLC network

final_dataframe.to_hdf(
    os.path.join(os.path.dirname(input_path), custom_name) + ".h5",
    "df_with_missing",
    format="table",
    mode="w")
    