# Evaluation

- Compute average precision and average recall on the test dataset of a given model.
- The evaluation metrics are those defined by the COCO detection evaluation (AP and AR over several IoU thresholds, object sizes and detection limits).

In [1]:
# Set CUDA_VISIBLE_DEVICES for this kernel before torch is imported below
# (presumably to pin the run to GPU index 1 — adjust for your machine).
%env CUDA_VISIBLE_DEVICES=1
# Load IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. the local `maskflow` package) are picked up on the next cell run.
%load_ext autoreload
%autoreload 2
from pathlib import Path

import matplotlib.pyplot as plt
import tqdm
import torch

import sys; sys.path.append("../")
import maskflow

# Everything for this experiment lives under a single root folder.
root_dir = Path("/home/hadim/.data/Neural_Network/Maskflow/C_elegans")
data_dir = root_dir.joinpath("Data")
model_dir = root_dir.joinpath("Models")

# Create the models folder when it is absent (an existing folder is left untouched).
model_dir.mkdir(exist_ok=True)

# Load the configuration stored in the root folder; it is associated with
# this dataset and network.
config = maskflow.config.load_config(root_dir.joinpath("config.yaml"))

env: CUDA_VISIBLE_DEVICES=1


In [2]:
# Select the trained model to evaluate (a sub-folder of `model_dir`).
model_name = '2018.11.10-19:01:52'
model_path = model_dir / model_name

# Override a few settings for this evaluation run.
# Use the GPU when torch can see one; otherwise fall back to the CPU so the
# cell still runs on a machine (or CUDA_VISIBLE_DEVICES setting) without CUDA.
config['MODEL']['DEVICE'] = "cuda" if torch.cuda.is_available() else "cpu"
config['DATALOADER']['NUM_WORKERS'] = 16
config['TEST']['IMS_PER_BATCH'] = 16

# Run the evaluation; the COCO-style AP/AR summaries are printed in the cell output.
# (per the original note, this creates a file called evauation.json in `model_path`
#  — likely a typo for evaluation.json, TODO confirm the actual file name)
results = maskflow.inference.run_evaluation(config, model_path, data_dir)

2018-11-10 19:07:35,635:INFO:maskrcnn_benchmark.utils.checkpoint: Loading checkpoint from /home/hadim/.data/Neural_Network/Maskflow/C_elegans/Models/2018.11.10-19:01:52/model_0000100.pth
2018-11-10 19:07:35,980:INFO:maskrcnn_benchmark.inference: Start evaluation on 20 images


loading annotations into memory...
Done (t=0.03s)
creating index...
index created!


  "See the documentation of nn.Upsample for details.".format(mode))
2it [00:03,  2.38s/it]
2018-11-10 19:07:40,164:INFO:maskrcnn_benchmark.inference: Total inference time: 0:00:04.183446 (0.2091723084449768 s / img per device, on 1 devices)
2018-11-10 19:07:40,165:INFO:maskrcnn_benchmark.inference: Preparing results for COCO format
2018-11-10 19:07:40,166:INFO:maskrcnn_benchmark.inference: Preparing bbox results
2018-11-10 19:07:40,175:INFO:maskrcnn_benchmark.inference: Preparing segm results
20it [00:00, 134.30it/s]
2018-11-10 19:07:40,327:INFO:maskrcnn_benchmark.inference: Evaluating predictions


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.16s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.605
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.859
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.690
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.639
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.645
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.060
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.566
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.683
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1

2018-11-10 19:07:40,760:INFO:maskrcnn_benchmark.inference: OrderedDict([('bbox', OrderedDict([('AP', 0.6046953782940907), ('AP50', 0.8587912170575308), ('AP75', 0.6904564962104874), ('APs', 0.6388035883250541), ('APm', 0.6448140613799532), ('APl', -1.0)])), ('segm', OrderedDict([('AP', 0.46829496288222305), ('AP50', 0.7769171964098233), ('AP75', 0.5463391218732842), ('APs', 0.40761270176451364), ('APm', 0.5603695613378286), ('APl', -1.0)]))])


creating index...
index created!
Running per image evaluation...
Evaluate annotation type *segm*
DONE (t=0.19s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.468
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.777
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.546
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.408
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.560
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.049
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.466
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.563
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.516
 Average Recall     (AR) @[ IoU=0.5

---

Microtubule, `2018.11.10-16:39:38`:

| Step | mAP | mAR | AP@0.50 | AP@0.75 |
| --- | --- | --- | --- | --- |
| 750 | 0.33 | 0.02 | 0.62 | 0.32 |
| 1000 | 0.33 | 0.02 | 0.61 | 0.31 |
| 1500 | 0.34 | 0.02 | 0.61 | 0.33 |
| 2500 | 0.36 | 0.019 | 0.65 | 0.36 |