# [dividiti](http://dividiti.com)'s submissions to [MLPerf Inference v0.5](https://github.com/mlperf/inference/tree/master/v0.5)

<a id="overview"></a>
## Overview

This Jupyter notebook covers [dividiti](http://dividiti.com)'s submissions to [MLPerf Inference v0.5](https://github.com/mlperf/inference/tree/master/v0.5). It validates that experimental data obtained via automated, portable and reproducible [Collective Knowledge](http://cknowledge.org) workflows conforms to [General MLPerf Submission Rules](https://github.com/mlperf/policies/blob/master/submission_rules.adoc)
and [MLPerf Inference Rules](https://github.com/mlperf/inference_policies/blob/master/inference_rules.adoc), including running the official [`submission-checker.py`](https://github.com/mlperf/inference/blob/master/v0.5/tools/submission/submission-checker.py).

## Table of Contents

1. [Overview](#overview)
1. [Includes](#includes)
1. [Systems](#systems)
  1. [Firefly-RK3399](#systems_firefly)
1. [Get the experimental data](#get)
1. [Check the experimental data](#check)

<a id="includes"></a>
## Includes

### Standard

In [None]:
import os
import sys
import json
import re

from copy import deepcopy
from pprint import pprint

### Scientific

If some of the scientific packages are missing, please install them using:
```
# python3 -m pip install jupyter pandas numpy matplotlib seaborn --user
```

In [None]:
import IPython as ip
import pandas as pd
import numpy as np
import matplotlib as mp
import seaborn as sb

In [None]:
# Report the version of each scientific package imported above.
version_reports = (
    ('IPython version: %s', ip),
    ('Pandas version: %s', pd),
    ('NumPy version: %s', np),
    ('Matplotlib version: %s', mp),
    ('Seaborn version: %s', sb),
)
for version_template, package in version_reports:
    print (version_template % package.__version__)

In [None]:
from IPython.display import Image, display
def display_in_full(df):
    """Display `df` with all of its rows and columns visible.

    The pandas display limits are raised to the size of `df` only for the
    duration of this call, so the notebook-wide display options are left
    exactly as they were before.

    Args:
        df: the pandas DataFrame to display.
    """
    with pd.option_context('display.max_columns', len(df.columns),
                           'display.max_rows', len(df.index)):
        display(df)

In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline

In [None]:
# Default plot settings; figure size is [width, height].
default_colormap = cm.autumn
default_fontsize, default_barwidth = 16, 0.8
default_figwidth, default_figheight = 24, 3
default_figdpi = 200
default_figsize = [default_figwidth, default_figheight]

In [None]:
# Under Matplotlib 2.x, switch to the 'classic' style.
if mp.__version__[0]=='2':
    mp.style.use('classic')
# Apply the notebook-wide figure settings in one go.
mp.rcParams.update({
    'figure.max_open_warning': 200,
    'figure.dpi': default_figdpi,
    'font.size': default_fontsize,
    'legend.fontsize': 'medium',
})

In [None]:
# Directory 'mlperf-dividiti' under the user's home (presumably where figures are saved).
save_fig_dir = os.path.join(os.path.expanduser("~"), 'mlperf-dividiti')
# exist_ok=True replaces the separate exists()/mkdir() steps, so the cell can be
# re-run safely and fails loudly if the path exists but is not a directory.
os.makedirs(save_fig_dir, exist_ok=True)

### Collective Knowledge

If CK is not installed, please install it using:
```
# python -m pip install ck
```

In [None]:
import ck.kernel as ck
# Report which Collective Knowledge version is in use.
ck_version = ck.__version__
print ('CK version: %s' % ck_version)

<a id="systems"></a>
## Systems

<a id="systems_firefly"></a>
### [Firefly-RK3399](http://en.t-firefly.com/product/rk3399/)

In [None]:
# Base system description for the Firefly-RK3399 board.
# "division" and "framework" are left empty here; the per-division copies fill them in.
firefly = {
    "division": "",
    "submitter": "dividiti",
    "status": "available",
    "system_name": "Firefly-RK3399",

    # Host.
    "number_of_nodes": "1",
    # Fixed: a URL had been pasted into the middle of "Cortex-A72".
    "host_processor_model_name": "Arm Cortex-A72 MP2 (big); Arm Cortex-A53 MP4 (LITTLE)",
    "host_processors_per_node": "1",
    "host_processor_core_count": "2 (big); 4 (LITTLE)",
    "host_processor_frequency": "1800 MHz (big), 1400 MHz (LITTLE)",
    "host_processor_caches": "48 kB L1 instruction cache, 32 kB L1 data cache, 1 MB L2 unified cache (big); 32 kB L1 instruction cache, 32 kB L1 data cache, 512 kB L2 unified cache (LITTLE)",
    "host_memory_configuration": "-",
    "host_memory_capacity": "4 GB",
    "host_storage_capacity": "128 GB",
    "host_storage_type": "SanDisk Extreme microSD",
    "host_processor_interconnect": "-",
    "host_networking": "-",
    "host_networking_topology": "-",

    # Accelerator.
    "accelerators_per_node": "1",
    "accelerator_model_name": "Arm Mali-T860 MP4",
    "accelerator_frequency": "800 MHz",
    "accelerator_host_interconnect": "-",
    "accelerator_interconnect": "-",
    "accelerator_interconnect_topology": "-",
    "accelerator_memory_capacity": "4 GB (shared with host)",
    "accelerator_memory_configuration": "-",
    "accelerator_on-chip_memories": "-",
    "cooling": "on-board fan",
    "hw_notes": "http://en.t-firefly.com/product/rk3399/; http://opensource.rock-chips.com/wiki_RK3399",

    # Software.
    "framework": "",
    "operating_system": "Ubuntu 16.04.6 LTS",
    "other_software_stack": "GCC 7.4.0; Python 3.5.2",
    "sw_notes": "Powered by Collective Knowledge v1.11.1"
}

In [None]:
# Open-division system description: an independent copy of the Firefly base
# description with the division and framework filled in.
open_firefly_tflite = deepcopy(firefly)
open_firefly_tflite["division"] = "open"
open_firefly_tflite["framework"] = "TFLite-v1.15-rc2"
open_firefly_tflite

In [None]:
# Closed-division system description: an independent copy of the Firefly base
# description with the division and framework filled in.
closed_firefly_tflite = deepcopy(firefly)
closed_firefly_tflite["division"] = "closed"
closed_firefly_tflite["framework"] = "TFLite-v1.15-rc2"
closed_firefly_tflite

### All

In [None]:
# Look-up table from '<division>-<platform>-<library>' identifiers to system descriptions.
division_systems = dict([
    ('open-firefly-tflite-v1.15', open_firefly_tflite),
    ('closed-firefly-tflite-v1.15', closed_firefly_tflite),
])

In [None]:
# Generate implementation_benchmarks dictionary, seeded with the generic
# MobileNet and ResNet entries for the TFLite implementation.
implementation = 'image-classification-tflite'
implementation_mobilenet = implementation+'-'+'mobilenet'
implementation_resnet = implementation+'-'+'resnet'

implementation_benchmarks = {
    implementation_mobilenet: {
        "input_data_types": "fp32",
        "weight_data_types": "fp32",
        "retraining": "no",
        "starting_weights_filename": "https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224.tgz",
        "weight_transformations": "TFLite"
    },
    implementation_resnet: {
        "input_data_types": "fp32",
        "weight_data_types": "fp32",
        "retraining": "no",
        "starting_weights_filename": "https://zenodo.org/record/2535873/files/resnet50_v1.pb",
        "weight_transformations": "TF -> TFLite"
    },
}

# Add any MobileNets-v1,v2 model.
def add_implementation_mobilenet(implementation_benchmarks, version, multiplier, resolution, implementation_id=None):
    """Register one MobileNet variant in `implementation_benchmarks`.

    The entry is stored under '<implementation>-mobilenet-v<version>-<multiplier>-<resolution>'.

    Args:
        implementation_benchmarks: dictionary to add the entry to (modified in place).
        version: MobileNet version; 1 selects the v1 weights location, any other
            value selects the v2 location.
        multiplier: multiplier used in the benchmark name and weights file name.
        resolution: resolution used in the benchmark name and weights file name.
        implementation_id: implementation to register the model for; when None,
            the module-level `implementation` is used (the original behaviour).

    Raises:
        ValueError: if the implementation is not one of the known ones. Nothing
            is added to `implementation_benchmarks` in that case.
    """
    if implementation_id is None:
        implementation_id = implementation

    # Validate first, so an unknown implementation never leaves a partial entry behind.
    if implementation_id == 'image-classification-tflite':
        weights_transformations = 'TFLite'
    elif implementation_id == 'image-classification-armnn-tflite':
        weights_transformations = 'TFLite -> ArmNN'
    else:
        # Raising a plain string is itself a TypeError in Python 3; raise a real exception.
        raise ValueError("Unknown implementation '%s'!" % implementation_id)

    base_url = 'https://zenodo.org/record/2269307/files' if version == 1 else 'https://zenodo.org/record/2266646/files'
    url = '{}/mobilenet_v{}_{}_{}.tgz'.format(base_url, version, multiplier, resolution)
    benchmark = 'mobilenet-v{}-{}-{}'.format(version, multiplier, resolution)

    implementation_benchmark = implementation_id+'-'+benchmark
    implementation_benchmarks[implementation_benchmark] = {
        "input_data_types": "fp32",
        "weight_data_types": "fp32",
        "retraining": "no",
        "starting_weights_filename": url,
        "weight_transformations": weights_transformations
    }

# Register every (multiplier, resolution) combination for both MobileNet versions.
mobilenet_grids = (
    (1, [ 1.0, 0.75, 0.5, 0.25 ], [ 224, 192, 160, 128 ]),      # MobileNet-v1.
    (2, [ 1.0, 0.75, 0.5, 0.35 ], [ 224, 192, 160, 128, 96 ]),  # MobileNet-v2.
)
for (version, multipliers, resolutions) in mobilenet_grids:
    for multiplier in multipliers:
        for resolution in resolutions:
            add_implementation_mobilenet(implementation_benchmarks, version, multiplier, resolution)
# Two further MobileNet-v2 multipliers, registered at resolution 224 only.
add_implementation_mobilenet(implementation_benchmarks, version=2, multiplier=1.3, resolution=224)
add_implementation_mobilenet(implementation_benchmarks, version=2, multiplier=1.4, resolution=224)

# from pprint import pprint
# pprint(implementation_benchmarks)

### Defaults (to catch uninitialized descriptions)

In [None]:
# system_desc_id.json
# Placeholder system description: fields marked "required" must be filled in by
# a real description; the remaining fields are left empty.
default_system_json = {
    "division": "required",
    "submitter": "required",
    "status": "required",
    "system_name": "required",

    "number_of_nodes": "required",
    "host_processor_model_name": "required",
    "host_processors_per_node": "required",
    "host_processor_core_count": "required",
    "host_processor_frequency": "",
    "host_processor_caches": "",
    "host_memory_configuration": "",
    "host_memory_capacity": "required",
    "host_storage_capacity": "required",
    "host_storage_type": "required",
    "host_processor_interconnect": "",
    "host_networking": "",
    "host_networking_topology": "",

    "accelerators_per_node": "required",
    "accelerator_model_name": "required",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "required",
    "accelerator_memory_configuration": "",
    "accelerator_on-chip_memories": "",
    "cooling": "",
    "hw_notes": "",

    "framework": "required",
    "operating_system": "required",
    "other_software_stack": "required",
    "sw_notes": ""
}

# system_desc_id_imp.json
# Placeholder description in which every field is marked as required.
default_implementation_benchmark_json = dict.fromkeys(
    [
        "input_data_types",
        "retraining",
        "starting_weights_filename",
        "weight_data_types",
        "weight_transformations",
    ],
    "required",
)

<a id="get"></a>
## Get the experimental data

Download experimental data and add CK repositories as follows:
```
$ wget https://www.dropbox.com/s/jbpdh6c5d8fgwpl/mlperf.open.image-classification.firefly.zip
$ ck add repo --zip=mlperf.open.image-classification.firefly.zip
```

In [None]:
# CK repositories to process.
# Alternative (currently disabled): 'mlperf.open.image-classification.firefly'
repos = [ 'mlperf.closed.image-classification.firefly' ]

In [None]:
# For each repository, list its CK 'experiment' entries in sorted order
# (the `!` line is an IPython shell escape), followed by a separator line.
for repo_uoa in repos:
    !ck list $repo_uoa:experiment:* | sort
    print ("*" * 80)

<a id="check"></a>
## Check the experimental data

In [None]:
def get_experimental_results(repo_uoa, module_uoa='experiment', tags='mlperf', submitter='dividiti', path=None):
    if not path:
        path_list = !ck find repo:$repo_uoa
        path = path_list[0]
    root_dir = os.path.join(path, 'submissions_inference_0_5')
    if not os.path.exists(root_dir): os.mkdir(root_dir)
    print("Storing results under '%s'" % root_dir)
    
    r = ck.access({'action':'search', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'tags':tags})
    if r['return']>0:
        print('Error: %s' % r['error'])
        exit(1)
    experiments = r['lst']

    dfs = []
    for experiment in experiments:
        data_uoa = experiment['data_uoa']
        r = ck.access({'action':'list_points', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'data_uoa':data_uoa})
        if r['return']>0:
            print('Error: %s' % r['error'])
            exit(1)
        print ("*" * 80)
        
        tags = r['dict']['tags']
        if 'accuracy' in tags:
            if 'neon' in tags or 'opencl' in tags:
                # Expected format: [ "mlperf", "open", "image-classification", "firefly", "armnn-v19.08", "neon", "mobilenet-v1-0.5-128", "singlestream", "accuracy", "using-opencv" ]
                (_, division, task, platform, library, backend, benchmark, scenario, mode, preprocessing) = tags
            else:
                # Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "accuracy", "using-opencv" ]
                (_, division, task, platform, library, benchmark, scenario, mode, preprocessing) = tags
        elif 'performance' in tags:            
            if 'neon' in tags or 'opencl' in tags:
                # Expected format: [ "mlperf", "open", "image-classification", "firefly", "armnn-v19.08", "neon", "mobilenet-v1-0.5-128", "singlestream", "performance" ]
                (_, division, task, platform, library, backend, benchmark, scenario, mode) = tags
            else:
                # Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "performance" ]
                (_, division, task, platform, library, benchmark, scenario, mode) = tags
        else:
            raise "Expected 'accuracy' or 'performance' in tags!"
            
        organization = submitter
        system = platform+'-'+library
        division_system = division+'-'+system
        if library.startswith('tflite'):
            implementation = task+'-tflite'
        elif library.startswith('armnn-tflite'):
            implementation = task+'-armnn-tflite'
        else: # Official app with CK adaptations.
            implementation = 'mlperf-inference-vision'
        implementation_benchmark = implementation+'-'+benchmark
        
        #
        # Directory structure according to the Inference section of the General MLPerf Submission Rules:
        # https://github.com/mlperf/policies/blob/master/submission_rules.adoc#552-inference
        #
        # <division>/
        #   <organization>/
        #
        division_dir = os.path.join(root_dir, division)
        if not os.path.exists(division_dir): os.mkdir(division_dir)
        organization_dir = os.path.join(division_dir, organization)
        if not os.path.exists(organization_dir): os.mkdir(organization_dir)
        
        #
        #     "systems"/
        #       <system_desc_id>.json
        #
        systems_dir = os.path.join(organization_dir, 'systems')
        if not os.path.exists(systems_dir): os.mkdir(systems_dir)
        system_json_name = '%s.json' % system
        system_json_path = os.path.join(systems_dir, system_json_name)
        with open(system_json_path, 'w') as system_json_file:
            system_json = division_systems.get(division_system, default_system_json)
            json.dump(system_json, system_json_file, indent=2)
            print('%s' % systems_dir)
            if system_json == default_system_json:
                print('  |_ %s [DEFAULT]' % system_json_name)
                raise
            else:
                print('  |_ %s [%s]' % (system_json_name, division_system))
        
        #
        #     "code"/
        #       <benchmark_name_per_reference>/
        #         <implementation_id>/
        #           <Code interface with loadgen and other arbitrary stuff>
        #
        code_dir = os.path.join(organization_dir, 'code')
        if not os.path.exists(code_dir): os.mkdir(code_dir)
        # FIXME: For now, not always "per reference".
        benchmark_dir = os.path.join(code_dir, benchmark)
        if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
        implementation_dir = os.path.join(benchmark_dir, implementation)
        if not os.path.exists(implementation_dir): os.mkdir(implementation_dir)
        print('%s' % code_dir)
        print('  |_ %s [DEFAULT]' % implementation)
        # TODO: Add basic description of image-classification-{tflite,armnn-tflite}.
        
        #
        #     "measurements"/
        #       <system_desc_id>/
        #         <benchmark>/
        #           <scenario>/
        #             <system_desc_id>_<implementation_id>.json
        #             README.md
        #             user.conf
        #             mlperf.conf
        #             calibration_process.adoc (?)
        #
        measurements_dir = os.path.join(organization_dir, 'measurements')
        if not os.path.exists(measurements_dir): os.mkdir(measurements_dir)
        system_dir = os.path.join(measurements_dir, system)
        if not os.path.exists(system_dir): os.mkdir(system_dir)
        benchmark_dir = os.path.join(system_dir, benchmark)
        if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
        scenario_dir = os.path.join(benchmark_dir, scenario)
        if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
        print(scenario_dir)
        # Touch empty files.
        # <system_desc_id>_<implementation_id>.json
        system_implementation_json_name = system+'_'+implementation+'.json'
        system_implementation_json_path = os.path.join(scenario_dir, system_implementation_json_name)
        with open(system_implementation_json_path, 'w') as system_implementation_json_file:
            implementation_benchmark_json = implementation_benchmarks.get(implementation_benchmark, default_implementation_benchmark_json)
            json.dump(implementation_benchmark_json, system_implementation_json_file, indent=2)
            if implementation_benchmark_json == default_implementation_benchmark_json:
                print('  |_ %s [DEFAULT]' % system_implementation_json_name)
                raise
            else:
                print('  |_ %s [%s]' % (system_implementation_json_name, implementation_benchmark))
        # README.md
        readme_name = 'README.md'
        readme_path = os.path.join(scenario_dir, readme_name)
        with open(readme_path, 'a'):
            os.utime(readme_path, None)
            print('  |_ %s [EMPTY]' % readme_name)
        # user.conf
        user_conf_name = 'user.conf'
        user_conf_path = os.path.join(scenario_dir, user_conf_name)
        with open(user_conf_path, 'a'):
            os.utime(user_conf_path, None)
            print('  |_ %s [EMPTY]' % user_conf_name)
        # mlperf.conf
        mlperf_conf_name = 'mlperf.conf'
        mlperf_conf_path = os.path.join(scenario_dir, mlperf_conf_name)
        with open(mlperf_conf_path, 'a'):
            os.utime(mlperf_conf_path, None)
            print('  |_ %s [EMPTY]' % mlperf_conf_name)
        
        #
        #     "results"/
        #       <system_desc_id>/
        #         <benchmark>/
        #           <scenario>/
        #             performance/
        #               run_x/ # 1 run for single stream and offline, 5 otherwise
        #                 mlperf_log_summary.txt
        #                 mlperf_log_detail.txt
        #                 mlperf_log_trace.json
        #             accuracy/
        #               mlperf_log_accuracy.json
        #       compliance_checker_log.txt (?)
        #
        results_dir = os.path.join(organization_dir, 'results')
        if not os.path.exists(results_dir): os.mkdir(results_dir)
        system_dir = os.path.join(results_dir, system)
        if not os.path.exists(system_dir): os.mkdir(system_dir)
        benchmark_dir = os.path.join(system_dir, benchmark)
        if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
        scenario_dir = os.path.join(benchmark_dir, scenario)
        if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
        mode_dir = os.path.join(scenario_dir, mode)
        if not os.path.exists(mode_dir): os.mkdir(mode_dir)
        # For each point (should be one point for each performance run).
        points = r['points']
        for (point, point_idx) in zip(points, range(1,len(points)+1)):
            point_file_path = os.path.join(r['path'], 'ckp-%s.0001.json' % point)
            with open(point_file_path) as point_file:
                point_data_raw = json.load(point_file)
            characteristics_list = point_data_raw['characteristics_list']
            characteristics = characteristics_list[0]
            # Set the leaf directory.
            if mode == 'performance':
                run_dir = os.path.join(mode_dir, 'run_%d' % point_idx)
                if not os.path.exists(run_dir): os.mkdir(run_dir)
                last_dir = run_dir
            else:
                last_dir = mode_dir
            print(last_dir)
            # Dump files in the leaf directory.
            mlperf_log = characteristics['run'].get('mlperf_log',{})
            # Summary file (with errors and warnings in accuracy mode, with statistics in performance mode).
            summary_txt_name = 'mlperf_log_summary.txt'
            summary_txt_path = os.path.join(last_dir, summary_txt_name)
            with open(summary_txt_path, 'w') as summary_txt_file:
                summary_txt_file.writelines(mlperf_log.get('summary',''))
                print('  |_ %s' % summary_txt_name)
            # Detail file (with settings).
            detail_txt_name = 'mlperf_log_detail.txt'
            detail_txt_path = os.path.join(last_dir, detail_txt_name)
            with open(detail_txt_path, 'w') as detail_txt_file:
                detail_txt_file.writelines(mlperf_log.get('detail',''))
                print('  |_ %s' % detail_txt_name)
            # Accuracy file (with accuracy dictionary).
            # FIXME: Move the next 5 lines into the (if mode == 'accuracy') block,
            # once the submission checker no longer complains as follows:
            # "performance/run_1 has file list mismatch (['mlperf_log_accuracy.json'])"
            accuracy_json_name = 'mlperf_log_accuracy.json'
            accuracy_json_path = os.path.join(last_dir, accuracy_json_name)
            with open(accuracy_json_path, 'w') as accuracy_json_file:
                json.dump(mlperf_log.get('accuracy',{}), accuracy_json_file, indent=2)
                print('  |_ %s' % accuracy_json_name)
            if mode == 'accuracy':
                # FIXME: Do not hardcode - locate via CK.
                accuracy_imagenet_py = '$HOME/CK_TOOLS/mlperf-inference-upstream.master/inference/v0.5/classification_and_detection/tools/accuracy-imagenet.py'
                imagenet_val_file = '$HOME/CK_TOOLS/dataset-imagenet-ilsvrc2012-aux/val.txt'
                accuracy_txt_name = 'accuracy.txt'
                accuracy_txt_path = os.path.join(last_dir, accuracy_txt_name)
                accuracy_txt = !python3 $accuracy_imagenet_py --imagenet-val-file $imagenet_val_file --mlperf-accuracy-file $accuracy_json_path
                with open(accuracy_txt_path, 'w') as accuracy_txt_file:
                    accuracy_txt_file.writelines(accuracy_txt)
                    # Print the first line containing accuracy info.
                    print('  |_ %s ("%s")' % (accuracy_txt_name, accuracy_txt[0]))
#             # Trace file (should omit trace from v0.5).
#             trace_json_name = 'mlperf_log_trace.json'
#             trace_json_path = os.path.join(last_dir, trace_json_name)
#             with open(trace_json_path, 'w') as trace_json_file:
#                 json.dump(mlperf_log.get('trace',{}), trace_json_file, indent=2)

    print ("*" * 80)
    # FIXME: Do not hardcode - locate via CK.
    submission_checker_py = '$HOME/CK_TOOLS/mlperf-inference-upstream.master/inference/v0.5/tools/submission/submission-checker.py'
    !python3 $submission_checker_py --input $root_dir --submitter $submitter
    
    return dfs

# Process every configured repository explicitly instead of relying on the
# value `repo_uoa` happened to be left with by an earlier cell's loop.
dfs = []
for repo_uoa in repos:
    dfs.extend(get_experimental_results(repo_uoa))