# [dividiti](http://dividiti.com)'s submissions to [MLPerf Inference v0.5](https://github.com/mlperf/inference/tree/master/v0.5)

## Table of Contents

<a id="overview"></a>
## Overview

- [General MLPerf Submission Rules](https://github.com/mlperf/policies/blob/master/submission_rules.adoc)
- [MLPerf Inference Rules](https://github.com/mlperf/inference_policies/blob/master/inference_rules.adoc)
- [`submission-checker.py`](https://github.com/mlperf/inference/blob/master/v0.5/tools/submission/submission-checker.py)

<a id="systems"></a>
## Systems

### Firefly-RK3399

In [None]:
# System description for the Firefly-RK3399 board with the TFLite framework.
# The dictionary is assembled section by section; keys are inserted in the
# same order as they would appear in a single literal.

# Submission identity.
firefly_tflite = {
    "division": "open",
    "submitter": "dividiti",
    "status": "available",
    "system_name": "Firefly-RK3399",
}

# Host (CPU, memory, storage, networking).
firefly_tflite.update({
    "number_of_nodes": "1",
    "host_processor_model_name": "Arm Cortex-A73 (big), Arm Cortex-A53 (LITTLE)",
    "host_processors_per_node": "1",
    "host_processor_core_count": "2 (big), 4 (LITTLE)",
    "host_processor_frequency": "1800 MHz (big), 1400 MHz (LITTLE)",
    "host_processor_caches": "N/A",
    "host_memory_configuration": "?",
    "host_memory_capacity": "4 GB",
    "host_storage_capacity": "128 GB",
    "host_storage_type": "microSD",
    "host_processor_interconnect": "N/A",
    "host_networking": "N/A",
    "host_networking_topology": "N/A",
})

# Accelerator (GPU), cooling and hardware notes.
firefly_tflite.update({
    "accelerators_per_node": "1",
    "accelerator_model_name": "Arm Mali-T860 MP4",
    "accelerator_frequency": "800 MHz",
    "accelerator_host_interconnect": "N/A",
    "accelerator_interconnect": "N/A",
    "accelerator_interconnect_topology": "N/A",
    "accelerator_memory_capacity": "4 GB (shared)",
    "accelerator_memory_configuration": "?",
    "accelerator_on-chip_memories": "N/A",
    "cooling": "on-board fan",
    "hw_notes": "http://en.t-firefly.com/product/rk3399/",
})

# Software stack.
firefly_tflite.update({
    "framework": "TFLite-v1.15-rc2",
    "operating_system": "Ubuntu 16.04.6 LTS",
    "other_software_stack": "Collective Knowledge v1.11.1",
    "sw_notes": "Powered by CK",
})

In [None]:
# Registry of known system descriptions. Keys have the form
# '<platform>-<library>', which is how get_experimental_results() builds the
# system id it looks up here.
systems = {}
systems['firefly-tflite-v1.15'] = firefly_tflite

In [None]:
# system_desc_id.json
# Fallback system description, dumped by get_experimental_results() when a
# system id has no entry in `systems`. Each value is a placeholder:
# "required" or an empty string (presumably marking mandatory vs. optional
# fields of the MLPerf system description - confirm against the rules).
default_system_desc_id = {
    # Submission identity.
    "division": "required",
    "submitter": "required",
    "status": "required",
    "system_name": "required",

    # Host.
    "number_of_nodes": "required",
    "host_processor_model_name": "required",
    "host_processors_per_node": "required",
    "host_processor_core_count": "required",
    "host_processor_frequency": "",
    "host_processor_caches": "",
    "host_memory_configuration": "",
    "host_memory_capacity": "required",
    "host_storage_capacity": "required",
    "host_storage_type": "required",
    "host_processor_interconnect": "",
    "host_networking": "",
    "host_networking_topology": "",

    # Accelerator, cooling and hardware notes.
    "accelerators_per_node": "required",
    "accelerator_model_name": "required",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "required",
    "accelerator_memory_configuration": "",
    "accelerator_on-chip_memories": "",
    "cooling": "",
    "hw_notes": "",

    # Software stack.
    "framework": "required",
    "operating_system": "required",
    "other_software_stack": "required",
    "sw_notes": ""
}

# system_desc_id_imp.json
# Every field of this template carries the same "required" placeholder, so
# the dictionary is generated from the ordered list of field names.
default_system_desc_id_tmp = dict.fromkeys(
    (
        "input_data_types",
        "retraining",
        "starting_weights_filename",
        "weight_data_types",
        "weight_transformations",
    ),
    "required",
)

<a id="data"></a>
## Get the experimental data

<a id="code"></a>
## Data wrangling code

**NB:** Please ignore this section if you are not interested in re-running or modifying this notebook.

### Includes

#### Standard

In [None]:
import os
import sys
import json
import re

#### Scientific

If some of the scientific packages are missing, please install them using:
```
# pip install jupyter pandas numpy matplotlib seaborn
```

In [None]:
import IPython as ip
import pandas as pd
import numpy as np
import matplotlib as mp
import seaborn as sb

In [None]:
# Report the version of each scientific package imported in the previous cell.
# Each entry pairs the message template with the module it describes.
_version_reports = (
    ('IPython version: %s', ip),
    ('Pandas version: %s', pd),
    ('NumPy version: %s', np),
    ('Matplotlib version: %s', mp),
    ('Seaborn version: %s', sb),
)
for _template, _package in _version_reports:
    print (_template % _package.__version__)

In [None]:
from IPython.display import Image, display
def display_in_full(df):
    """Display a DataFrame without truncating any of its rows or columns.

    The pandas row and column display limits are lifted only while this
    function runs: ``pd.option_context`` restores the previous values on
    exit, so later cells keep whatever display settings were in effect
    before the call. Using ``None`` (no limit) also covers an empty frame,
    for which a length-based limit would be 0.

    Args:
        df: The pandas DataFrame to render via IPython's ``display``.
    """
    with pd.option_context('display.max_columns', None, 'display.max_rows', None):
        display(df)

In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline

In [None]:
# Default figure geometry: size in inches (width, height) and resolution.
default_figwidth, default_figheight = 24, 3
default_figsize = [default_figwidth, default_figheight]
default_figdpi = 200

# Default look of the plots: colour map, font size and bar width.
default_colormap = cm.autumn
default_fontsize = 16
default_barwidth = 0.8

In [None]:
# Matplotlib 2.x: switch to the 'classic' style.
if mp.__version__[0] == '2':
    mp.style.use('classic')

# Apply the notebook-wide rc settings one by one, in a fixed order.
for _rc_key, _rc_value in (
    ('figure.max_open_warning', 200),
    ('figure.dpi', default_figdpi),
    ('font.size', default_fontsize),
    ('legend.fontsize', 'medium'),
):
    mp.rcParams[_rc_key] = _rc_value

In [None]:
# Figures are saved under '<home directory>/mlperf-dividiti'; create that
# directory on first use.
_home_dir = os.path.expanduser("~")
save_fig_dir = os.path.join(_home_dir, 'mlperf-dividiti')
_save_fig_dir_missing = not os.path.exists(save_fig_dir)
if _save_fig_dir_missing:
    os.mkdir(save_fig_dir)

In [None]:
from pprint import pprint

#### Collective Knowledge

If CK is not installed, please install it using:
```
# python -m pip install ck
```

In [None]:
import ck.kernel as ck
# Print the version of the imported CK kernel module.
print ('CK version: %s' % ck.__version__)

#### Experimental data

Download experimental data and add CK repositories as follows:
```
$ wget https://www.dropbox.com/s/<...>/mlperf.*.zip
$ ck add repo --zip=mlperf.*.zip
```

In [None]:
# Disabled shell command; presumably refreshes CK's repository cache after
# adding repositories - uncomment if the repository below is not found.
#!ck recache repo
# Name of the CK repository whose experiment entries are processed below.
repo_uoa = 'mlperf.open.image-classification.firefly'
# List the experiment entries of that repository; the shell sorts the output.
!ck list $repo_uoa:experiment:* | sort
# Visual separator after the listing.
print ("*****************************************")

### Access experimental data

In [None]:
def get_experimental_results(repo_uoa, module_uoa='experiment', tags='mlperf', path=None, submitter='dividiti'):
    if not path:
        path_list = !ck find repo:$repo_uoa
        path = path_list[0]
    print("Storing results under '%s' ...\n" % path)
    
    r = ck.access({'action':'search', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'tags':tags})
#    pprint (r)
    if r['return']>0:
        print('Error: %s' % r['error'])
        exit(1)
    experiments = r['lst']

    dfs = []
    for experiment in experiments:
        data_uoa = experiment['data_uoa']
        r = ck.access({'action':'list_points', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'data_uoa':data_uoa})
        if r['return']>0:
            print('Error: %s' % r['error'])
            exit(1)
        
        tags = r['dict']['tags']
        # Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "performance" ]
        if 'accuracy' in tags:
            (_, division, task, platform, library, benchmark, scenario, mode, preprocessing) = tags
        elif 'performance' in tags:
            (_, division, task, platform, library, benchmark, scenario, mode) = tags
        organization = submitter
        system = platform+'-'+library
        
        # Directory structure for submissions according to MLPerf Inference Rules:
        #
        # <division>/
        #   <organization>/
        #     "systems"/
        #       <system_desc_id>.json
        #     "results"/
        #       <system_desc_id>/
        #         <benchmark>/
        #           <scenario>/
        #             performance/
        #               run_x/ # 1 run for single stream and offline, 5 otherwise
        #                 mlperf_log_summary.txt
        #                 mlperf_log_detail.txt
        #                 mlperf_log_trace.json
        #             accuracy/
        #               mlperf_log_accuracy.json
        #   compliance_checker_log.txt
        #
        root_dir = os.path.join(path, 'submissions_inference_0_5')
        if not os.path.exists(root_dir): os.mkdir(root_dir)
        division_dir = os.path.join(root_dir, division)
        if not os.path.exists(division_dir): os.mkdir(division_dir)
        organization_dir = os.path.join(division_dir, organization)
        if not os.path.exists(organization_dir): os.mkdir(organization_dir)
        # "systems"/
        systems_dir = os.path.join(organization_dir, 'systems')
        if not os.path.exists(systems_dir): os.mkdir(systems_dir)
        system_json_name = '%s.json' % system
        system_json_path = os.path.join(systems_dir, system_json_name)
        with open(system_json_path, 'w') as system_json_file:
            json.dump(systems.get(system, default_system_desc_id), system_json_file, indent=2)
            print('  |_ %s' % system_json_name)
        # "results"/
        results_dir = os.path.join(organization_dir, 'results')
        if not os.path.exists(results_dir): os.mkdir(results_dir)
        system_dir = os.path.join(results_dir, system)
        if not os.path.exists(system_dir): os.mkdir(system_dir)
        benchmark_dir = os.path.join(system_dir, benchmark)
        if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
        scenario_dir = os.path.join(benchmark_dir, scenario)
        if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
        mode_dir = os.path.join(scenario_dir, mode)
        if not os.path.exists(mode_dir): os.mkdir(mode_dir)
        # For each point (should be one point for each performance run).
        points = r['points']
        for (point, point_idx) in zip(points, range(1,len(points)+1)):
            point_file_path = os.path.join(r['path'], 'ckp-%s.0001.json' % point)
            with open(point_file_path) as point_file:
                point_data_raw = json.load(point_file)
            characteristics_list = point_data_raw['characteristics_list']
            characteristics = characteristics_list[0]
            # Set the leaf directory.
            if mode == 'performance':
                run_dir = os.path.join(mode_dir, 'run_%d' % point_idx)
                if not os.path.exists(run_dir): os.mkdir(run_dir)
                last_dir = run_dir
            else:
                last_dir = mode_dir
            print(last_dir)
            # Dump files in the leaf directory.
            mlperf_log = characteristics['run'].get('mlperf_log',{})
            # Summary file (with errors and warnings).
            summary_txt_name = 'mlperf_log_summary.txt'
            summary_txt_path = os.path.join(last_dir, summary_txt_name)
            with open(summary_txt_path, 'w') as summary_txt_file:
                summary_txt_file.writelines(mlperf_log.get('summary',''))
                print('  |_ %s' % summary_txt_name)
            # Detail file (with settings).
            detail_txt_name = 'mlperf_log_detail.txt'
            detail_txt_path = os.path.join(last_dir, detail_txt_name)
            with open(detail_txt_path, 'w') as detail_txt_file:
                detail_txt_file.writelines(mlperf_log.get('detail',''))
                print('  |_ %s' % detail_txt_name)
            # Accuracy file (with accuracy dictionary).
            # FIXME: Move the next 5 lines into the (if mode == 'accuracy') block,
            # once the submission checker no longer complains as follows:
            # "performance/run_1 has file list mismatch (['mlperf_log_accuracy.json'])"
            accuracy_json_name = 'mlperf_log_accuracy.json'
            accuracy_json_path = os.path.join(last_dir, accuracy_json_name)
            with open(accuracy_json_path, 'w') as accuracy_json_file:
                json.dump(mlperf_log.get('accuracy',{}), accuracy_json_file)
                print('  |_ %s' % accuracy_json_name)
            if mode == 'accuracy':
                # FIXME: Do not hardcode - locate via CK.
                accuracy_imagenet_py = '$HOME/CK_TOOLS/mlperf-inference-upstream.master/inference/v0.5/classification_and_detection/tools/accuracy-imagenet.py'
                imagenet_val_file = '$HOME/CK_TOOLS/dataset-imagenet-ilsvrc2012-aux/val.txt'
                accuracy_txt_name = 'accuracy.txt'
                accuracy_txt_path = os.path.join(last_dir, accuracy_txt_name)
                accuracy_txt = !python3 $accuracy_imagenet_py --imagenet-val-file $imagenet_val_file --mlperf-accuracy-file $accuracy_json_path
                with open(accuracy_txt_path, 'w') as accuracy_txt_file:
                    accuracy_txt_file.writelines(accuracy_txt)
                    # Print the first line containing accuracy info.
                    print('  |_ %s ("%s")' % (accuracy_txt_name, accuracy_txt[0]))
#             # Trace file (an omit trace from v0.5).
#             trace_json_name = 'mlperf_log_trace.json'
#             trace_json_path = os.path.join(last_dir, trace_json_name)
#             with open(trace_json_path, 'w') as trace_json_file:
#                 json.dump(mlperf_log.get('trace',{}), trace_json_file)
    
    # FIXME: Do not hardcode - locate via CK.
    submission_checker_py = '$HOME/CK_TOOLS/mlperf-inference-upstream.master/inference/v0.5/tools/submission/submission-checker.py'
    !python3 $submission_checker_py --input $root_dir --submitter $submitter
    return

# NOTE: get_experimental_results() ends with a bare `return`, so `dfs` is
# always None; the call is made for the files it writes and the log it prints.
dfs = get_experimental_results(repo_uoa)

### Plot experimental data