In [8]:
import json
import os
import numpy as np

In [None]:
"""
When measuring the inference latency of a DNN model, we want to eliminate the impact of environmental
noise. Instead of recording the prediction latency of each individual test sample once, we measure the
inference latency over the entire test dataset 30 times and take the median value.
"""

### MNIST CNN

In [68]:
# Maps "<folder>_<record file name>" to the parsed metrics JSON of one MNIST CNN run.
mnist_cnn_recorder = {}

In [69]:
# Load every "*metrics" JSON record found one level below the MNIST CNN results
# directory into `mnist_cnn_recorder`, keyed by "<folder>_<record file name>".
parent_dir = "TOSEM_data/cnn_image_data/cnn_image_data_server/"
# os.listdir() returns entries in arbitrary order; sorting makes the dict's
# insertion order (and anything derived from it) reproducible across machines.
for folder in sorted(os.listdir(parent_dir)):
    folder_path = os.path.join(parent_dir, folder)
    # Skip stray files such as macOS '.DS_Store': listing a non-directory raises.
    if not os.path.isdir(folder_path):
        continue
    for record in sorted(os.listdir(folder_path)):
        if not record.endswith("metrics"):
            continue
        target = os.path.join(folder_path, record)
        # Explicit encoding so parsing does not depend on the platform's locale.
        with open(target, "r", encoding="utf-8") as ifile:
            mnist_cnn_recorder[folder + "_" + record] = json.load(ifile)

In [70]:
# Reduce each loaded MNIST CNN record to its hyperparameters plus the median of
# the accuracy stored under "original_model".
simlified_cnn_result_list = []
for metrics in mnist_cnn_recorder.values():
    simlified_cnn_result_list.append(
        {
            "hyperparameters": metrics["hyperparameters"],
            "accuracy": np.median(metrics["original_model"]["accuracy"]),
        }
    )

In [71]:
# Display the MNIST CNN summaries ranked from highest to lowest accuracy.
sorted(
    simlified_cnn_result_list,
    key=lambda summary: summary["accuracy"],
    reverse=True,
)

[{'hyperparameters': {'dropout_ratio': 0.5,
   'activation': 'relu',
   'filters': 192,
   'kernel_size': 7,
   'optimizer': 'Adagrad',
   'learning_rate': 0.06350483197533612,
   'batch_size': 8,
   'epochs': 44,
   'pool_type': 'max',
   'loss_function': 'kullback_leibler_divergence'},
  'accuracy': 0.9928},
 {'hyperparameters': {'dropout_ratio': 0.5,
   'activation': 'relu',
   'filters': 96,
   'kernel_size': 9,
   'optimizer': 'SGD',
   'learning_rate': 0.007360097910140977,
   'batch_size': 2,
   'epochs': 86,
   'pool_type': 'max',
   'loss_function': 'kullback_leibler_divergence'},
  'accuracy': 0.9927},
 {'hyperparameters': {'activation': 'relu',
   'filters': 192,
   'kernel_size': 9,
   'dropout_ratio': 0.8179855278075513,
   'optimizer': 'Adagrad',
   'learning_rate': 0.08004071724388537,
   'batch_size': 16,
   'epochs': 79,
   'pool_type': 'avg',
   'loss_function': 'kullback_leibler_divergence'},
  'accuracy': 0.9925},
 {'hyperparameters': {'optimizer': 'Adam',
   'filte

### ResNet

In [64]:
# Maps "<folder>_<record file name>" to the parsed metrics JSON of one ResNet run.
resnet_cnn_recorder = {}

In [65]:
# Load every "*metrics" JSON record found one level below the ResNet results
# directory into `resnet_cnn_recorder`, keyed by "<folder>_<record file name>".
parent_dir = "TOSEM_data/resnet_data/resnet_data_server/"
# os.listdir() returns entries in arbitrary order; sorting makes the dict's
# insertion order (and anything derived from it) reproducible across machines.
for folder in sorted(os.listdir(parent_dir)):
    folder_path = os.path.join(parent_dir, folder)
    # Skip stray files such as macOS '.DS_Store': listing a non-directory raises.
    if not os.path.isdir(folder_path):
        continue
    for record in sorted(os.listdir(folder_path)):
        if not record.endswith("metrics"):
            continue
        target = os.path.join(folder_path, record)
        # Explicit encoding so parsing does not depend on the platform's locale.
        with open(target, "r", encoding="utf-8") as ifile:
            resnet_cnn_recorder[folder + "_" + record] = json.load(ifile)

In [66]:
# Reduce each loaded ResNet record to its hyperparameters plus the median of
# the accuracy stored under "original_model".
simlified_resnet_result_list = []
for metrics in resnet_cnn_recorder.values():
    simlified_resnet_result_list.append(
        {
            "hyperparameters": metrics["hyperparameters"],
            "accuracy": np.median(metrics["original_model"]["accuracy"]),
        }
    )

In [67]:
# Display the ResNet summaries ranked from highest to lowest accuracy.
sorted(
    simlified_resnet_result_list,
    key=lambda summary: summary["accuracy"],
    reverse=True,
)

[{'hyperparameters': {'filters': 192,
   'kernel_size': 3,
   'activation': 'sigmoid',
   'dropout_ratio': 0.6940142047640246,
   'optimizer': 'Adamax',
   'learning_rate': 0.0007035352823091709,
   'batch_size': 256,
   'epochs': 56,
   'pool_type': 'avg',
   'loss_function': 'categorical_crossentropy'},
  'accuracy': 0.9384},
 {'hyperparameters': {'activation': 'relu',
   'kernel_size': 5,
   'filters_1': 64,
   'filters_2': 128,
   'filters_3': 128,
   'filters_4': 256,
   'filters_5': 64,
   'dropout_ratio': 0.19012400969749255,
   'pooling_1': 'max',
   'pooling_2': 'globalmax',
   'optimizer': 'Nadam',
   'learning_rate': 0.0006515972222658095,
   'batch_size': 16,
   'epochs': 84,
   'loss_function': 'kullback_leibler_divergence'},
  'accuracy': 0.7837},
 {'hyperparameters': {'learning_rate': 0.01,
   'kernel_size': 5,
   'filters_1': 128,
   'filters_2': 128,
   'filters_3': 128,
   'filters_4': 256,
   'filters_5': 64,
   'activation': 'relu',
   'dropout_ratio': 0.37835993947

### CNN for Text

In [60]:
# Maps "<folder>_<record file name>" to the parsed metrics JSON of one text CNN run.
text_cnn_recorder = {}

In [61]:
# Load every "*metrics" JSON record found one level below the text CNN results
# directory into `text_cnn_recorder`, keyed by "<folder>_<record file name>".
parent_dir = "TOSEM_data/cnn_text_data/cnn_text_data_server/"
# os.listdir() returns entries in arbitrary order; sorting makes the dict's
# insertion order (and anything derived from it) reproducible across machines.
for folder in sorted(os.listdir(parent_dir)):
    folder_path = os.path.join(parent_dir, folder)
    # Skip stray files such as macOS '.DS_Store': listing a non-directory raises.
    if not os.path.isdir(folder_path):
        continue
    for record in sorted(os.listdir(folder_path)):
        if not record.endswith("metrics"):
            continue
        target = os.path.join(folder_path, record)
        # Explicit encoding so parsing does not depend on the platform's locale.
        with open(target, "r", encoding="utf-8") as ifile:
            text_cnn_recorder[folder + "_" + record] = json.load(ifile)

In [62]:
# Reduce each loaded text CNN record to its hyperparameters plus the median of
# the accuracy stored under "original_model".
simlified_textcnn_result_list = []
for metrics in text_cnn_recorder.values():
    simlified_textcnn_result_list.append(
        {
            "hyperparameters": metrics["hyperparameters"],
            "accuracy": np.median(metrics["original_model"]["accuracy"]),
        }
    )

In [63]:
# Display the text CNN summaries ranked from highest to lowest accuracy.
sorted(
    simlified_textcnn_result_list,
    key=lambda summary: summary["accuracy"],
    reverse=True,
)

[{'hyperparameters': {'optimizer': 'Adam',
   'learning_rate': 0.001,
   'batch_size': 32,
   'epochs': 3,
   'filters_1': 128,
   'filters_2': 32,
   'kernel_size_1': 7,
   'kernel_size_2': 3,
   'neurons': 32,
   'embedding_dim': 32,
   'activation_1': 'tanh',
   'activation_2': 'relu',
   'activation_3': 'relu',
   'dropout_ratio_1': 0.8308531722197221,
   'dropout_ratio_2': 0.6989564855665645,
   'pool_type': 'globalavg',
   'loss_function': 'binary_crossentropy'},
  'accuracy': 0.8894},
 {'hyperparameters': {'dropout_ratio_1': 0.5,
   'dropout_ratio_2': 0.5,
   'filters_1': 256,
   'filters_2': 64,
   'kernel_size_1': 9,
   'kernel_size_2': 3,
   'neurons': 32,
   'embedding_dim': 32,
   'activation_1': 'relu',
   'activation_2': 'relu',
   'activation_3': 'tanh',
   'optimizer': 'Adam',
   'learning_rate': 0.0017166091177458257,
   'batch_size': 32,
   'epochs': 92,
   'pool_type': 'globalavg',
   'loss_function': 'binary_crossentropy'},
  'accuracy': 0.88432},
 {'hyperparameters

### LSTM

In [47]:
# Maps "<folder>_<record file name>" to the parsed metrics JSON of one LSTM run.
lstm_recorder = {}

In [56]:
# Load every "*metrics" JSON record found one level below the LSTM results
# directory into `lstm_recorder`, keyed by "<folder>_<record file name>".
parent_dir = "TOSEM_data/lstm_data/lstm_data_server/all100_model_performance_properties/"
# os.listdir() returns entries in arbitrary order; sorting makes the dict's
# insertion order (and anything derived from it) reproducible across machines.
for folder in sorted(os.listdir(parent_dir)):
    folder_path = os.path.join(parent_dir, folder)
    # Skip stray files such as macOS '.DS_Store': listing a non-directory raises.
    if not os.path.isdir(folder_path):
        continue
    for record in sorted(os.listdir(folder_path)):
        if not record.endswith("metrics"):
            continue
        target = os.path.join(folder_path, record)
        # Explicit encoding so parsing does not depend on the platform's locale.
        with open(target, "r", encoding="utf-8") as ifile:
            lstm_recorder[folder + "_" + record] = json.load(ifile)

In [57]:
# Reduce each loaded LSTM record to its hyperparameters plus the median of
# the accuracy stored under "original_model".
simlified_lstm_result_list = []
for metrics in lstm_recorder.values():
    simlified_lstm_result_list.append(
        {
            "hyperparameters": metrics["hyperparameters"],
            "accuracy": np.median(metrics["original_model"]["accuracy"]),
        }
    )

In [59]:
# Display the LSTM summaries ranked from highest to lowest accuracy.
sorted(
    simlified_lstm_result_list,
    key=lambda summary: summary["accuracy"],
    reverse=True,
)

[{'hyperparameters': {'learning_rate': 0.001,
   'units_1': 256,
   'units_2': 16,
   'embedding_dim': 128,
   'dropout_ratio': 0.5876846893466073,
   'activation_1': 'tanh',
   'activation_2': 'sigmoid',
   'optimizer': 'RMSprop',
   'batch_size': 16,
   'epochs': 73,
   'loss_function': 'binary_crossentropy'},
  'accuracy': 0.89112},
 {'hyperparameters': {'learning_rate': 0.001,
   'units_1': 256,
   'units_2': 16,
   'embedding_dim': 128,
   'dropout_ratio': 0.5876846893466073,
   'activation_1': 'tanh',
   'activation_2': 'sigmoid',
   'optimizer': 'RMSprop',
   'batch_size': 16,
   'epochs': 73,
   'loss_function': 'binary_crossentropy'},
  'accuracy': 0.89112},
 {'hyperparameters': {'units_1': 64,
   'units_2': 128,
   'embedding_dim': 16,
   'dropout_ratio': 0.37321540014297205,
   'activation_1': 'tanh',
   'activation_2': 'sigmoid',
   'optimizer': 'RMSprop',
   'learning_rate': 0.0021501001617229517,
   'batch_size': 16,
   'epochs': 42,
   'loss_function': 'binary_crossentro