In [1]:
# Load Python Packages
# 1. Basic Data Processing Packages
import pandas as pd
import numpy as np
import math
import pickle

# 2. Machine Learning Packages (Modeling, Clustering, Errors)
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error, r2_score, mean_squared_error
from scipy.cluster.hierarchy import fcluster, dendrogram, linkage
from scipy.spatial.distance import squareform

# 3. Deep Learning Package
import tensorflow as tf

# 4. Other Packages
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

### 01-1. Experiment Settings

In [2]:
# Global Variables
# Instance types covered by the experiment; the first entry is used as the anchor.
INSTANCE_LIST = ['g4dn.xlarge', 'g5.xlarge']
ANCHOR_INSTANCE = INSTANCE_LIST[0] # Anchor Instance Setting
# Every instance other than the anchor is a prediction target.
PRED_INSTANCES = [x for x in INSTANCE_LIST if x != ANCHOR_INSTANCE]
# First two characters of the anchor instance name ('g4' here); used to build the anchor pickle path.
ANCHOR_NAME = ANCHOR_INSTANCE[:2]

# Linkage method handed to scipy's hierarchical clustering.
CLUSTER_METHOD = 'average'
# NOTE(review): the clustering cell passes a literal 10 to fcluster instead of
# this constant -- confirm which distance threshold is intended.
CLUSTER_THRESHOLD = 20

In [3]:
# Load and check anchor data
pd.set_option('display.max_columns', 137)
pd.set_option('display.max_rows', 13)

# Open the pickle inside a context manager so the file handle is always closed.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(f"../../dataset/anchor-data/anchor_{ANCHOR_NAME}.pickle", 'rb') as anchor_file:
    anchor_data = pickle.load(anchor_file)
anchor_data

Unnamed: 0,Host_OneHot,Device_Log1p,Host_Floor,Device_Sub,Device_Softmax,Host_SelectV2,Host_TensorSliceDataset,Host_Shape,Host_Dataset,Host_Identity,Device_Equal,Host_Cast,Device_ConcatV2,Host_ConcatV2,Device__Send,Device_Split,Host_WriteSummary,Device_DivNoNan,Host_TensorDataset,Host_RegexSplitWithOffsets,Host_Sum,Device_Einsum,Host_AddV2,Host__HostSend,Host_CaseFoldUTF8,Device_Sum,Host_NormalizeUTF8,Host__Send,Host_StopGradient,Device_GreaterEqual,Device__Recv,Device_StridedSlice,Host_GatherV2,Device_LogicalAnd,Host_BroadcastTo,Device_Reciprocal,Device_Pow,Device_ResourceGather,Host_RaggedTensorToTensor,Device_AddV2,Host_Maximum,Device_Unique,Device_Square,Host_LessEqual,Device_TanhGrad,Host_Range,Host_RaggedTensorFromVariant,Device_ReverseV2,Host_Slice,Device_Pack,Device_Sigmoid,Host__Recv,Host_BroadcastArgs,Host_AssignVariableOp,Device_L2Loss,Device_SigmoidGrad,Device_OneHot,Device_Slice,Host_ReadVariableOp,Device_Tanh,Host_TensorListFromTensor,Device_IDLE,Host_RaggedTensorToVariant,Device_Exp,Device_ArgMax,Device_MatMul,Host_Sub,Host__HostRecv,Device_Cast,Device_ResourceScatterAdd,Device_TensorListStack,Device_StridedSliceGrad,Host_LogicalOr,Device_RealDiv,Device_Mean,Device_ResourceApplyAdadelta,Device_Fill,Host_Min,Host_Equal,Host_StridedSlice,Device_Greater,Device_Neg,Device_UnsortedSegmentSum,Device_DynamicStitch,Device_BiasAddGrad,Device_Mul,Host_RealDiv,Device_SquaredDifference,Device__HostRecv,Device_Select,Host_LookupTableFindV2,Host_Mul,Host_Minimum,Device_AssignAddVariableOp,Host_GreaterEqual,Device_ZerosLike,Host_StaticRegexReplace,Host_ConcatenateDataset,Host_ExpandDims,Host_FlushSummaryWriter,Host_Transpose,Host_Reshape,Device_BiasAdd,Host_TensorListStack,Device_RsqrtGrad,Host_LogicalAnd,Device_ResourceSparseApplyAdadelta,Device_CudnnRNN,Device_RandomUniform,Device_AddN,Device_CudnnRNNBackprop,Host_IDLE,Host_IteratorGetNext,Device_Sqrt,Device_SoftmaxCrossEntropyWithLogits,Host_NoOp,Device_ResourceApplyAdam,Host_Max,Host_ZerosLike,Device_ReadVariabl
eOp,Host_Pack,Device_TensorListFromTensor,Device_Transpose,Device_Tile,Device_BroadcastTo,Device_AssignSubVariableOp,Host_RaggedGather,Host_WordpieceTokenizeWithOffsets,exp_name,instance_name,dataset,model,optimizer,batchsize,epoch_latency,batch_latency,g5.xlarge
0,0.0,6.528,0.0,7.199,17.696,0.0,0.0,0.0,464.682,3.637,5.216,0.0,212.665,0.0,20.64,63.325,25.358,17.087,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.048,0.0,354.215,0.0,12.031,39.647,0.0,127.738,2.4,0.0,5.344,0.0,40.639,0.0,11.712,0.0,347.576,0.0,0.0,0.0,0.0,0.0,153.788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.367,9.008,0.0,0.0,34864.902,0.0,4.32,24.096,42.496,0.0,0.0,37.792,0.0,0.0,92.99,0.0,4.095,4.096,178.458,6.751,0.0,0.0,0.0,0.0,14.24,118.301,8.672,7.2,52.607,0.0,0.0,28.256,23.967,0.0,0.0,0.0,29.822,0.0,104.86,0.0,0.0,0.0,44.06,0.0,0.0,6.24,0.0,0.0,49.673,33.375,7366.53,8.064,339.544,10673.535,143080.6,30.981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,873.586,5.312,6.528,0.0,0.0,0.0,exp10,g4dn.xlarge,reuters,bilstm,Adadelta,128,1.238734,0.015138,10034.08432
1,2214.833,0.0,41.909,0.0,1381.661,545.914,13.261,54.743,15101.474,196.484,367.448,302.087,780.691,73.973,201.753,0.0,177.774,1478.215,4.674,0.0,934.688,0.0,102.398,963.751,0.0,715.254,0.0,6978.883,12.534,0.0,31804.571,0.0,5206.673,54.973,19.512,0.0,0.0,1445.446,0.0,38743.466,86.377,35070.89,0.0,16.888,40395.698,32.844,0.0,0.0,20.928,0.0,0.0,0.0,14.99,35.643,0.0,0.0,0.0,815.634,771.503,33633.309,0.0,5021289.777,0.0,0.0,1397.535,593050.487,164.355,0.0,1295.711,0.0,8050.206,2707.494,16.802,0.0,0.0,14551.009,3203.678,65.117,83.677,192.343,0.0,0.0,7497.045,0.0,39273.955,828.308,97.492,0.0,43470.517,0.0,0.0,35.425,32.334,2362.352,21.823,55067.266,0.0,17.009,12.542,160.484,48.714,44.72,29612.08,0.0,0.0,431.744,1273.82,0.0,0.0,189334.407,0.0,66823010.0,714.439,0.0,5135.306,4.794,0.0,43.48,16.849,2.847,58.996,10929.815,5084.367,435.349,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,babi,rnn,Adadelta,128,3.618825,0.045379,47852.516174
2,0.0,10.976,0.0,13.951,38.207,0.0,0.0,0.0,465.425,3.777,9.664,0.0,0.0,0.0,30.016,0.0,23.81,36.511,0.0,0.0,0.0,0.0,0.0,16.434,0.0,16.863,0.0,416.029,0.0,13.184,1368.428,0.0,160.294,4.064,0.0,9.92,0.0,48.863,0.0,1714.839,0.0,852.012,0.0,0.0,1788.721,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.184,1484.324,0.0,200107.078,0.0,9.536,53.279,22084.386,0.0,0.0,58.303,0.0,246.46,55.743,0.0,9.312,11.52,225.05,40.48,0.0,0.0,0.0,0.0,26.367,241.5,21.856,1681.71,59.806,0.0,0.0,1962.671,52.063,0.0,0.0,0.0,60.222,0.0,2455.602,0.0,0.0,0.0,43.049,0.0,0.0,1307.307,0.0,0.0,41.01,44.319,0.0,0.0,8415.697,0.0,743219.2,21.165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,511.922,237.562,10.592,13.152,0.0,0.0,0.0,exp10,g4dn.xlarge,reuters,rnn,Adadelta,128,5.86938,0.079334,83506.345749
3,0.0,4.544,0.0,4.384,0.0,0.0,0.0,0.0,493.907,4.017,4.095,0.0,238.008,0.0,21.919,106.142,23.573,18.207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.896,0.0,420.226,0.0,9.92,32.702,0.0,114.303,2.048,0.0,4.48,0.0,96.254,0.0,8.224,0.0,361.176,0.0,0.0,0.0,0.0,0.0,323.002,0.0,0.0,4.96,0.0,0.0,0.0,0.0,0.0,0.0,8.192,8.223,0.0,0.0,28300.737,0.0,4.32,0.0,20.256,0.0,0.0,44.607,0.0,0.0,95.358,0.0,4.096,0.0,181.565,6.144,0.0,0.0,0.0,4.096,12.799,126.942,11.711,5.664,48.191,0.0,0.0,29.216,23.104,0.0,0.0,0.0,32.639,0.0,107.039,0.0,0.0,0.0,68.265,0.0,0.0,4.608,0.0,0.0,42.411,97.342,7899.973,7.071,447.832,10932.544,157389.8,23.199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1225.259,5.12,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,imdb,bilstm,Adadelta,128,4.004237,0.013931,10093.808174
4,0.0,8.448,0.0,12.992,35.808,0.0,0.0,0.0,458.772,4.037,8.672,0.0,5622.593,0.0,30.176,4770.46,25.232,35.647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.144,0.0,850.07,0.0,11663.21,7968.998,4933.456,140.438,4.384,0.0,9.664,0.0,49.632,0.0,8509.769,0.0,829.453,0.0,0.0,3564.362,0.0,0.0,0.0,0.0,0.0,3353.154,0.0,0.0,0.0,0.0,5191.062,0.0,0.0,8.697,2945.603,0.0,1053488.986,0.0,10.048,50.304,87394.691,0.0,0.0,8581.194,0.0,249.627,10666.886,0.0,8.511,10.56,221.529,2635.266,0.0,0.0,0.0,0.0,25.472,233.594,22.24,6574.487,57870.532,0.0,0.0,11154.921,53.758,0.0,0.0,0.0,57.055,0.0,2435.407,0.0,0.0,0.0,55.962,0.0,0.0,5240.634,0.0,0.0,55.672,46.592,0.0,17887.483,15685.504,0.0,4038155.0,28.245,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,501.368,236.315,11.52,12.288,0.0,0.0,0.0,exp10,g4dn.xlarge,reuters,lstm,Adadelta,128,27.836915,0.382557,393855.333328
5,0.0,8.16,0.0,8.544,27.936,0.0,0.0,0.0,494.216,4.276,8.48,0.0,0.0,0.0,23.232,0.0,23.25,34.368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.391,0.0,647.675,0.0,9.76,1407.126,0.0,113.644,5.6,0.0,9.76,0.0,52.511,0.0,1725.371,0.0,849.072,0.0,0.0,1795.393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.935,1490.301,0.0,209975.741,0.0,9.536,0.0,22059.199,0.0,0.0,75.358,0.0,247.484,55.679,0.0,8.576,0.0,211.451,43.135,0.0,0.0,0.0,8.351,24.928,121.342,23.2,1678.845,42.111,0.0,0.0,1948.163,47.36,0.0,0.0,0.0,59.936,0.0,2436.002,0.0,0.0,0.0,45.558,0.0,0.0,1279.02,0.0,0.0,40.24,58.91,0.0,0.0,8329.245,0.0,655615.8,22.489,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,512.081,238.202,11.552,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,imdb,rnn,Adadelta,128,17.78044,0.080052,83688.378334
6,13303.845,0.0,301.393,0.0,710.865,929.929,16.293,87.06,16090.642,197.119,206.908,1285.508,19104.94,128.204,708.976,9334.251,426.295,749.389,4.725,0.0,5616.1,0.0,179.367,1671.601,0.0,377.715,0.0,34072.74,20.466,0.0,7337.08,0.0,11740.141,40.384,34.328,0.0,0.0,5800.946,0.0,0.0,132.178,16063.009,0.0,30.879,0.0,60.086,0.0,22555.529,33.889,0.0,0.0,0.0,26.381,34.571,0.0,0.0,0.0,10463.37,823.2,0.0,0.0,5237346.254,0.0,0.0,851.756,2165.521,486.923,0.0,744.949,0.0,0.0,21942.514,28.415,0.0,0.0,12782.83,497.266,238.576,142.934,331.21,0.0,0.0,45112.388,0.0,294.428,410.239,470.844,0.0,172.571,0.0,0.0,60.86,122.265,1207.045,37.7,21027.96,0.0,16.98,21.456,157.553,95.063,57.738,206.108,0.0,0.0,791.913,665.041,1497029.039,0.0,72385.422,2394829.04,102625600.0,760.365,0.0,2544.331,7.894,0.0,142.49,29.629,0.128,92.009,0.0,168514.164,210.17,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,babi,bilstm,Adadelta,128,4.510481,0.056435,38265.705109
7,7555.389,0.0,172.309,0.0,694.29,627.26,16.771,52.937,15520.493,193.951,200.477,677.984,9263.664,82.001,3898.372,4874.574,253.499,756.082,5.045,0.0,2905.702,0.0,111.945,3244.769,0.0,377.303,0.0,54943.926,12.236,0.0,4105.951,0.0,11606.892,27.616,23.306,0.0,0.0,5846.085,0.0,0.0,82.18,16194.164,0.0,20.283,0.0,34.964,0.0,0.0,20.751,0.0,0.0,0.0,15.787,36.297,0.0,0.0,0.0,5049.67,780.199,0.0,0.0,2537603.398,0.0,0.0,833.072,1802.429,260.756,0.0,705.167,0.0,0.0,10837.897,18.399,0.0,0.0,7150.815,453.556,142.77,92.776,210.12,0.0,0.0,45076.631,0.0,300.689,419.573,256.912,0.0,163.678,0.0,0.0,38.732,65.34,1207.558,25.006,10520.077,0.0,17.6,14.6,174.31,64.579,37.872,204.892,0.0,0.0,569.52,660.878,748892.853,0.0,28887.092,1195828.631,50736770.0,691.581,0.0,2529.736,5.832,0.0,77.052,20.47,2.816,57.03,0.0,84013.261,219.837,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,babi,lstm,Adadelta,128,2.518257,0.031523,21122.694016
8,0.0,9.248,0.0,8.704,0.0,0.0,0.0,0.0,532.45,3.779,9.695,0.0,5991.441,0.0,23.776,8025.553,24.703,36.352,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.304,0.0,605.368,0.0,11957.141,7896.793,5377.125,105.597,5.664,0.0,9.727,0.0,105.406,0.0,8735.479,0.0,842.384,0.0,0.0,3614.479,0.0,0.0,0.0,0.0,0.0,3491.406,0.0,0.0,0.0,0.0,5293.687,0.0,0.0,9.181,2998.313,0.0,1205443.305,0.0,8.255,0.0,102807.064,0.0,0.0,8955.436,0.0,410.199,11042.637,0.0,9.6,0.0,224.856,2657.926,0.0,0.0,0.0,9.056,25.12,211.932,21.887,6090.422,59331.62,0.0,0.0,11071.008,44.0,0.0,0.0,0.0,58.686,0.0,2499.312,0.0,0.0,0.0,71.253,0.0,0.0,5586.344,0.0,0.0,43.411,120.51,0.0,18346.539,16277.619,0.0,3029340.0,23.293,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,474.426,451.285,11.744,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,imdb,lstm,Adadelta,128,99.523454,0.46645,410079.717636
9,0.0,0.0,0.0,4492.486,1498.53,0.0,0.0,0.0,157.838,4.787,0.0,0.0,0.0,0.0,87087.162,0.0,27.65,0.0,0.0,21690.338,0.0,41565.535,0.0,177.795,2540.539,5799.883,2202.189,733077.861,0.0,1754.364,801.611,12.288,0.0,1066.766,0.0,0.0,2661.195,112.222,1284.622,12379.979,0.0,810.546,1109.065,0.0,1669.982,0.0,277.818,0.0,0.0,14.496,0.0,16094.707,0.0,0.0,417.848,0.0,13.344,0.0,10.647,1070.378,324.44,1663331.736,1553.011,0.0,0.0,393.237,0.0,2706.679,2424.593,105.086,0.0,110.526,0.0,2942.915,1172.648,0.0,0.0,0.0,0.0,0.0,0.0,713.619,205.692,22.272,15.872,40563.544,0.0,1530.625,1416.193,0.0,2973.861,0.0,0.0,0.0,0.0,182.524,2559.65,0.0,0.0,51.966,0.0,0.0,0.0,174.196,74.239,50.126,0.0,0.0,1479.649,5976.903,0.0,9204412.0,19.54,254.555,0.0,0.0,645.744,0.0,0.0,0.0,0.0,239.772,26.908,0.0,921.871,1039.659,424.214,95435.809,exp10,g4dn.xlarge,imdb,smallbert,adamw,128,112.799092,0.587234,551211.833954


In [4]:
# Derive column groups and experiment factor levels from the anchor dataset;
# these are reused by the clustering and modeling cells.

columns = anchor_data.columns.tolist()
host_cols = [name for name in columns if name.startswith('Host_')]
device_cols = [name for name in columns if name.startswith('Device_')]
latency_cols = ['epoch_latency', 'batch_latency']
workload_cols = ['instance_name', 'model', 'dataset', 'optimizer', 'batchsize', 'exp_name']

# Models used for validation are fixed by hand; the remaining factor levels
# are the sorted distinct values observed in the data.
model_list = ['bilstm', 'lstm', 'rnn']
dataset_list = sorted(anchor_data['dataset'].value_counts().index)
batchsize_list = sorted(anchor_data['batchsize'].value_counts().index)
exp_list = sorted(anchor_data['exp_name'].value_counts().index)

### 01-2. Feature Engineering with NLP Clustering

In [5]:
# Function-1: levenshtein
# edit distance (insert / delete / substitute, each costing 1) between two strings

def levenshtein(str_x, str_y):
    """Return the Levenshtein distance between ``str_x`` and ``str_y``.

    A (len(str_x)+1) x (len(str_y)+1) table is filled with dynamic programming;
    the bottom-right cell holds the distance. The table is a float array, so
    the result is returned as a float.
    """
    rows, cols = len(str_x) + 1, len(str_y) + 1
    table = np.zeros((rows, cols))

    # Borders: turning a prefix into the empty string costs its length.
    table[:, 0] = np.arange(rows)
    table[0, :] = np.arange(cols)

    for i, char_x in enumerate(str_x, start=1):
        for j, char_y in enumerate(str_y, start=1):
            # Matching characters carry over the diagonal cost unchanged.
            substitution = 0 if char_x == char_y else 1
            table[i, j] = min(
                table[i - 1, j] + 1,
                table[i - 1, j - 1] + substitution,
                table[i, j - 1] + 1,
            )
    return table[-1, -1]

In [6]:
# Calculate the Levenshtein distance matrix for every pair of device operations

feature_names = [x[7:] for x in device_cols] # remove "Device_" character
dist_matrix = pd.DataFrame(0, index=feature_names, columns=feature_names)
for x in feature_names:
    for y in feature_names:
        # Write through .loc (row y, column x) instead of chained indexing
        # `dist_matrix[x][y] = ...`: chained assignment may update a temporary
        # copy and leave dist_matrix untouched (always so under Copy-on-Write).
        dist_matrix.loc[y, x] = levenshtein(x, y)

In [7]:
# Group device operations by hierarchical clustering on the name-distance matrix

# NOTE(review): the cut distance is the literal 10, not CLUSTER_THRESHOLD -- confirm intent.
cluster = fcluster(
    linkage(squareform(dist_matrix), CLUSTER_METHOD),
    10,
    criterion='distance',
)

# One empty bucket per distinct cluster label; labels are shifted by one when
# used as dictionary keys below.
cluster_feature = {i: [] for i in range(len(np.unique(cluster)))}
for index, value in enumerate(feature_names):
    bucket = cluster_feature[cluster[index] - 1]
    bucket.append(value)

In [8]:
# Reload the anchor data, then replace each multi-member cluster of device
# operations with a single column holding the sum of its members.
# The pickle is opened in a context manager so the file handle is always closed.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(f"../../dataset/anchor-data/anchor_{ANCHOR_NAME}.pickle", 'rb') as anchor_file:
    anchor_data = pickle.load(anchor_file)

for key, value in cluster_feature.items():
    value = ["Device_" + x for x in value]

    # Fix for a feature clustering error: for a single-member cluster the
    # joined name equals the feature's own name, so the merge below would
    # overwrite the column and then drop it. Leave such features as they are.
    if len(value) == 1:
        continue

    merged_name = "&".join(value)
    anchor_data[merged_name] = 0
    for feature in value:
        anchor_data[merged_name] += anchor_data[feature]
    # The member columns are now represented by the merged column.
    anchor_data = anchor_data.drop(columns=value)
anchor_data

Unnamed: 0,Host_OneHot,Host_Floor,Host_SelectV2,Host_TensorSliceDataset,Host_Shape,Host_Dataset,Host_Identity,Host_Cast,Host_ConcatV2,Host_WriteSummary,Host_TensorDataset,Host_RegexSplitWithOffsets,Host_Sum,Host_AddV2,Host__HostSend,Host_CaseFoldUTF8,Host_NormalizeUTF8,Host__Send,Host_StopGradient,Host_GatherV2,Host_BroadcastTo,Host_RaggedTensorToTensor,Host_Maximum,Host_LessEqual,Host_Range,Host_RaggedTensorFromVariant,Host_Slice,Host__Recv,Host_BroadcastArgs,Host_AssignVariableOp,Host_ReadVariableOp,Host_TensorListFromTensor,Host_RaggedTensorToVariant,Host_Sub,Host__HostRecv,Host_LogicalOr,Host_Min,Host_Equal,Host_StridedSlice,Device_UnsortedSegmentSum,Device_DynamicStitch,Host_RealDiv,Device_SquaredDifference,Host_LookupTableFindV2,Host_Mul,Host_Minimum,Host_GreaterEqual,Host_StaticRegexReplace,Host_ConcatenateDataset,Host_ExpandDims,Host_FlushSummaryWriter,Host_Transpose,Host_Reshape,Host_TensorListStack,Host_LogicalAnd,Device_RandomUniform,Device_CudnnRNNBackprop,Host_IDLE,Host_IteratorGetNext,Device_SoftmaxCrossEntropyWithLogits,Host_NoOp,Host_Max,Host_ZerosLike,Host_Pack,Host_RaggedGather,Host_WordpieceTokenizeWithOffsets,exp_name,instance_name,dataset,model,optimizer,batchsize,epoch_latency,batch_latency,g5.xlarge,Device_ResourceGather&Device_ResourceScatterAdd,Device_ResourceApplyAdadelta&Device_ResourceSparseApplyAdadelta&Device_ResourceApplyAdam,Device_TensorListStack&Device_TensorListFromTensor,Device_AssignAddVariableOp&Device_ReadVariableOp&Device_AssignSubVariableOp,Device_StridedSlice&Device_StridedSliceGrad,Device_Log1p&Device_Sub&Device_Softmax&Device_Equal&Device_ConcatV2&Device__Send&Device_Split&Device_DivNoNan&Device_Einsum&Device_Sum&Device_GreaterEqual&Device__Recv&Device_LogicalAnd&Device_Reciprocal&Device_Pow&Device_AddV2&Device_Unique&Device_Square&Device_TanhGrad&Device_ReverseV2&Device_Pack&Device_Sigmoid&Device_L2Loss&Device_SigmoidGrad&Device_OneHot&Device_Slice&Device_Tanh&Device_IDLE&Device_Exp&Device_ArgMax&Device_MatMul&Device_Cast
&Device_RealDiv&Device_Mean&Device_Fill&Device_Greater&Device_Neg&Device_BiasAddGrad&Device_Mul&Device__HostRecv&Device_Select&Device_ZerosLike&Device_BiasAdd&Device_RsqrtGrad&Device_CudnnRNN&Device_AddN&Device_Sqrt&Device_Transpose&Device_Tile&Device_BroadcastTo
0,0.0,0.0,0.0,0.0,0.0,464.682,3.637,0.0,0.0,25.358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,354.215,0.0,127.738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,118.301,8.672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.06,0.0,0.0,0.0,49.673,8.064,10673.535,143080.6,30.981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,reuters,bilstm,Adadelta,128,1.238734,0.015138,10034.08432,40.639,211.833,0.0,29.822,92.99,44760.687
1,2214.833,41.909,545.914,13.261,54.743,15101.474,196.484,302.087,73.973,177.774,4.674,0.0,934.688,102.398,963.751,0.0,0.0,6978.883,12.534,5206.673,19.512,0.0,86.377,16.888,32.844,0.0,20.928,0.0,14.99,35.643,771.503,0.0,0.0,164.355,0.0,16.802,65.117,83.677,192.343,7497.045,0.0,97.492,0.0,0.0,35.425,32.334,21.823,0.0,17.009,12.542,160.484,48.714,44.72,0.0,431.744,0.0,0.0,66823010.0,714.439,5135.306,4.794,43.48,16.849,58.996,0.0,0.0,exp10,g4dn.xlarge,babi,rnn,Adadelta,128,3.618825,0.045379,47852.516174,1445.446,15824.829,18980.021,2365.199,2707.494,6168787.0
2,0.0,0.0,0.0,0.0,0.0,465.425,3.777,0.0,0.0,23.81,0.0,0.0,0.0,0.0,16.434,0.0,0.0,416.029,0.0,160.294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.184,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,241.5,21.856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,43.049,0.0,0.0,0.0,41.01,0.0,0.0,743219.2,21.165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,reuters,rnn,Adadelta,128,5.86938,0.079334,83506.345749,48.863,269.369,758.382,60.222,55.743,245988.103
3,0.0,0.0,0.0,0.0,0.0,493.907,4.017,0.0,0.0,23.573,0.0,0.0,0.0,0.0,0.0,0.0,0.0,420.226,0.0,114.303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.223,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,126.942,11.711,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,68.265,0.0,0.0,0.0,42.411,7.071,10932.544,157389.8,23.199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,imdb,bilstm,Adadelta,128,4.004237,0.013931,10093.808174,96.254,278.907,0.0,32.639,95.358,39353.96
4,0.0,0.0,0.0,0.0,0.0,458.772,4.037,0.0,0.0,25.232,0.0,0.0,0.0,0.0,0.0,0.0,0.0,850.07,0.0,140.438,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.697,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,233.594,22.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55.962,0.0,0.0,0.0,55.672,17887.483,0.0,4038155.0,28.245,0.0,0.0,0.0,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,reuters,lstm,Adadelta,128,27.836915,0.382557,393855.333328,49.632,268.121,750.995,57.055,15600.342,1306062.997
5,0.0,0.0,0.0,0.0,0.0,494.216,4.276,0.0,0.0,23.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,647.675,0.0,113.644,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.935,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.342,23.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.558,0.0,0.0,0.0,40.24,0.0,0.0,655615.8,22.489,0.0,0.0,0.0,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,imdb,rnn,Adadelta,128,17.78044,0.080052,83688.378334,52.511,270.361,759.565,59.936,55.679,255637.818
6,13303.845,301.393,929.929,16.293,87.06,16090.642,197.119,1285.508,128.204,426.295,4.725,0.0,5616.1,179.367,1671.601,0.0,0.0,34072.74,20.466,11740.141,34.328,0.0,132.178,30.879,60.086,0.0,33.889,0.0,26.381,34.571,823.2,0.0,0.0,486.923,0.0,28.415,238.576,142.934,331.21,45112.388,0.0,470.844,0.0,0.0,60.86,122.265,37.7,0.0,16.98,21.456,157.553,95.063,57.738,0.0,791.913,0.0,2394829.04,102625600.0,760.365,2544.331,7.894,142.49,29.629,92.009,0.0,0.0,exp10,g4dn.xlarge,babi,bilstm,Adadelta,128,4.510481,0.056435,38265.705109,5800.946,13447.871,0.0,1207.173,21942.514,7089508.263
7,7555.389,172.309,627.26,16.771,52.937,15520.493,193.951,677.984,82.001,253.499,5.045,0.0,2905.702,111.945,3244.769,0.0,0.0,54943.926,12.236,11606.892,23.306,0.0,82.18,20.283,34.964,0.0,20.751,0.0,15.787,36.297,780.199,0.0,0.0,260.756,0.0,18.399,142.77,92.776,210.12,45076.631,0.0,256.912,0.0,0.0,38.732,65.34,25.006,0.0,17.6,14.6,174.31,64.579,37.872,0.0,569.52,0.0,1195828.631,50736770.0,691.581,2529.736,5.832,77.052,20.47,57.03,0.0,0.0,exp10,g4dn.xlarge,babi,lstm,Adadelta,128,2.518257,0.031523,21122.694016,5846.085,7811.693,0.0,1210.374,10837.897,3460461.737
8,0.0,0.0,0.0,0.0,0.0,532.45,3.779,0.0,0.0,24.703,0.0,0.0,0.0,0.0,0.0,0.0,0.0,605.368,0.0,105.597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.181,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,211.932,21.887,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,71.253,0.0,0.0,0.0,43.411,18346.539,0.0,3029340.0,23.293,0.0,0.0,0.0,0.0,0.0,0.0,0.0,exp10,g4dn.xlarge,imdb,lstm,Adadelta,128,99.523454,0.46645,410079.717636,105.406,345.366,884.625,58.686,16419.762,1480247.262
9,0.0,0.0,0.0,0.0,0.0,157.838,4.787,0.0,0.0,27.65,0.0,21690.338,0.0,0.0,177.795,2540.539,2202.189,733077.861,0.0,0.0,0.0,1284.622,0.0,0.0,0.0,277.818,0.0,16094.707,0.0,0.0,10.647,324.44,1553.011,0.0,2706.679,0.0,0.0,0.0,0.0,205.692,22.272,0.0,1530.625,2973.861,0.0,0.0,0.0,2559.65,0.0,0.0,51.966,0.0,0.0,174.196,50.126,1479.649,0.0,9204412.0,19.54,0.0,0.0,0.0,0.0,0.0,424.214,95435.809,exp10,g4dn.xlarge,imdb,smallbert,adamw,128,112.799092,0.587234,551211.833954,217.308,645.744,239.772,1039.659,122.814,1884624.527


### 01-3. Modeling and Validation

In [9]:
# Deep neural network regressor: exponentially decaying learning rate,
# four ReLU hidden layers and a single linear output unit.

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001, decay_steps=1000, decay_rate=0.9
)

def build_dnn_model(input_shape):
    """Build and compile the fully connected regression network.

    Args:
        input_shape: shape tuple given to the first Dense layer.

    Returns:
        A compiled ``tf.keras.Sequential`` with Dense layers of
        128, 64, 32 and 16 ReLU units followed by one output unit.

    NOTE(review): two losses are listed for a single-output model; how Keras
    combines them in this configuration should be confirmed.
    """
    layers = [tf.keras.layers.Dense(128, activation="relu", input_shape=input_shape)]
    layers += [tf.keras.layers.Dense(units, activation="relu") for units in (64, 32, 16)]
    layers.append(tf.keras.layers.Dense(1))

    network = tf.keras.Sequential(layers)
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss=['mean_absolute_percentage_error', 'mean_squared_error'],
        loss_weights=[1., 1.],
    )
    return network

In [10]:
# Function-2: train_test_model
# Fit three regressors (DNN, random forest, linear) on the training rows and
# predict the target instance's value for the test rows, plus a median ensemble.

def train_test_model(train_data, test_data, pred_instance, drop_cols):
    """Train three models and return their predictions for ``test_data``.

    Args:
        train_data: DataFrame of training rows.
        test_data: DataFrame of test rows.
        pred_instance: name of the column holding the prediction target.
        drop_cols: columns removed (together with PRED_INSTANCES) to form the
            feature matrix for the DNN and the random forest.

    Returns:
        Tuple ``(test_y, dnn_pred_y, rfr_pred_y, simple_pred_y, median_pred_y,
        test_data)``; every prediction array is reshaped to one column and
        ``median_pred_y`` is the element-wise median of the three predictions.
    """
    excluded_cols = drop_cols + PRED_INSTANCES

    # Full feature matrices for the DNN / random forest.
    train_x = train_data.drop(excluded_cols, axis=1)
    test_x = test_data.drop(excluded_cols, axis=1)

    # The linear baseline only sees the anchor batch latency.
    train_simple_x = np.array(train_data['batch_latency']).reshape(-1, 1)
    test_simple_x = np.array(test_data['batch_latency']).reshape(-1, 1)

    train_y = train_data[pred_instance]
    test_y = test_data[[pred_instance]].to_numpy()

    # Fit order (DNN, random forest, linear) is kept as-is.
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
    model_dnn = build_dnn_model((train_x.shape[1],))
    model_dnn.fit(
        train_x,
        train_y,
        epochs=200,
        callbacks=[early_stop],
        batch_size=16,
        verbose=0,
    )

    model_rfr = RandomForestRegressor()
    model_rfr.fit(train_x, train_y)

    model_simple = LinearRegression()
    model_simple.fit(train_simple_x, train_y)

    dnn_pred_y = model_dnn.predict(test_x).reshape(-1, 1)
    rfr_pred_y = model_rfr.predict(test_x).reshape(-1, 1)
    simple_pred_y = model_simple.predict(test_simple_x).reshape(-1, 1)

    # Median across the three models for every test row.
    stacked_preds = np.stack([dnn_pred_y, rfr_pred_y, simple_pred_y])
    median_pred_y = np.median(stacked_preds, axis=0)

    return test_y, dnn_pred_y, rfr_pred_y, simple_pred_y, median_pred_y, test_data

In [11]:
# Function-3: model_validation
# 1. run model validation and save the results to a dictionary of dictionaries
#    {pred_instance: {val_model: (test_y, dnn_pred_y, rfr_pred_y,
#                                 simple_pred_y, median_pred_y, test_data)}}
# 2. select the training and test rows for each validation model
# 3. execute train_test_model, then save the result to the dictionary
from sklearn.preprocessing import MinMaxScaler

def model_validation():
    """Run train_test_model for every target instance and validation model.

    Returns:
        dict mapping each entry of PRED_INSTANCES to a dict that maps each
        entry of model_list to the tuple returned by train_test_model.
        An empty dict is returned when PRED_INSTANCES is empty.
    """
    pred_instance_dict = {}
    for pred_instance in PRED_INSTANCES:
        pred_model_dict = {}
        for val_model in model_list:
            # NOTE(review): train and test rows are selected with the same
            # filter and do not depend on val_model, so every model is
            # evaluated on the rows it was trained on -- confirm whether a
            # hold-out split by val_model was intended.
            train_data = anchor_data[(anchor_data['dataset'] == 'reuters')&(anchor_data['model'] != 'smallbert')]
            test_data = anchor_data[(anchor_data['dataset'] == 'reuters')&(anchor_data['model'] != 'smallbert')]

            print(f"Validation Model: {val_model}")
            print(f"Train Data Size: {train_data['model']}")
            print(f"Test Data Size: {test_data['model']}")
            print('-------------------------------------')
            test_y, dnn_pred_y, rfr_pred_y, simple_pred_y, median_pred_y, test_data = train_test_model(
                train_data, test_data, pred_instance,
                latency_cols + workload_cols + PRED_INSTANCES)

            pred_model_dict[val_model] = (test_y, dnn_pred_y, rfr_pred_y, simple_pred_y, median_pred_y, test_data)
        pred_instance_dict[pred_instance] = pred_model_dict
    # Return only after every target instance has been processed; returning
    # inside the loop would stop after the first instance.
    return pred_instance_dict

In [12]:
# Run the validation; results are keyed by target instance, then by validation model.
pred_instance_dict = model_validation()

Validation Model: bilstm
Train Data Size: 0    bilstm
2       rnn
4      lstm
Name: model, dtype: object
Test Data Size: 0    bilstm
2       rnn
4      lstm
Name: model, dtype: object
-------------------------------------
Validation Model: lstm
Train Data Size: 0    bilstm
2       rnn
4      lstm
Name: model, dtype: object
Test Data Size: 0    bilstm
2       rnn
4      lstm
Name: model, dtype: object
-------------------------------------
Validation Model: rnn
Train Data Size: 0    bilstm
2       rnn
4      lstm
Name: model, dtype: object
Test Data Size: 0    bilstm
2       rnn
4      lstm
Name: model, dtype: object
-------------------------------------


### 01-4. Preprocessing Validation Results

In [13]:
# Function-4: build_true_pred_df
# convert dictionary to dataframe

def build_true_pred_df(pred_instance_dict, instance, model):
    """Build a per-row table of true values, DNN predictions and their error.

    Args:
        pred_instance_dict: nested dict ``{instance: {model: result}}`` where
            ``result[0]`` is the true target, ``result[1]`` the DNN prediction
            and ``result[5]`` the test DataFrame.
        instance: key of the target instance to read.
        model: key of the validation model to read.

    Returns:
        A new DataFrame with the columns 'model', 'dataset', 'batchsize',
        'exp_name', '<instance>_true', 'dnn_y' and 'anchor_mape' (absolute
        percentage error of the DNN prediction). The input test DataFrame is
        not modified.
    """
    result = pred_instance_dict[instance][model]
    # Flatten to 1-D so the column assignment does not depend on how pandas
    # treats (n, 1)-shaped arrays.
    true_y = np.asarray(result[0]).reshape(-1)
    dnn_y = np.asarray(result[1]).reshape(-1)
    test_df = result[5]

    # Explicit copy: adding columns to a bare slice of test_df triggers
    # SettingWithCopyWarning and is ambiguous about which object is written.
    pred_df = test_df[['model', 'dataset', 'batchsize', 'exp_name']].copy()
    pred_df[f"{instance}_true"] = true_y
    pred_df['dnn_y'] = dnn_y
    pred_df['batchsize'] = pd.to_numeric(pred_df['batchsize'])
    pred_df['anchor_mape'] = np.abs((true_y - dnn_y) / true_y) * 100

    return pred_df

In [14]:
# Collect one result table per target instance: the per-model tables built by
# build_true_pred_df are concatenated row-wise, in model_list order.
all_result_true_pred_df = []
for test_instance in PRED_INSTANCES:
    single_target_true_pred_df_list = []
    for test_model in model_list:
        true_pred_df = build_true_pred_df(pred_instance_dict, test_instance, test_model)
        single_target_true_pred_df_list.append(true_pred_df)
    single_target_true_pred_df = pd.concat(single_target_true_pred_df_list)
    all_result_true_pred_df.append(single_target_true_pred_df)  
    
# Optional: uncomment to lift pandas' row display limit before printing.
# pd.set_option('display.max_rows', None)
# Show the table for the first target instance.
print(all_result_true_pred_df[0][:])

    model  dataset  batchsize exp_name  g5.xlarge_true          dnn_y  \
0  bilstm  reuters        128    exp10    10034.084320   15297.963867   
2     rnn  reuters        128    exp10    83506.345749   80945.132812   
4    lstm  reuters        128    exp10   393855.333328  439563.875000   
0  bilstm  reuters        128    exp10    10034.084320   11254.816406   
2     rnn  reuters        128    exp10    83506.345749   62683.386719   
4    lstm  reuters        128    exp10   393855.333328  341492.156250   
0  bilstm  reuters        128    exp10    10034.084320   14108.250000   
2     rnn  reuters        128    exp10    83506.345749   75092.539062   
4    lstm  reuters        128    exp10   393855.333328  405278.812500   

   anchor_mape  
0    52.459989  
2     3.067088  
4    11.605414  
0    12.165854  
2    24.935780  
4    13.295028  
0    40.603263  
2    10.075649  
4     2.900425  
