In [1]:
import os
import random
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score, explained_variance_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler  
from sklearn.pipeline import Pipeline

In [3]:
def accuracy(clf, test_set_X, test_set_y):
    print("MSE", mean_squared_error(test_set_y, clf.predict(test_set_X)))
    print("r2_score", r2_score(test_set_y, clf.predict(test_set_X), multioutput='variance_weighted'))
    print("explaing variance score", explained_variance_score(test_set_y, clf.predict(test_set_X), multioutput='variance_weighted'))

In [4]:
def accuracy_target_normalized(clf, test_set_X, test_set_y):
    print("MSE", mean_squared_error(test_set_y, target_scaler.inverse_transform(clf.predict(test_set_X))))
    print("r2_score", r2_score(test_set_y, target_scaler.inverse_transform(clf.predict(test_set_X)), multioutput='variance_weighted'))
    print("explaing variance score", explained_variance_score(test_set_y, target_scaler.inverse_transform(clf.predict(test_set_X)), multioutput='variance_weighted'))

In [126]:
dataset_path = './im-datasets/combined-exp-results/undera@jmeter-plugins@5ca25c4e5602/449690936/'
folders = [dataset_path+i for i in os.listdir(dataset_path) if 'INL' not in i]

In [127]:
test_classes = {}
index = 0

In [128]:
filename = 'Multi-JVM.csv'
X = []
y = []
for i, folder in enumerate(folders):
    csv_file = os.path.join(folder, filename)
    file = open(csv_file, 'r')
    file.readline()
    XX = []
    yy = []
    for line in file.readlines():
        line = line.strip().split(',')
        test_class = line[0]
        runtime = float(line[4])+float(line[5])+float(line[6])
        if test_class not in test_classes:
            test_classes[test_class] = index
            index += 1
        XX.append(test_classes[test_class])
        yy.append(runtime)
    X.append(XX)
    y.append(yy)
    break

In [129]:
np.mean(y),np.std(y)

(0.21779252336448598, 0.44961814903771385)

In [15]:
def statistics(X, y, training_set_y,test_y):
    print(dataset_path)
    total = np.sum(y,axis=1)
    min = np.min(total)
    max = np.max(total)
    
    min_index = np.argmin(total)
    max_index = np.argmax(total)
    print(f"max: {max}, min: {min}, diff: {max-min}")
    print(f"max_index: {max_index}, min_index: {min_index}\n")
    
    total = np.sum(training_set_y,axis=1)
    training_min = np.min(total)
    training_max = np.max(total)
    
    print("Train set")
    print(f"max: {training_max}, min: {training_min}, diff: {training_max-training_min}\n")
    
    total = np.sum(test_y,axis=1)
    test_min = np.min(total)
    test_max = np.max(total)
    print("Test set")
    print(f"max: {test_max}, min: {test_min}, diff: {test_max-test_min}\n")
    
    test_order1 = X[max_index]
    test_order2 = X[min_index]
    
    runtime_1 = y[max_index]
    runtime_2 = y[min_index]
    print("test class - test order1 - test order 2 - absoloute diff")
    max_diff = -np.inf
    for i,test_class in enumerate(test_order1):
        r1 = runtime_1[i]
        r2 = runtime_2[np.where(test_order2==test_class)[0][0]]
        diff = abs(r1-r2)
        if diff > max_diff:
            max_diff = diff
            s = f'{test_class},\t{r1}, \t{r2} \t{diff}'
        print(f'{test_class},\t{r1}, \t{r2} \t{diff}')
    
    print("Max diff")
    print(s)
    

In [8]:
random.seed(123)
sequence = np.arange(len(X))
np.random.shuffle(sequence)

In [9]:
X = np.array(X)
y = np.array(y)
no_training_set = int(len(X) * 0.7)

training_set_X = X[:no_training_set]
training_set_y = y[:no_training_set]

test_set_X = X[no_training_set:]
test_set_y = y[no_training_set:]

scaler = StandardScaler()  
scaler.fit(training_set_X) 

target_scaler = StandardScaler()
target_scaler.fit(training_set_y)

training_set_X_normalized = scaler.transform(training_set_X)  
test_set_X_normalized = scaler.transform(test_set_X)  

training_set_y_normalized = target_scaler.transform(training_set_y)  
test_set_y_normalized = target_scaler.transform(test_set_y)  

ValueError: Found array with 0 sample(s) (shape=(0, 41)) while a minimum of 1 is required by StandardScaler.

In [48]:
statistics(y, training_set_y, test_set_y)

./im-datasets/combined-exp-results/abel533@Mapper@b16688ed0060/424685119/
max: 3.4710000000000005, min: 3.3723999999999994, diff: 0.09860000000000113

Train set
max: 3.4636, min: 3.3723999999999994, diff: 0.09120000000000061

Test set
max: 3.4710000000000005, min: 3.3924000000000003, diff: 0.07860000000000023


In [55]:
statistics(y, training_set_y, test_set_y)

./im-datasets/combined-exp-results/demoiselle@framework@75caf12c6de6/308162910/
max: 3.8572000000000006, min: 3.7752, diff: 0.08200000000000074

Train set
max: 3.8572000000000006, min: 3.7817999999999996, diff: 0.07540000000000102

Test set
max: 3.8572000000000006, min: 3.7752, diff: 0.08200000000000074


In [23]:
statistics(y, training_set_y, test_set_y)

TypeError: statistics() missing 1 required positional argument: 'test_y'

In [24]:
statistics(X,y, training_set_y, test_set_y)

./im-datasets/combined-exp-results/orbit@orbit@7f6f338f294a/361637862/
max: 100.34860000000002, min: 93.9274, diff: 6.421200000000013
max_index: 8, min_index: 5

Train set
max: 100.34860000000002, min: 93.9274, diff: 6.421200000000013

Test set
max: 96.45920000000001, min: 94.10100000000001, diff: 2.3581999999999965

test class - test order1 - test order 2 - absoloute diff
36,	2.8158, 	1.0133999999999999 	1.8024
45,	0.7615, 	0.5447 	0.2168
23,	0.7095999999999999, 	0.5348999999999999 	0.17469999999999997
53,	5.5711, 	5.5287 	0.04240000000000066
49,	41.5147, 	41.38300000000001 	0.13169999999998794
54,	0.2869, 	0.26730000000000004 	0.01959999999999995
27,	8.3558, 	1.8038999999999998 	6.551900000000001
30,	0.8121, 	0.7232 	0.08890000000000009
25,	2.0429, 	2.0293 	0.013599999999999834
56,	1.0653, 	1.0315999999999999 	0.03370000000000006
13,	0.5463999999999999, 	0.5522 	0.005800000000000138
50,	0.5306, 	0.5170999999999999 	0.013500000000000068
18,	1.6126, 	1.569 	0.04360000000000008
37,	0.94

In [84]:
folders[14]

'./im-datasets/combined-exp-results/hs-web@hsweb-framework@183bbb8b349e/473260524/LSF+QTF'

In [86]:
y[5], y[14]

(array([3.33800e-01, 1.10310e+00, 1.21530e+00, 6.41400e-01, 3.64800e-01,
        4.28700e-01, 6.81000e-02, 2.80000e-02, 7.68000e-01, 1.38740e+00,
        1.39700e-01, 1.65500e-01, 1.82700e-01, 1.56700e-01, 2.20000e-03,
        1.80000e-03, 7.90000e-03, 7.10000e-03, 3.00400e-01, 1.40000e-02,
        6.22760e+00, 1.99586e+01, 3.27100e-01, 3.66000e-02, 3.98500e-01,
        2.59000e-01, 4.34400e-01, 9.70800e-01]),
 array([2.61200e-01, 3.63000e-01, 3.33800e-01, 2.01000e-02, 1.66100e-01,
        6.25200e-01, 2.85100e-01, 8.60000e-02, 3.92500e-01, 1.90600e-01,
        1.90000e-03, 1.70000e-03, 8.10000e-03, 8.40000e-03, 2.00000e-03,
        1.60500e-01, 3.13400e-01, 3.31900e-01, 1.10260e+00, 1.21370e+00,
        4.32000e-01, 9.74800e-01, 6.22020e+00, 5.02700e-01, 1.89500e-01,
        2.89400e-01, 1.38860e+00, 1.96262e+01]))

In [100]:
X[5], X[14], np.where(X[14]==X[5][0])[0][0]

(array([18, 19, 20, 27, 26, 11, 12, 13, 14, 15, 16, 17,  3,  4,  5,  6,  7,
         8,  9, 10, 24, 25,  0,  1,  2, 21, 22, 23]),
 array([21, 26, 13, 12, 11, 27,  1,  0,  2, 10,  6,  5,  8,  7,  3,  4,  9,
        18, 19, 20, 22, 23, 24, 16, 17, 14, 15, 25]),
 17)

In [50]:
training_set_y_normalized

array([[ 8.35836636e-02, -6.06926208e-01,  2.77239739e-01,
         3.67046572e-01, -2.69001865e-01, -9.78806412e-01,
        -8.60721174e-01, -8.23237189e-01, -1.58953999e+00,
        -2.90963566e-02, -1.27986025e+00,  3.13311237e-01,
        -2.55564207e-01, -8.29257769e-01,  1.46662766e-01,
         2.68355543e+00, -8.26808423e-01, -2.95365896e-01,
        -5.85041924e-01,  1.74811814e+00,  3.57475530e-02,
        -3.93195897e-01, -4.10000312e-01, -1.14212926e-01,
         4.27396701e+00,  2.33264309e+00, -9.25122608e-01,
        -1.15388331e+00],
       [-7.30360677e-01, -7.02463347e-01, -5.91628989e-01,
        -5.62995574e-01, -5.65187930e-01, -9.80419563e-01,
         5.89262957e-01,  3.02469996e-01, -3.22057652e-01,
        -6.41673794e-01,  3.11076789e-01, -1.62229345e-01,
        -2.51298833e-01,  3.32330056e-01, -6.99445702e-01,
         3.05118364e-01, -4.06227635e-01, -2.66213576e-01,
         2.07508941e+00, -3.51768873e-01, -4.64809688e-01,
        -2.47286897e-01,  1.99

In [10]:
def train(training_set_X, training_set_y, test_set_X, test_set_y, solver, alpha, hidden_layer_sizes, max_iter=500):
    clf = MLPRegressor(solver=solver, alpha=alpha,
                       hidden_layer_sizes=hidden_layer_sizes, random_state=1, verbose=True, max_iter=max_iter)
    clf.fit(training_set_X,training_set_y)
    print()
    accuracy_target_normalized(clf, test_set_X, test_set_y)
    return clf

In [42]:
clf = train(training_set_X_normalized, training_set_y, test_set_X_normalized, test_set_y,
      solver = 'adam', alpha=1e-3, hidden_layer_sizes=(41,41))

Iteration 1, loss = 0.14047206
Iteration 2, loss = 0.12689400
Iteration 3, loss = 0.11488179
Iteration 4, loss = 0.10431056
Iteration 5, loss = 0.09497273
Iteration 6, loss = 0.08675592
Iteration 7, loss = 0.07953764
Iteration 8, loss = 0.07327407
Iteration 9, loss = 0.06777879
Iteration 10, loss = 0.06294986
Iteration 11, loss = 0.05871483
Iteration 12, loss = 0.05498092
Iteration 13, loss = 0.05162365
Iteration 14, loss = 0.04864500
Iteration 15, loss = 0.04599116
Iteration 16, loss = 0.04360525
Iteration 17, loss = 0.04144787
Iteration 18, loss = 0.03950924
Iteration 19, loss = 0.03774511
Iteration 20, loss = 0.03615108
Iteration 21, loss = 0.03470108
Iteration 22, loss = 0.03336843
Iteration 23, loss = 0.03214719
Iteration 24, loss = 0.03104424
Iteration 25, loss = 0.03002889
Iteration 26, loss = 0.02906718
Iteration 27, loss = 0.02817896
Iteration 28, loss = 0.02734911
Iteration 29, loss = 0.02656549
Iteration 30, loss = 0.02582065
Iteration 31, loss = 0.02511072
Iteration 32, los

In [48]:
clf = train(training_set_X_normalized, training_set_y, test_set_X_normalized, test_set_y,
      solver = 'sgd', alpha=1e-3, hidden_layer_sizes=(41,41))

Iteration 1, loss = 0.14047206
Iteration 2, loss = 0.13341511
Iteration 3, loss = 0.12441621
Iteration 4, loss = 0.11443513
Iteration 5, loss = 0.10437035
Iteration 6, loss = 0.09467700
Iteration 7, loss = 0.08578844
Iteration 8, loss = 0.07785717
Iteration 9, loss = 0.07092923
Iteration 10, loss = 0.06488928
Iteration 11, loss = 0.05970772
Iteration 12, loss = 0.05527854
Iteration 13, loss = 0.05142930
Iteration 14, loss = 0.04809576
Iteration 15, loss = 0.04526399
Iteration 16, loss = 0.04279823
Iteration 17, loss = 0.04063580
Iteration 18, loss = 0.03872978
Iteration 19, loss = 0.03703482
Iteration 20, loss = 0.03552042
Iteration 21, loss = 0.03417563
Iteration 22, loss = 0.03296499
Iteration 23, loss = 0.03185239
Iteration 24, loss = 0.03083848
Iteration 25, loss = 0.02990060
Iteration 26, loss = 0.02903107
Iteration 27, loss = 0.02822426
Iteration 28, loss = 0.02747322
Iteration 29, loss = 0.02677557
Iteration 30, loss = 0.02612144
Iteration 31, loss = 0.02550532
Iteration 32, los

In [142]:
clf = train(training_set_X_normalized, training_set_y_normalized, test_set_X_normalized, test_set_y,
      solver = 'lbfgs', alpha=1e-3, hidden_layer_sizes=(training_set_X.shape[1],training_set_X.shape[1]), max_iter=10000)


MSE 0.0041836717257752514
r2_score 0.6964879821538842
explaing variance score 0.7253704843863518


In [112]:
clf.predict(test_set_X_normalized)

array([[ 1.26265057e+00, -2.06563518e-03,  2.03558672e-01,
         1.78039821e-01,  2.06260311e-01,  3.13382298e-02,
         6.82359666e-02,  1.16952816e-03,  1.52906985e-01,
        -2.13644087e-02,  9.59854144e-02, -1.52427030e-02,
         3.97600409e-01,  2.66034232e-02, -1.11657631e-02,
         1.33368826e-01,  6.23002936e-03,  2.87755581e-02,
         1.55516268e-02,  1.51448779e+00,  3.95123857e-04,
         6.58539852e-03,  2.23320208e-02,  1.26545062e-02,
         1.06020938e-01,  2.27329740e-01,  3.79129344e-02,
        -2.04217314e-02,  1.41957068e-02,  2.88808942e-02,
         1.30733054e-02,  1.42923961e-02,  1.35353067e-02,
         3.40534972e-02,  2.30654545e-03,  1.01259955e-02,
         2.20182208e-02, -2.00613939e-02,  4.19387880e-02,
         1.76096639e-02,  3.83494196e-02,  1.00097355e-02,
         8.26049727e-02, -1.16051102e-02,  3.91325195e-01,
         1.22073118e+00,  9.69990077e-03,  6.15279706e-02,
         4.90461846e-02,  1.07858457e-01,  2.10835968e-0

In [17]:
training_set_X_normalized.shape

(20, 41)

In [113]:
test_set_y

array([[1.2685e+00, 4.8000e-03, 1.8380e-01, 1.6840e-01, 2.4230e-01,
        3.4300e-02, 4.5000e-02, 4.8000e-03, 1.4740e-01, 4.4000e-03,
        9.2800e-02, 2.1000e-03, 3.9460e-01, 7.1000e-03, 5.2000e-03,
        1.3110e-01, 3.9000e-03, 3.4500e-02, 4.5000e-03, 1.5162e+00,
        1.9000e-03, 1.8000e-03, 1.4000e-03, 1.5000e-03, 1.0920e-01,
        2.6200e-01, 1.5200e-02, 2.6000e-03, 5.8000e-03, 2.9700e-02,
        5.9000e-03, 1.2000e-03, 2.1000e-03, 1.3700e-02, 1.2000e-03,
        1.7900e-02, 1.0000e-03, 1.5000e-03, 4.1700e-02, 3.2000e-03,
        4.4700e-02, 2.0000e-03, 8.8600e-02, 2.5000e-03, 4.0100e-01,
        1.1924e+00, 4.3000e-03, 6.6800e-02, 3.5900e-02, 8.9900e-02,
        1.3000e-03, 1.5000e-03, 1.1400e-02, 1.2200e-02, 2.0300e-01,
        2.6800e-02, 1.8440e+00, 4.3000e-03, 9.6000e-03, 2.2980e-01,
        1.6227e+00, 1.8110e-01, 3.6000e-03, 9.6000e-03, 1.9000e-03,
        2.0000e-03, 2.9850e-01, 5.6770e-01, 1.6960e-01, 2.3000e-03,
        2.1000e-03, 1.0000e-03, 1.2000e-03, 2.03