In [1]:
import os, sys
import pandas as pd
import numpy as np

sys.path.append('../')
from knobs import Knob
from utils import rocksdb_knobs_make_dict
from steps import train_fitness_function
from torch.utils.data import DataLoader
from network import RocksDBDataset, SingleNet
from train import train, valid

In [2]:
KNOB_PATH = '../data/rocksdb/configs'
EXTERNAL_PATH = '../data/rocksdb/external'
INTERNAL_PATH = '../data/rocksdb/internal'
WK_NUM = 1

In [3]:
class OPT():
    def __init__(self):
        self.target = 0
        self.batch_size = 32
        self.epochs = 20
        self.lr = 0.0001
        
opt = OPT()

In [5]:
raw_knobs = rocksdb_knobs_make_dict(KNOB_PATH)
raw_knobs = pd.DataFrame(data=raw_knobs['data'].astype(np.float32), columns=raw_knobs['columnlabels'])  

In [6]:
raw_knobs

Unnamed: 0,max_background_compactions,max_background_flushes,write_buffer_size,max_write_buffer_number,min_write_buffer_number_to_merge,compaction_pri,compaction_style,level0_file_num_compaction_trigger,level0_slowdown_writes_trigger,level0_stop_writes_trigger,...,open_files,block_size,cache_index_and_filter_blocks,max_bytes_for_level_base,max_bytes_for_level_multiplier,target_file_size_base,target_file_size_multiplier,num_levels,memtable_bloom_size_ratio,compression_ratio
0,6.0,16.0,1304576.0,3.0,2.0,1.0,0.0,3.0,24.0,44.0,...,10000.0,9216.0,0.0,5242880.0,11.0,1800192.0,1.0,8.0,0.10,0.67
1,2.0,14.0,911360.0,6.0,2.0,0.0,0.0,7.0,18.0,55.0,...,100000.0,15360.0,1.0,3145728.0,10.0,1729536.0,1.0,5.0,0.15,0.50
2,8.0,6.0,1474560.0,6.0,1.0,2.0,0.0,5.0,18.0,48.0,...,1000000.0,6144.0,0.0,4194304.0,9.0,1429504.0,1.0,6.0,0.05,0.36
3,12.0,8.0,550912.0,7.0,1.0,3.0,0.0,4.0,18.0,35.0,...,10000.0,3072.0,1.0,2097152.0,11.0,900096.0,1.0,7.0,0.05,0.63
4,11.0,15.0,1426432.0,5.0,1.0,0.0,0.0,5.0,23.0,59.0,...,10000.0,5120.0,0.0,5242880.0,8.0,560128.0,1.0,5.0,0.20,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,1.0,15.0,551936.0,5.0,1.0,3.0,0.0,4.0,18.0,56.0,...,10000.0,8192.0,1.0,3145728.0,8.0,1148928.0,1.0,5.0,0.20,0.93
19996,7.0,8.0,826368.0,4.0,2.0,2.0,0.0,7.0,18.0,56.0,...,1000000.0,13312.0,1.0,7340032.0,10.0,922624.0,2.0,6.0,0.05,0.68
19997,6.0,12.0,730112.0,5.0,2.0,1.0,0.0,7.0,29.0,36.0,...,100000.0,8192.0,1.0,4194304.0,9.0,1661952.0,1.0,6.0,0.00,1.00
19998,9.0,11.0,1430528.0,6.0,1.0,3.0,0.0,4.0,25.0,34.0,...,100000.0,13312.0,1.0,3145728.0,9.0,834560.0,2.0,7.0,0.00,0.77


In [5]:
internal_dict = {}
external_dict = {}

pruned_im = pd.read_csv(os.path.join(INTERNAL_PATH, 'internal_ensemble_pruned_tmp.csv'), index_col=0)
for wk in range(WK_NUM):
    im = pd.read_csv(os.path.join(INTERNAL_PATH, f'internal_results_{wk}.csv'), index_col=0)
    internal_dict[wk] = im[pruned_im.columns]
if opt.target > 15:
    im = pd.read_csv(f'data/target_workload/{opt.target}/internal_results_11.csv', index_col=0)
    internal_dict[wk+1] = im[pruned_im.columns]

for wk in range(WK_NUM):
    ex = pd.read_csv(os.path.join(EXTERNAL_PATH, f'external_results_{wk}.csv'), index_col=0)
    external_dict[wk] = ex
if opt.target > 15:
    ex = pd.read_csv(f'data/target_workload/{opt.target}/external_results_11.csv', index_col=0)
    external_dict[wk+1] = ex

In [6]:
knobs = Knob(raw_knobs, internal_dict, external_dict, opt.target)

In [7]:
knobs.split_data()
knobs.scale_data()

In [8]:
dataset_tr = RocksDBDataset(knobs.norm_X_tr, knobs.norm_em_tr)
dataset_te = RocksDBDataset(knobs.norm_X_te, knobs.norm_em_te)

loader_tr = DataLoader(dataset = dataset_tr, batch_size = opt.batch_size, shuffle=True)
loader_te = DataLoader(dataset = dataset_te, batch_size = opt.batch_size, shuffle=False)

model = SingleNet(input_dim=knobs.norm_X_tr.shape[1], hidden_dim=16, output_dim=knobs.norm_em_tr.shape[-1]).cuda()

In [10]:
best_loss = 100
# name = get_filename('model_save', 'model', '.pt')
for epoch in range(opt.epochs):
    loss_tr = train(model, loader_tr, opt.lr)
    loss_te, outputs = valid(model, loader_te)

    print(f"[{epoch:02d}/{opt.epochs}] loss_tr: {loss_tr:.8f}\tloss_te:{loss_te:.8f}")

[00/20] loss_tr: 0.04003805	loss_te:0.03580615
[01/20] loss_tr: 0.03285220	loss_te:0.02912505
[02/20] loss_tr: 0.02611693	loss_te:0.02275005
[03/20] loss_tr: 0.02022883	loss_te:0.01747382
[04/20] loss_tr: 0.01521571	loss_te:0.01291721
[05/20] loss_tr: 0.01110809	loss_te:0.00943629
[06/20] loss_tr: 0.00824655	loss_te:0.00725319
[07/20] loss_tr: 0.00680578	loss_te:0.00643578
[08/20] loss_tr: 0.00633372	loss_te:0.00619593
[09/20] loss_tr: 0.00614985	loss_te:0.00605251
[10/20] loss_tr: 0.00604046	loss_te:0.00596460
[11/20] loss_tr: 0.00596313	loss_te:0.00590390
[12/20] loss_tr: 0.00591374	loss_te:0.00584863
[13/20] loss_tr: 0.00587182	loss_te:0.00580835
[14/20] loss_tr: 0.00584023	loss_te:0.00577448
[15/20] loss_tr: 0.00581302	loss_te:0.00576012
[16/20] loss_tr: 0.00578510	loss_te:0.00570342
[17/20] loss_tr: 0.00574415	loss_te:0.00564570
[18/20] loss_tr: 0.00567817	loss_te:0.00556786
[19/20] loss_tr: 0.00559141	loss_te:0.00549298


In [49]:
def single_score_function(df, pr):
    df = np.repeat(df, pr.shape[0], axis=0)
    score = (df[0] - pr[0]) + (pr[1] - df[1]) + (df[2] - pr[2]) + (df[3] - pr[3])
    print(score)
    return np.round(score, 6)

In [53]:
df = np.repeat(knobs.default_trg_em, outputs.shape[0], axis=0)
pr = outputs.detach().cpu().numpy()

In [61]:
(df[:,0] - pr[:,0] + pr[:,1] - df[:,1])

array([-0.33032445, -0.4273648 , -0.35677413, ..., -0.21283447,
       -0.36919947, -0.12358297])

In [63]:
0.00362396 - 0.02582621 + 0.42511803 - 0.73324022

-0.33032444

In [62]:
df, pr

(array([[0.00362396, 0.73324022, 0.23343849, 0.55314511],
        [0.00362396, 0.73324022, 0.23343849, 0.55314511],
        [0.00362396, 0.73324022, 0.23343849, 0.55314511],
        ...,
        [0.00362396, 0.73324022, 0.23343849, 0.55314511],
        [0.00362396, 0.73324022, 0.23343849, 0.55314511],
        [0.00362396, 0.73324022, 0.23343849, 0.55314511]]),
 array([[ 0.02582621,  0.42511803,  0.21571767,  0.32778028],
        [ 0.03594913,  0.3382006 ,  0.11593885,  0.5312192 ],
        [ 0.02982348,  0.40266562,  0.34310928,  0.8430038 ],
        ...,
        [ 0.02072407,  0.53750587,  0.19755192,  0.86267316],
        [ 0.0325234 ,  0.3929402 ,  0.20183249,  0.4707964 ],
        [-0.00550541,  0.6005279 ,  0.20492807,  1.0165479 ]],
       dtype=float32))

In [50]:
single_score_function(knobs.default_trg_em, outputs.detach().cpu().numpy())

[-0.02957605  0.43930064 -0.25100482 -0.53670648]


array([-0.029576,  0.439301, -0.251005, -0.536706])

In [42]:
knobs.default_trg_em

array([[0.00362396, 0.73324022, 0.23343849, 0.55314511]])

In [44]:
np.repeat(knobs.default_trg_em, outputs.shape[0], axis=0)

array([[0.00362396, 0.73324022, 0.23343849, 0.55314511],
       [0.00362396, 0.73324022, 0.23343849, 0.55314511],
       [0.00362396, 0.73324022, 0.23343849, 0.55314511],
       ...,
       [0.00362396, 0.73324022, 0.23343849, 0.55314511],
       [0.00362396, 0.73324022, 0.23343849, 0.55314511],
       [0.00362396, 0.73324022, 0.23343849, 0.55314511]])

In [35]:
outputs.detach().cpu().numpy()

array([[ 0.02582621,  0.42511803,  0.21571767,  0.32778028],
       [ 0.03594913,  0.3382006 ,  0.11593885,  0.5312192 ],
       [ 0.02982348,  0.40266562,  0.34310928,  0.8430038 ],
       ...,
       [ 0.02072407,  0.53750587,  0.19755192,  0.86267316],
       [ 0.0325234 ,  0.3929402 ,  0.20183249,  0.4707964 ],
       [-0.00550541,  0.6005279 ,  0.20492807,  1.0165479 ]],
      dtype=float32)

In [23]:
single_score_function(knobs.default_trg_em, outputs.cpu().numpy())

-0.087239

In [15]:
knobs.default_trg_em

array([[0.00362396, 0.73324022, 0.23343849, 0.55314511]])

In [22]:
outputs[0].cpu().numpy()

array([0.02582621, 0.42511803, 0.21571767, 0.32778028], dtype=float32)

In [31]:
np.round(outputs.cpu().detach().numpy()[0], 6)

array([0.025826, 0.425118, 0.215718, 0.32778 ], dtype=float32)

tensor([[ 0.0258,  0.4251,  0.2157,  0.3278],
        [ 0.0359,  0.3382,  0.1159,  0.5312],
        [ 0.0298,  0.4027,  0.3431,  0.8430],
        ...,
        [ 0.0207,  0.5375,  0.1976,  0.8627],
        [ 0.0325,  0.3929,  0.2018,  0.4708],
        [-0.0055,  0.6005,  0.2049,  1.0165]], device='cuda:0')

In [12]:
outputs.shape, outputs

(torch.Size([4000, 4]),
 tensor([[ 0.0258,  0.4251,  0.2157,  0.3278],
         [ 0.0359,  0.3382,  0.1159,  0.5312],
         [ 0.0298,  0.4027,  0.3431,  0.8430],
         ...,
         [ 0.0207,  0.5375,  0.1976,  0.8627],
         [ 0.0325,  0.3929,  0.2018,  0.4708],
         [-0.0055,  0.6005,  0.2049,  1.0165]], device='cuda:0'))