In [1]:
import configparser
import os
import sys

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader

sys.path.append('../')
from knobs import Knob
from utils import rocksdb_knobs_make_dict
from steps import train_fitness_function
from network import RocksDBDataset, SingleNet
from train import train, valid

In [2]:
class OPT:
    """Plain attribute container holding the run options used by this notebook."""

    def __init__(self):
        # `target` and `dbms` are only consumed through the `opt` object handed
        # to Knob; their exact meaning is defined in the project code.
        self.target = 0
        # Mini-batch size given to both DataLoaders.
        self.batch_size = 32
        # Number of passes of the train/valid loop.
        self.epochs = 20
        # Learning rate forwarded to train().
        self.lr = 0.0001
        self.dbms = 'mysql'


# Single shared options instance referenced by the cells below.
opt = OPT()

In [3]:
# Root of the MySQL experiment data, relative to this notebook.
MYSQL_DATA_ROOT = '../data/mysql'
# One sub-directory per workload, each holding my_<idx>.cnf files.
KNOB_PATH = f'{MYSQL_DATA_ROOT}/configs/'
# Holds external_results_<wk>.csv files.
EXTERNAL_PATH = f'{MYSQL_DATA_ROOT}/external'
# Holds internal_results_<wk>.csv files.
INTERNAL_PATH = f'{MYSQL_DATA_ROOT}/internal'
# NOTE(review): not referenced by any cell visible in this notebook, which
# hard-codes workload 0 instead - confirm whether it is still needed.
WK_NUM = 1

In [4]:
def mysql_knob_dataframe(wk, knobs_path):
    """Load the MySQL configs of one workload into a DataFrame of knob values.

    Reads ``my_0.cnf`` ... ``my_{n-1}.cnf`` from ``<knobs_path>/<wk>``, where
    ``n`` is the number of ``my_*.cnf`` files in that directory, and turns the
    options of each file's ``[mysqld]`` section into one row.

    Args:
        wk: Workload identifier; ``str(wk)`` is the sub-directory name.
        knobs_path: Directory containing one sub-directory per workload.

    Returns:
        pandas.DataFrame with one row per config file (row ``i`` comes from
        ``my_i.cnf``) and one column per option. Values are the raw strings
        from the file; an option absent from a file is NaN in that row. The
        'log-error' and 'bind-address' columns are removed.

    Raises:
        FileNotFoundError: an expected ``my_{idx}.cnf`` could not be read.
        KeyError: a file has no ``[mysqld]`` section, or the two columns to
            drop are not present.
    """
    workload_dir = os.path.join(knobs_path, str(wk))
    # Count only the config files so unrelated entries in the directory
    # (checkpoints, notes, ...) cannot inflate the number of rows.
    config_len = sum(
        1
        for name in os.listdir(workload_dir)
        if name.startswith('my_') and name.endswith('.cnf')
    )

    records = []
    for idx in range(config_len):
        cnf_file = os.path.join(workload_dir, f'my_{idx}.cnf')
        # A fresh parser per file: a shared parser accumulates options, so a
        # knob missing from this file would silently inherit the value read
        # from an earlier one. Interpolation is disabled to keep raw values.
        cnf_parser = configparser.ConfigParser(interpolation=None)
        # read() skips unreadable files and returns the list it did parse.
        if not cnf_parser.read(cnf_file):
            raise FileNotFoundError(f'Cannot read MySQL config file: {cnf_file}')
        records.append(dict(cnf_parser['mysqld']))

    # Build the frame once instead of concatenating inside the loop.
    pd_mysql = pd.DataFrame(records)
    pd_mysql = pd_mysql.drop(columns=['log-error', 'bind-address'])
    return pd_mysql

def mysql_metrics_dataframe(wk, internal_path, external_path):
    """Load the internal and external metric tables of one workload.

    Args:
        wk: Workload identifier used in the CSV file names.
        internal_path: Directory holding ``internal_results_<wk>.csv``.
        external_path: Directory holding ``external_results_<wk>.csv``.

    Returns:
        Tuple ``(internal, external_)``:
        ``internal`` is the internal-metrics CSV (first CSV column as index)
        without the columns that hold a single distinct value;
        ``external_`` has two columns, ``tps`` copied from the CSV and
        ``latency``, the row-wise maximum over every column whose name starts
        with "latency" except ``latency_max`` and ``latency_CLEANUP``.
    """
    internal = pd.read_csv(os.path.join(internal_path, f'internal_results_{wk}.csv'), index_col=0)
    # Drop columns that contain only one distinct value.
    # Series.nunique() replaces the deprecated top-level pd.value_counts();
    # both ignore NaN, so the same columns are selected.
    constant_columns = [col for col in internal.columns if internal[col].nunique() == 1]
    internal = internal.drop(columns=constant_columns)

    external = pd.read_csv(os.path.join(external_path, f'external_results_{wk}.csv'), index_col=0)
    excluded_latency = ('latency_max', 'latency_CLEANUP')
    latency_columns = [
        col
        for col in external.columns
        if col.startswith('latency') and col not in excluded_latency
    ]
    # .copy() so adding the new column does not write into a view of `external`.
    external_ = external[['tps']].copy()
    external_['latency'] = external[latency_columns].max(axis=1)
    return internal, external_

In [5]:
# Knob configurations of workload 0, one row per my_<idx>.cnf file.
raw_knobs = mysql_knob_dataframe(wk=0, knobs_path=KNOB_PATH)

In [6]:
# Load the metric tables of workload 0 and store them in dicts keyed by the
# workload index, which is the form handed to Knob below.
wk = 0
internal_metrics, external_metrics = mysql_metrics_dataframe(wk, INTERNAL_PATH, EXTERNAL_PATH)
internal_dict = {wk: internal_metrics}
external_dict = {wk: external_metrics}

In [7]:
# Bundle the raw knob table, the per-workload metric dicts and the run options
# into the project's Knob object (defined in ../knobs.py, not shown here).
knobs = Knob(raw_knobs, internal_dict, external_dict, opt)

In [8]:
# Split first, then scale. NOTE(review): these calls presumably create the
# norm_X_tr / norm_X_te / norm_em_tr / norm_em_te attributes read by the next
# cell - confirm in knobs.py. The call order is kept exactly as written.
knobs.split_data()
knobs.scale_data()

In [12]:
# Wrap the train / test splits (knob inputs paired with external-metric targets).
dataset_tr = RocksDBDataset(knobs.norm_X_tr, knobs.norm_em_tr)
dataset_te = RocksDBDataset(knobs.norm_X_te, knobs.norm_em_te)

# Only the training loader shuffles; the test loader keeps a stable order.
loader_tr = DataLoader(dataset=dataset_tr, batch_size=opt.batch_size, shuffle=True)
loader_te = DataLoader(dataset=dataset_te, batch_size=opt.batch_size, shuffle=False)

# Use the GPU when one is available instead of calling .cuda() unconditionally,
# which raises on CPU-only machines. With a GPU present this is equivalent to
# the previous .cuda() call.
# NOTE(review): train()/valid() may move batches to CUDA themselves - confirm
# in train.py before relying on the CPU fallback.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SingleNet(
    input_dim=knobs.norm_X_tr.shape[1],
    hidden_dim=16,
    output_dim=knobs.norm_em_tr.shape[-1],
).to(device)

In [13]:
# Lowest validation loss seen so far. It starts at +inf so the first epoch
# always sets it; the previous fixed sentinel of 100 was never updated.
best_loss = float('inf')
# NOTE(review): no checkpoint is written; the model keeps its last-epoch
# weights and `outputs` holds the predictions of the final valid() call.
for epoch in range(opt.epochs):
    loss_tr = train(model, loader_tr, opt.lr)
    loss_te, outputs = valid(model, loader_te)

    if loss_te < best_loss:
        best_loss = loss_te

    print(f"[{epoch:02d}/{opt.epochs}] loss_tr: {loss_tr:.8f}\tloss_te:{loss_te:.8f}")

[00/20] loss_tr: 0.58174279	loss_te:0.55839671
[01/20] loss_tr: 0.53605901	loss_te:0.51558013
[02/20] loss_tr: 0.49418305	loss_te:0.47646551
[03/20] loss_tr: 0.45589934	loss_te:0.44031132
[04/20] loss_tr: 0.42038560	loss_te:0.40711499
[05/20] loss_tr: 0.38757308	loss_te:0.37636619
[06/20] loss_tr: 0.35735111	loss_te:0.34799694
[07/20] loss_tr: 0.32949920	loss_te:0.32167802
[08/20] loss_tr: 0.30337285	loss_te:0.29710514
[09/20] loss_tr: 0.27903308	loss_te:0.27401806
[10/20] loss_tr: 0.25603036	loss_te:0.25207901
[11/20] loss_tr: 0.23438258	loss_te:0.23097265
[12/20] loss_tr: 0.21391478	loss_te:0.21079397
[13/20] loss_tr: 0.19446295	loss_te:0.19163351
[14/20] loss_tr: 0.17596670	loss_te:0.17359141
[15/20] loss_tr: 0.15825011	loss_te:0.15611907
[16/20] loss_tr: 0.14175677	loss_te:0.13967000
[17/20] loss_tr: 0.12589431	loss_te:0.12414357
[18/20] loss_tr: 0.11108195	loss_te:0.10954416
[19/20] loss_tr: 0.09713212	loss_te:0.09600289


In [None]:
def single_score_function(df, pr):
    """Score each predicted metric row against the default metric row.

    For every row of ``pr`` the score is::

        (df0 - pr0) + (pr1 - df1) + (df2 - pr2) + (df3 - pr3)

    where the subscript is the metric (column) index. The previous version
    indexed rows instead of columns, so it mixed different samples together.
    When fewer than four metric columns are present, only the leading terms
    are used (with two columns this is ``df0 - pr0 + pr1 - df1``).

    Args:
        df: Default (reference) metrics, shape ``(1, k)`` or ``(k,)``.
        pr: Predicted metrics, shape ``(n, k)`` or ``(k,)``.

    Returns:
        numpy.ndarray of shape ``(n,)``: one score per prediction row,
        rounded to 6 decimals.
    """
    pr = np.atleast_2d(np.asarray(pr))
    # Shape (1, k): broadcasts against every prediction row, so no explicit
    # np.repeat is required.
    df = np.atleast_2d(np.asarray(df))

    # Sign applied to (default - predicted) per metric column; column 1 is the
    # only one scored as (predicted - default).
    signs = np.array([1.0, -1.0, 1.0, 1.0])
    n_terms = min(df.shape[1], pr.shape[1], signs.size)

    score = ((df[:, :n_terms] - pr[:, :n_terms]) * signs[:n_terms]).sum(axis=1)
    return np.round(score, 6)

In [None]:
# Predictions of the last valid() call as a host-side NumPy array.
pr = outputs.detach().cpu().numpy()
# Default metrics repeated once per prediction row so both arrays line up.
df = np.repeat(knobs.default_trg_em, pr.shape[0], axis=0)

In [None]:
# Per-row score using only the first two metric columns.
df[:, 0] - pr[:, 0] + pr[:, 1] - df[:, 1]

In [None]:
# Manual arithmetic check. NOTE(review): the constants look hand-copied from
# one row of `df` / `pr` to cross-check df[:,0] - pr[:,0] + pr[:,1] - df[:,1];
# confirm, or replace with an indexed expression.
0.00362396 - 0.02582621 + 0.42511803 - 0.73324022

In [None]:
# Display the repeated default metrics next to the predictions.
df, pr

In [None]:
# Score the predictions of the last valid() call against the default metrics.
single_score_function(knobs.default_trg_em, outputs.detach().cpu().numpy())

In [None]:
# Inspect the reference metrics that are passed to single_score_function.
knobs.default_trg_em

In [None]:
# Inspect the default metrics repeated outputs.shape[0] times along axis 0.
np.repeat(knobs.default_trg_em, outputs.shape[0], axis=0)

In [None]:
# Inspect the predictions as a NumPy array (detached and copied to the host).
outputs.detach().cpu().numpy()

In [None]:
# Score the predictions against the default metrics. detach() is needed before
# numpy(): calling numpy() on a tensor that requires grad raises a RuntimeError.
single_score_function(knobs.default_trg_em, outputs.detach().cpu().numpy())

In [None]:
# Inspect the reference metrics again (same expression as an earlier cell).
knobs.default_trg_em

In [None]:
# First prediction row as a NumPy array. detach() is needed before numpy():
# calling numpy() on a tensor that requires grad raises a RuntimeError.
outputs[0].detach().cpu().numpy()

In [None]:
# First prediction row rounded to 6 decimals, the precision used by
# single_score_function.
np.round(outputs.cpu().detach().numpy()[0], 6)

In [None]:
# Inspect the shape and the raw tensor of the last valid() predictions.
outputs.shape, outputs