In [1]:
import os, sys
import configparser

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader

# Project modules live one directory up; extend the path before importing them.
sys.path.append('../')
from knobs import Knob
from utils import rocksdb_knobs_make_dict
from steps import train_fitness_function
from network import RocksDBDataset, SingleNet
from train import train, valid

In [2]:
class OPT:
    """Run configuration shared by the cells of this notebook.

    Every setting is a keyword argument whose default is the value that used
    to be hard-coded, so ``OPT()`` yields the same configuration as before
    while ``OPT(epochs=5)`` allows a quick experiment without editing the class.

    Parameters
    ----------
    target : int
        Stored as ``self.target``; presumably the index of the target
        workload consumed by ``Knob`` -- TODO confirm.
    batch_size : int
        Mini-batch size handed to the ``DataLoader`` objects.
    epochs : int
        Number of passes of the training loop.
    lr : float
        Learning rate forwarded to ``train``.
    dbms : str
        Name of the database system the data was collected from.
    """

    def __init__(self, target=0, batch_size=32, epochs=20, lr=0.0001, dbms='mysql'):
        self.target = target
        self.batch_size = batch_size
        self.epochs = epochs
        self.lr = lr
        self.dbms = dbms


# Default configuration used by the remaining cells.
opt = OPT()

In [3]:
# Locations of the collected MySQL data, relative to this notebook.
# KNOB_PATH holds one sub-directory per workload with the my_<idx>.cnf files.
KNOB_PATH = '../data/mysql/configs/'
# Directory of the external_results_<wk>.csv files.
EXTERNAL_PATH = '../data/mysql/external'
# Directory of the internal_results_<wk>.csv files.
INTERNAL_PATH = '../data/mysql/internal'
# Not referenced by the cells visible here; presumably the number of
# workloads -- TODO confirm.
WK_NUM = 1

In [4]:
def mysql_knob_dataframe(wk, knobs_path):
    """Collect the ``[mysqld]`` options of one workload's config files.

    Parameters
    ----------
    wk : int or str
        Workload identifier; files are read from ``<knobs_path>/<wk>/``.
    knobs_path : str
        Directory holding one sub-directory per workload.

    Returns
    -------
    pandas.DataFrame
        One row per configuration file (row ``idx`` comes from
        ``my_<idx>.cnf``) and one column per option. Values are the raw
        strings found in the file; an option absent from a file is NaN in
        that row. The ``log-error`` and ``bind-address`` columns are removed.

    Raises
    ------
    FileNotFoundError
        If ``my_<idx>.cnf`` cannot be read for an ``idx`` below the number
        of configuration files found (i.e. the numbering has a gap).
    KeyError
        If a file has no ``[mysqld]`` section, or the two dropped columns
        are absent from the result.
    """
    knobs_path = os.path.join(knobs_path, str(wk))
    # Count only the configuration files so that stray directory entries
    # (checkpoints, notes, ...) do not inflate the number of rows.
    config_len = sum(
        1
        for name in os.listdir(knobs_path)
        if name.startswith('my_') and name.endswith('.cnf')
    )

    records = []
    for idx in range(config_len):
        cnf_file = os.path.join(knobs_path, f'my_{idx}.cnf')
        # A fresh parser per file: a shared parser accumulates options, so a
        # knob missing from this file would inherit the previous file's value.
        # interpolation=None keeps values raw (a literal '%' is not expanded).
        cnf_parser = configparser.ConfigParser(interpolation=None)
        # read() silently skips unreadable files and returns the list of
        # files it parsed, so an empty result means this file is missing.
        if not cnf_parser.read(cnf_file):
            raise FileNotFoundError(f'Cannot read MySQL configuration file: {cnf_file}')
        records.append(dict(cnf_parser['mysqld']))

    # Build the frame once instead of concatenating inside the loop.
    pd_mysql = pd.DataFrame(records)
    # These two options are dropped from the knob table.
    return pd_mysql.drop(columns=['log-error', 'bind-address'])

def mysql_metrics_dataframe(wk, internal_path, external_path):
    """Load the internal and external metric tables of one workload.

    Parameters
    ----------
    wk : int or str
        Workload identifier used in the CSV file names.
    internal_path : str
        Directory containing ``internal_results_<wk>.csv``.
    external_path : str
        Directory containing ``external_results_<wk>.csv``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``internal``: the internal metrics without the columns that hold a
        single distinct (non-NaN) value.
        ``external_``: two columns -- ``tps`` copied from the file and
        ``latency``, the row-wise maximum over every column whose name
        starts with ``latency`` except ``latency_max`` and
        ``latency_CLEANUP``.
    """
    internal = pd.read_csv(os.path.join(internal_path, f'internal_results_{wk}.csv'), index_col=0)
    # Drop columns that contain one distinct value only. Series.nunique()
    # ignores NaN just like value_counts() does, and replaces the deprecated
    # top-level pd.value_counts().
    constant_columns = [col for col in internal.columns if internal[col].nunique() == 1]
    internal = internal.drop(columns=constant_columns)

    external = pd.read_csv(os.path.join(external_path, f'external_results_{wk}.csv'), index_col=0)
    # Per-operation latency columns; the two excluded names are left out of
    # the maximum below.
    latency_columns = [
        col
        for col in external.columns
        if col.startswith('latency') and col not in ('latency_max', 'latency_CLEANUP')
    ]
    external_ = external[['tps']].copy()
    external_['latency'] = external[latency_columns].max(axis=1)
    return internal, external_

In [5]:
# Knob table for workload 0: one row per my_<idx>.cnf under KNOB_PATH/0.
raw_knobs = mysql_knob_dataframe(wk=0, knobs_path=KNOB_PATH)

In [6]:
# Metric tables are stored in dicts keyed by workload index; only workload 0
# is loaded here.
wk = 0
internal_metrics, external_metrics = mysql_metrics_dataframe(wk, INTERNAL_PATH, EXTERNAL_PATH)
internal_dict = {wk: internal_metrics}
external_dict = {wk: external_metrics}

In [7]:
# Bundle the knob table, the per-workload metric dicts and the run options
# into the project's Knob helper (imported from knobs.py).
knobs = Knob(raw_knobs, internal_dict, external_dict, opt)

In [8]:
# Split first, then scale. NOTE(review): these calls presumably populate the
# norm_X_tr / norm_X_te / norm_em_tr / norm_em_te attributes read by the
# following cell -- confirm against knobs.py.
knobs.split_data()
knobs.scale_data()

In [9]:
# Wrap the normalised train / test splits exposed by `knobs` as datasets.
dataset_tr = RocksDBDataset(knobs.norm_X_tr, knobs.norm_em_tr)
dataset_te = RocksDBDataset(knobs.norm_X_te, knobs.norm_em_te)

# Only the training batches are shuffled; evaluation keeps the dataset order.
loader_tr = DataLoader(dataset=dataset_tr, batch_size=opt.batch_size, shuffle=True)
loader_te = DataLoader(dataset=dataset_te, batch_size=opt.batch_size, shuffle=False)

# Use the GPU when one is present instead of calling .cuda() unconditionally,
# which raises on CPU-only machines.
# NOTE(review): train()/valid() must place their batches on the same device
# as the model -- confirm in train.py.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SingleNet(
    input_dim=knobs.norm_X_tr.shape[1],
    hidden_dim=16,
    output_dim=knobs.norm_em_tr.shape[-1],
).to(device)

In [10]:
# Lowest validation loss seen so far. Starting at +inf (rather than a magic
# number) guarantees that the first epoch is always recorded.
best_loss = float('inf')
for epoch in range(opt.epochs):
    loss_tr = train(model, loader_tr, opt.lr)
    loss_te, outputs = valid(model, loader_te)

    # Keep the best validation loss up to date; this is the place to
    # checkpoint the model if saving is added later.
    if loss_te < best_loss:
        best_loss = loss_te

    print(f"[{epoch:02d}/{opt.epochs}] loss_tr: {loss_tr:.8f}\tloss_te:{loss_te:.8f}")

[00/20] loss_tr: 0.23922064	loss_te:0.23254650
[01/20] loss_tr: 0.21701668	loss_te:0.21084650
[02/20] loss_tr: 0.19596447	loss_te:0.19006921
[03/20] loss_tr: 0.17615204	loss_te:0.17056985
[04/20] loss_tr: 0.15737690	loss_te:0.15224008
[05/20] loss_tr: 0.13971284	loss_te:0.13508516
[06/20] loss_tr: 0.12309072	loss_te:0.11893380
[07/20] loss_tr: 0.10780801	loss_te:0.10415898
[08/20] loss_tr: 0.09350984	loss_te:0.09054345
[09/20] loss_tr: 0.08048067	loss_te:0.07833501
[10/20] loss_tr: 0.06896177	loss_te:0.06743033
[11/20] loss_tr: 0.05852480	loss_te:0.05768212
[12/20] loss_tr: 0.04920238	loss_te:0.04926832
[13/20] loss_tr: 0.04145773	loss_te:0.04235884
[14/20] loss_tr: 0.03491454	loss_te:0.03674349
[15/20] loss_tr: 0.02983636	loss_te:0.03258236
[16/20] loss_tr: 0.02610531	loss_te:0.02969181
[17/20] loss_tr: 0.02368931	loss_te:0.02792136
[18/20] loss_tr: 0.02221749	loss_te:0.02678977
[19/20] loss_tr: 0.02138708	loss_te:0.02609359


In [14]:
# Forward pass of the model over the normalised test-split inputs.
# NOTE(review): this rebinds `outputs`, which the training loop already
# assigned from valid(). Also confirm that knobs.norm_X_te is a tensor living
# on the same device as the model -- TODO confirm.
outputs = model(knobs.norm_X_te)

In [22]:
# List every knob column next to its positional index.
for position, knob_name in enumerate(knobs.columns):
    print(f"{position} {knob_name}")

0 tmp_table_size
1 innodb_buffer_pool_size
2 innodb_random_read_ahead
3 innodb_spin_wait_delay
4 innodb_read_io_threads
5 thread_cache_size
6 innodb_adaptive_hash_index
7 innodb_buffer_pool_instances
8 metadata_locks_hash_instances
9 innodb_thread_concurrency
10 innodb_sync_spin_loops
11 innodb_sync_array_size
12 innodb_lru_scan_depth
13 innodb_purge_threads
14 table_open_cache_instances
15 innodb_write_io_threads
16 innodb_read_ahead_threshold
17 innodb_io_capacity
18 table_open_cache
19 innodb_flush_neighbors
