In [11]:
import sys
import os
import toml

import torch
import torcharrow
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchdata import datapipes as DataPipe

import pandas as pd
import numpy as np
import scipy.optimize as SciOpt
from tqdm.notebook import tqdm

sys.path.append(os.path.join(sys.path[0], '../..'))

from data.io import Reader
from data.kcost_dataset import KCostDataSet
from model.kcost import KCostModel
from model.tierlevelcost import TierLevelCost
from jobs.train import TrainJob
import lsm.cost as CostFunc

In [2]:
config = Reader.read_config('../../endure.toml')
config

{'app': {'name': 'ENDURE', 'jobs': ['Train']},
 'log': {'name': 'endure-logger',
  'format': '[%(levelname)s][%(asctime)-15s][%(filename)s] %(message)s',
  'datefmt': '%d-%m-%y:%H:%M:%S',
  'level': 'INFO'},
 'io': {'data_dir': '/data'},
 'lsm': {'max_levels': 16,
  'size_ratio': {'max': 50, 'min': 2},
  'bits_per_elem': {'max': 9.5, 'min': 0}},
 'data_gen': {'format': 'parquet',
  'generator': 'LevelCost',
  'dir': 'train-data/level-parquet',
  'file_prefix': 'train-level',
  'num_workers': -1,
  'num_files': 3,
  'precision': 3,
  'samples': 1024},
 'model': {'arch': 'TierLevelCost',
  'dir': '27-10-tierlevel',
  'out_dims': 4,
  'num_cont_vars': 5,
  'num_cate_vars': 1,
  'hidden_layers': 2,
  'embedding_size': 3},
 'train': {'dir': 'train-data/level',
  'file_format': 'csv',
  'mean_bias': [4.75, 0.5, 0.5, 0.5, 0.5],
  'std_bias': [2.74, 0.3, 0.3, 0.3, 0.3],
  'early_stop_num': 3,
  'epsilon': 0.0001,
  'max_epochs': 128,
  'learning_rate_decay': 0.99,
  'learning_rate': 0.001,
  '

In [16]:
model_path = '/data/kcostmodel-10-25'
config_model = Reader.read_config(os.path.join(model_path, 'config.toml'))
cf = CostFunc.EndureKHybridCost(**config_model['system'])
config_model

{'workloads': [{'id': 0, 'z0': 0.25, 'z1': 0.25, 'q': 0.25, 'w': 0.25},
  {'id': 1, 'z0': 0.97, 'z1': 0.01, 'q': 0.01, 'w': 0.01},
  {'id': 2, 'z0': 0.01, 'z1': 0.97, 'q': 0.01, 'w': 0.01},
  {'id': 3, 'z0': 0.01, 'z1': 0.01, 'q': 0.97, 'w': 0.01},
  {'id': 4, 'z0': 0.01, 'z1': 0.01, 'q': 0.01, 'w': 0.97},
  {'id': 5, 'z0': 0.49, 'z1': 0.49, 'q': 0.01, 'w': 0.01},
  {'id': 6, 'z0': 0.49, 'z1': 0.01, 'q': 0.49, 'w': 0.01},
  {'id': 7, 'z0': 0.49, 'z1': 0.01, 'q': 0.01, 'w': 0.49},
  {'id': 8, 'z0': 0.01, 'z1': 0.49, 'q': 0.49, 'w': 0.01},
  {'id': 9, 'z0': 0.01, 'z1': 0.49, 'q': 0.01, 'w': 0.49},
  {'id': 10, 'z0': 0.01, 'z1': 0.01, 'q': 0.49, 'w': 0.49},
  {'id': 11, 'z0': 0.33, 'z1': 0.33, 'q': 0.33, 'w': 0.01},
  {'id': 12, 'z0': 0.33, 'z1': 0.33, 'q': 0.01, 'w': 0.33},
  {'id': 13, 'z0': 0.33, 'z1': 0.01, 'q': 0.33, 'w': 0.33},
  {'id': 14, 'z0': 0.01, 'z1': 0.33, 'q': 0.33, 'w': 0.33}],
 'project': {'name': 'ENDURE',
  'log_level': 'INFO',
  'experiments': ['CostSurfaceEp']},
 'log

In [25]:
tj = TrainJob(config_model)
model = tj._build_model()
model_data = torch.load(os.path.join(model_path, 'kcost_min.model'), map_location=torch.device('cpu'))
load_status = model.load_state_dict(model_data)
model.eval()
load_status

<All keys matched successfully>

In [42]:
z0, z1, q, w = (0.33, 0.33, 0.33, 0.01)
analytical_cf = lambda h, T, K: cf.calc_cost(h, T, K, z0, z1, q, w)
def cost_func_objective(args):
    h, T, K = args[0], args[1], args[2:-4]
    return analytical_cf(h, T, K)

def cost_func_learned(args):
    h, T, K = args[0], args[1], args[2:]
    row = [0, 0, 0, 0, h, z0, z1, q, w] + [T]
    row = np.concatenate([row, K])
    _, inputs = tj._process_row(row)
    inputs = torch.from_numpy(inputs.reshape(1, -1))
    with torch.no_grad():
        pred = model(inputs)
        pred = pred.sum().item()
    return pred

In [31]:
one_mib_in_bits = 1024 * 1024 * 8
T_UPPER_LIM = config_model['lsm']['size_ratio']['max']
T_LOWER_LIM = config_model['lsm']['size_ratio']['min']
H_UPPER_LIM = config_model['lsm']['bits_per_elem']['max']
max_levels = config_model['lsm']['max_levels']

h_initial = 1.
T_initial = 2.
K_initial = [1.] * max_levels

h_bounds = (0, H_UPPER_LIM)
T_bounds = (T_LOWER_LIM, T_UPPER_LIM)
K_bounds = [(T_LOWER_LIM - 1, T_UPPER_LIM - 1)] * max_levels

bounds = [h_bounds, T_bounds] + K_bounds
min_cost = np.inf
design = {}
minimizer_kwargs = {
    'method' : 'SLSQP',
    'bounds' : bounds,
    'options': {'ftol': 1e-2, 'eps': 1e-6, 'disp': True}}
#     'options': {'ftol': 1e-9, 'disp': True}}
x0 = np.array([h_initial, T_initial] + K_initial)

In [44]:
cost_func_learned(x0), cost_func_objective(x0)

(17.052112579345703, 4.767236707195653)

In [45]:
analytical_sol = SciOpt.minimize(
    fun=cost_func_objective,
    x0=x0,
    **minimizer_kwargs
)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 1.757976345103807
            Iterations: 5
            Function evaluations: 96
            Gradient evaluations: 5


In [46]:
learned_sol = SciOpt.minimize(
    fun=cost_func_learned,
    x0=x0,
    **minimizer_kwargs
)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 17.052112579345703
            Iterations: 1
            Function evaluations: 19
            Gradient evaluations: 1


In [11]:
# learned_sol

In [12]:
# analytical_sol