In [1]:
import sys
import os
import csv
import numpy as np
import torch

In [2]:
# Make the repository root (two directories above the notebook's working
# directory) importable so that the `endure` and `jobs` packages resolve.
# The path is made absolute so a later change of working directory cannot
# invalidate it, and the membership check keeps re-runs of this cell from
# appending duplicate entries.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

In [3]:
ls ../../


Dockerfile                botorch_baseline.ipynb    [34mnotebook[m[m/
LICENSE                   cost_log.db               pytest.ini
README.md                 [34mdata[m[m/                     requirements.txt
Untitled.ipynb            docker-compose.yaml       requirements_botorch.txt
Untitled1.ipynb           [34mendure[m[m/                   solver.py
Untitled2.ipynb           [31mendure.py[m[m*                [34mtest[m[m/
Untitled3.ipynb           endure.toml               [34mvenv_endure_torch[m[m/
best_designs.csv          [34mexperiments[m[m/
best_designs.txt          [34mjobs[m[m/


In [4]:
from endure.lcm.data.generator import LCMDataGenerator
from endure.data.io import Reader
from jobs.bayesian_pipeline import BayesianPipeline
from endure.lsm.solver.classic_solver import ClassicSolver
from endure.lsm.cost import EndureCost
from endure.lsm.types import LSMDesign, System, Policy, Workload


In [7]:
# Load the experiment configuration from the endure.toml file located two
# directories above the notebook's working directory.
config = Reader.read_config("../../endure.toml")
# Bayesian-optimisation pipeline; compare_designs() calls its run() and
# _find_analytical_results() methods.
bayesian_optimizer = BayesianPipeline(config)
# Data generator (constructed without the config); compare_designs() uses it
# to sample a system and a workload for every run.
generator = LCMDataGenerator()
# Classic solver built from the same configuration.
solver = ClassicSolver(config)
# Cost model built from the same configuration; used further down to evaluate
# one hand-written design via cf.calc_cost(...).
cf = EndureCost(config)

In [11]:
def to_cuda(obj, device=None, _seen=None):
    """Move the tensors held in ``obj``'s instance attributes to a device.

    The object graph is walked through instance ``__dict__`` entries only:
    tensor attributes are replaced by a copy on the target device, and
    attributes that are themselves objects with a ``__dict__`` are visited
    recursively. Tensors stored inside containers (lists, dicts, ...) are
    not touched. ``torch.nn.Module`` instances are moved with their own
    ``.to(device)`` so parameters and buffers stay registered.

    Args:
        obj: Object whose attributes should be moved (modified in place).
        device: Target device. When ``None`` (the default) the target is
            ``"cuda"``; if CUDA is unavailable the function prints
            ``"CUDA not available"`` and leaves ``obj`` unchanged.
        _seen: Internal set of already visited object ids; callers should
            not pass it.

    Returns:
        None.
    """
    import types  # local import: only needed for the ModuleType check below

    if device is None:
        if not torch.cuda.is_available():
            print("CUDA not available")
            return
        device = torch.device("cuda")

    # Track visited objects so reference cycles (a <-> b, obj -> its class ->
    # type -> type ...) cannot cause unbounded recursion.
    if _seen is None:
        _seen = set()
    if id(obj) in _seen:
        return
    _seen.add(id(obj))

    # Let nn.Module move its registered parameters/buffers itself; assigning
    # a plain tensor over a registered parameter would raise a TypeError.
    if isinstance(obj, torch.nn.Module):
        obj.to(device)

    # Only real instance dictionaries are walked. Classes expose a read-only
    # mappingproxy here, and using dir() would also evaluate properties.
    attrs = getattr(obj, "__dict__", None)
    if not isinstance(attrs, dict):
        return

    # Iterate over a snapshot because setattr mutates the dictionary.
    for attr_name, attr_value in list(attrs.items()):
        if isinstance(attr_value, torch.Tensor):
            setattr(obj, attr_name, attr_value.to(device))
        elif isinstance(attr_value, (type, types.ModuleType)):
            # Never descend into classes or imported Python modules.
            continue
        elif hasattr(attr_value, "__dict__"):
            to_cuda(attr_value, device, _seen)


In [12]:
# Try to move every pipeline component onto the GPU, in the order in which
# the components were constructed.
for component in (bayesian_optimizer, generator, solver, cf):
    to_cuda(component)

CUDA not available
CUDA not available
CUDA not available
CUDA not available


In [8]:
def compare_designs(n_runs=10, csv_filename='design_comparison.csv'):
    """Compare Bayesian-optimisation designs with analytical ones and log them.

    For each of ``n_runs`` iterations a system and a workload are sampled from
    the module-level ``generator``, the module-level ``bayesian_optimizer`` is
    asked for both its own design and the analytical one, and a row with the
    sampled inputs, both designs, both costs and the cost difference
    (analytical minus Bayesian) is appended to the CSV file.

    Args:
        n_runs: Number of sampled system/workload pairs to evaluate.
        csv_filename: Output path; the file is opened in ``'w'`` mode, so an
            existing file is overwritten.

    Returns:
        None. Results are written to ``csv_filename``; progress is printed.
    """
    header = [
        'Entries per page(E)',
        'Physical Entries per page(B)',
        'Selectivity(s)',
        'Max bits per element(H)',
        'Total elements (N)',
        'Empty Reads',
        'Non-Empty Reads',
        'Range Queries',
        'Writes',
        'BO Design',
        'Analytical Design',
        'BO Cost',
        'Analytical Cost',
        'Diff(Analytical-Bayesian)',
    ]
    with open(csv_filename, mode='w', newline='') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        #TODO if this works then put this in toml file
        for run_number in range(1, n_runs + 1):
            print(f"Iteration {run_number}/{n_runs} running")
            system = generator._sample_system()
            z0, z1, q, w = generator._sample_workload(4)
            print(np.floor(system.H))
            bo_design, bo_cost = bayesian_optimizer.run(system, z0, z1, q, w)
            analytical_design, analytical_cost = (
                bayesian_optimizer._find_analytical_results(system, z0, z1, q, w)
            )
            # Sampled system parameters, then the workload, then the results.
            record = [system.E, system.B, system.s, system.H, system.N]
            record += [z0, z1, q, w]
            record += [bo_design, analytical_design, bo_cost, analytical_cost]
            record.append(analytical_cost - bo_cost)
            out.writerow(record)

In [9]:
# Run the comparison with its defaults: 10 runs, written to
# 'design_comparison.csv' (opened in 'w' mode, so a previous file is replaced).
compare_designs()

Iteration 1/10 running
14.0
path data/databases/db_cost.db




Best Design Found:
Design: h=2.7680567486745455, T=16.0, Policy=Policy.Leveling, Cost=0.9525173902511597
Cost for the nominal design using analytical solver:  0.9517380523852867
Nominal Design suggested by analytical solver:  LSMDesign(h=2.573988859205945, T=17.305892776319947, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  0.9517380523852867
Nominal Design suggested by analytical solver:  LSMDesign(h=2.573988859205945, T=17.305892776319947, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Iteration 2/10 running
6.0
path data/databases/db_cost.db




Best Design Found:
Design: h=1.0, T=4.0, Policy=Policy.Tiering, Cost=2.292311668395996
Cost for the nominal design using analytical solver:  2.173795230247867
Nominal Design suggested by analytical solver:  LSMDesign(h=3.030713922558791, T=4.495284682641376, policy=<Policy.Tiering: 0>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  2.173795230247867
Nominal Design suggested by analytical solver:  LSMDesign(h=3.030713922558791, T=4.495284682641376, policy=<Policy.Tiering: 0>, Q=1.0, Y=1.0, Z=1.0, K=[])
Iteration 3/10 running
15.0
path data/databases/db_cost.db
Best Design Found:
Design: h=6.105296379465274, T=4.0, Policy=Policy.Leveling, Cost=0.7245389223098755
Cost for the nominal design using analytical solver:  0.7228337534545779
Nominal Design suggested by analytical solver:  LSMDesign(h=5.455940275159894, T=3.8154729920221944, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  0.72283375345



Best Design Found:
Design: h=1.0, T=4.0, Policy=Policy.Leveling, Cost=7.83032751083374
Cost for the nominal design using analytical solver:  7.781628986218873
Nominal Design suggested by analytical solver:  LSMDesign(h=0.0, T=4.286157880053067, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  7.781628986218873
Nominal Design suggested by analytical solver:  LSMDesign(h=0.0, T=4.286157880053067, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Iteration 6/10 running
5.0
path data/databases/db_cost.db




Best Design Found:
Design: h=1.0, T=31.0, Policy=Policy.Leveling, Cost=2.1156692504882812
Cost for the nominal design using analytical solver:  2.1143998811402547
Nominal Design suggested by analytical solver:  LSMDesign(h=0.7648509330697824, T=31.0, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  2.1143998811402547
Nominal Design suggested by analytical solver:  LSMDesign(h=0.7648509330697824, T=31.0, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Iteration 7/10 running
13.0
path data/databases/db_cost.db
Best Design Found:
Design: h=1.2216583039242643, T=6.0, Policy=Policy.Leveling, Cost=4.414978504180908
Cost for the nominal design using analytical solver:  4.370705354224338
Nominal Design suggested by analytical solver:  LSMDesign(h=0.0, T=7.337416398947047, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  4.370705354224338
Nominal Design suggested by 



Best Design Found:
Design: h=6.133330249620504, T=32.0, Policy=Policy.Leveling, Cost=0.6195059418678284
Cost for the nominal design using analytical solver:  0.6193451285211113
Nominal Design suggested by analytical solver:  LSMDesign(h=6.596808474673303, T=31.0, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  0.6193451285211113
Nominal Design suggested by analytical solver:  LSMDesign(h=6.596808474673303, T=31.0, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Iteration 9/10 running
17.0
path data/databases/db_cost.db




Best Design Found:
Design: h=5.207612827801308, T=11.0, Policy=Policy.Leveling, Cost=1.3753105401992798
Cost for the nominal design using analytical solver:  1.3734264153106128
Nominal Design suggested by analytical solver:  LSMDesign(h=5.607815566876443, T=12.070669383061885, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  1.3734264153106128
Nominal Design suggested by analytical solver:  LSMDesign(h=5.607815566876443, T=12.070669383061885, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Iteration 10/10 running
18.0
path data/databases/db_cost.db




Best Design Found:
Design: h=5.1100667622745455, T=10.0, Policy=Policy.Leveling, Cost=1.6052972078323364
Cost for the nominal design using analytical solver:  1.6030481390144793
Nominal Design suggested by analytical solver:  LSMDesign(h=5.412324596151966, T=9.21214187054741, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])
Cost for the nominal design using analytical solver:  1.6030481390144793
Nominal Design suggested by analytical solver:  LSMDesign(h=5.412324596151966, T=9.21214187054741, policy=<Policy.Leveling: 1>, Q=1.0, Y=1.0, Z=1.0, K=[])


In [None]:
# Evaluate the cost model for a single hand-written design/system pair.
# NOTE(review): the four trailing scalars are presumably the workload
# (z0, z1, q, w) in the same order used by compare_designs — confirm against
# EndureCost.calc_cost.
cf.calc_cost(
    LSMDesign(h=15.83, T=31.0, policy=Policy.Leveling, Q=1.0, Y=1.0, Z=1.0, K=[]),
    System(
        E=1024,
        s=2.9102648491798314e-8,
        B=32.0,
        N=526728795,
        H=15.847837338766473,
        phi=1.0,
    ),
    0.09,
    0.205,
    0.605,
    0.09999999999999998,
)

In [None]:
# NOTE(review): `df` is not created anywhere in the visible notebook; it is
# presumably a pandas DataFrame loaded from a CSV written by compare_designs()
# — confirm and add the loading cell so Restart & Run All works.
# Fraction of rows whose 'Diff(Analytical-Bayesian)' value is greater than -0.2.
df[df['Diff(Analytical-Bayesian)'] > -0.2].shape[0] / df.shape[0]

In [None]:
# Show the column labels of df (df itself is defined outside the visible cells).
df.columns

In [None]:
# Display df ordered by its 'norm_delta' column.
# NOTE(review): 'norm_delta' is not one of the columns compare_designs() writes,
# so it must be computed in a cell that is not shown here — confirm.
df.sort_values(by='norm_delta')

In [None]:
# Inspect the 'BO Design' entry of the row at position 2.
df.iloc[2]['BO Design']

In [None]:
# Inspect the 'Analytical Design' entry of the same row (position 2).
df.iloc[2]['Analytical Design']

In [None]:
# Inspect 'Max bits per element(H)' for the row at position 36.
# NOTE(review): this needs at least 37 rows, more than the 10 rows a default
# compare_designs() run writes — df presumably comes from a larger run; confirm.
df.iloc[36]['Max bits per element(H)']

In [None]:
K = 2
# 2 x 3 tensor of bounds: the first row is all zeros, the second row is
# [1, 1, K - 1]; the scaling cell further down treats row 0 as the lower and
# row 1 as the upper limits.
# The original statement ended with a trailing comma, which wrapped the tensor
# in a 1-tuple, so `bounds[:, :2]`-style indexing would fail.
bounds = torch.tensor([[0.0] * 3, [1.0] * 2 + [K - 1]])


In [None]:
# Display the current value of `bounds`.
bounds

In [None]:
k = 2
# Per-column lower and upper limits, stacked into a 2 x 3 tensor
# (row 0 = lower, row 1 = upper). Both rows are written as floats so the
# stacked tensor has one explicit floating dtype instead of relying on implicit
# int64/float32 promotion inside torch.stack.
lower_bounds = torch.tensor([0.0, 2.0, 0.0])
upper_bounds = torch.tensor([10.0, 32.0, 1.0])
bounds = torch.stack([lower_bounds, upper_bounds])
bounds

In [None]:
# One (lower, upper) pair per parameter. Stacking along the last dimension
# turns the pairs into columns, giving a 2 x 3 tensor whose first row holds
# the lower and whose second row holds the upper limits.
# All pairs are written as floats so the stacked tensor has one explicit
# floating dtype instead of mixing int64 pairs with a float32 pair.
h_bounds = torch.tensor([0.0, 10.0])
t_bounds = torch.tensor([2.0, 32.0])
policy_bounds = torch.tensor([0.0, 1.0])
bounds = torch.stack([h_bounds, t_bounds, policy_bounds], dim=-1)
bounds

In [None]:
# Min-max scale the first two (continuous) columns of one sample against the
# matching columns of `bounds` (row 0 = lower, row 1 = upper); the remaining
# (categorical) column is passed through unchanged and re-attached.
x = torch.tensor([[0.7158, 2.0000, 0.0000]])
continuous_data, categorical_data = x[:, :2], x[:, 2:]
cont_lower, cont_upper = bounds[0, :2], bounds[1, :2]
scaled_continuous_data = (continuous_data - cont_lower) / (cont_upper - cont_lower)
scaled_data = torch.cat((scaled_continuous_data, categorical_data), dim=-1)
scaled_data