In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd 
from vega_datasets import data

df = data.movies()

# Columns to keep: every non-object column, plus object columns that have
# fewer than 10 distinct values.
name_attrs = [
    name
    for name, column in df.items()
    if column.dtype != "object" or column.nunique() < 10
]
df = df.loc[:, name_attrs]
df.info()

In [None]:
from src.oracle import ColumbusProbOracle, OracleWeight
from src.ProbColumbus import ProbColumbus, ColumbusConfig, SamplingWeight, chart_type, agg_type

# Shared objects used by `func` below and by the optimisation cell.
# The oracle is built from an OracleWeight constructed with no arguments.
oracle_weight = OracleWeight()
oracle = ColumbusProbOracle(oracle_weight)
# The sampler/scorer is bound to the filtered movies frame `df` and a
# ColumbusConfig constructed with no arguments.
prob = ProbColumbus(df, ColumbusConfig())

def func(weight, n_samples=10, n_dashboards=20, targets=("attr_Running_Time_min",)):
    """Objective function: negated mean score of dashboards sampled with `weight`.

    Draws `n_dashboards` dashboards with ``prob.sample_n(n_samples, weight)``,
    scores each one with ``prob.infer(dashboard, oracle, targets)`` and
    averages the scores. The mean is negated before it is returned, so a
    minimiser that is fed this value favours weights with a higher mean score.

    Relies on the notebook-level ``prob`` and ``oracle`` objects.

    Args:
        weight: Sampling weight forwarded unchanged to ``prob.sample_n``.
        n_samples: First argument of ``prob.sample_n`` for every dashboard.
        n_dashboards: Number of dashboards to draw and score; must be >= 1.
        targets: Attribute names forwarded (as a list) to ``prob.infer``.

    Returns:
        The negated ``np.mean`` of the per-dashboard scores.

    Raises:
        ValueError: If ``n_dashboards`` is smaller than 1 (the mean of no
            scores would be NaN).
    """
    if n_dashboards < 1:
        raise ValueError("n_dashboards must be at least 1")

    # Sample every dashboard first, then score them, in that order.
    dashboards = [
        prob.sample_n(n_samples, weight)
        for _ in range(n_dashboards)
    ]
    # A fresh list per call keeps each `infer` call independent of the others.
    scores = [
        prob.infer(dashboard, oracle, list(targets))
        for dashboard in dashboards
    ]
    return -np.mean(scores)


In [None]:
from skopt import Optimizer
from skopt.space import Space, Real
from IPython.display import clear_output

attrs = [None] + name_attrs

N_ITERATIONS = 30  # number of ask / evaluate / tell rounds
SEED = 0  # seeds the optimizer only; NOTE(review): sampling inside `func` is not seeded from here

# One Real(0.01, 1.0) dimension per weight entry. `x` has one dimension fewer
# than `y` and `z` because it skips the leading None entry of `attrs`.
x = [Real(0.01, 1.0, name=f"x{i}") for i in range(len(attrs) - 1)]
y = [Real(0.01, 1.0, name=f"y{i}") for i in range(len(attrs))]
z = [Real(0.01, 1.0, name=f"z{i}") for i in range(len(attrs))]
ct = [Real(0.01, 1.0, name=f"ct{i}") for i in range(len(chart_type))]
at = [Real(0.01, 1.0, name=f"at{i}") for i in range(len(agg_type))]

# The flat search space is the concatenation x | y | z | ct | at. The split
# points are derived from the group sizes so the unpacking in the loop always
# matches the layout of the space.
dimension_groups = [x, y, z, ct, at]
opt_weight = Space([dim for group in dimension_groups for dim in group])
split_points = np.cumsum([len(group) for group in dimension_groups])[:-1]

opt = Optimizer(
    opt_weight,
    base_estimator="GP",
    n_initial_points=10,
    acq_func="EI",
    random_state=SEED,
)

# Every SamplingWeight that was evaluated, in evaluation order.
weights = []

for i in range(N_ITERATIONS):
    params = opt.ask()
    # Cut the flat parameter vector back into its five groups. Distinct names
    # are used so the dimension lists x / y above are not overwritten.
    x_w, y_w, z_w, ct_w, at_w = np.split(np.array(params), split_points)
    weight = SamplingWeight(
        x=x_w,
        y=y_w,
        z=z_w,
        ct=ct_w,
        at=at_w,
        attr=attrs,
        chart_type=chart_type,
        agg_type=agg_type,
    )

    weights.append(weight)

    # Replace the previous iteration's output instead of appending to it.
    clear_output(wait=True)
    print(i)
    display(weight.visualize())
    score = func(weight)
    opt.tell(params, score)
    # Best objective value observed so far.
    print(opt.get_result().fun)


print(opt.get_result())




In [None]:
import altair as alt


# NOTE(review): this rebinds `data`, the name imported from vega_datasets in
# the first cell and used there as `data.movies()`. After this cell runs,
# `data` is a plain dict until that import is executed again; consider a
# different name.
data = {
    
}
# NOTE(review): the chart is constructed with no arguments, so the `data`
# dict above is not passed to it; this looks like an unfinished stub.
chart = alt.Chart()