In [96]:
import sys
import numpy as np
import pandas as pd
from sklearn.svm import SVC,SVR
import os
import sys
from MFTreeSearchCV.MFTreeSearchCV import *
from mf.mf_func import *
import scipy as sp
import scipy.io as scio
import pickle as pkl
from multipolyfit.multipolyfit import multipolyfit
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import linear_model 
import matlab.engine

In [None]:
# Start MATLAB through MATLAB Engine API
eng = matlab.engine.start_matlab()

In [97]:
# Load Data: All of it
RHO_mat = scio.loadmat('data/mountain_car/RHO.mat')
X0_mat = scio.loadmat('data/mountain_car/X0.mat')
V0_mat = scio.loadmat('data/mountain_car/V0.mat')

RHO = RHO_mat['RHO']
X0 = X0_mat['X0']
V0 = V0_mat['V0']

# Data for violations:
rho_r_mat = scio.loadmat('data/mountain_car/rho_r.mat')
xr_mat = scio.loadmat('data/mountain_car/xr.mat')
vr_mat = scio.loadmat('data/mountain_car/vr.mat')

rho_r = rho_r_mat['rho_r']
xr = xr_mat['xr']
vr = vr_mat['vr']

# Data for successes:
rho_g_mat = scio.loadmat('data/mountain_car/rho_g.mat')
xg_mat = scio.loadmat('data/mountain_car/xg.mat')
vg_mat = scio.loadmat('data/mountain_car/vg.mat')

rho_g = rho_g_mat['rho_g']
xg = xg_mat['xg']
vg = vg_mat['vg']

Nsamples = len(X0.T)
assert(Nsamples == len(V0.T))
assert(Nsamples*Nsamples == len(RHO[:,0]))
print(Nsamples)
DEG = 4 # Degree of the polynomial that is being fit to the data
SET_APPROX = 0
disc_z = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] # discretizing z
step = 0.1
ndisc = len(disc_z)
print(ndisc)
fidel_dim = 2

# Regressors:
classifiers = [
    SVR(),
    linear_model.SGDRegressor(),
    linear_model.BayesianRidge(),
    linear_model.LassoLars(),
    linear_model.ARDRegression(),
    linear_model.PassiveAggressiveRegressor(),
    linear_model.TheilSenRegressor(),
    linear_model.LinearRegression()]

100
10


In [60]:
# Cell to play around with data:
x0_idx = 0.2
v0_idx = 0.1
idx = ndisc*(x0_idx*10 - 1) + v0_idx*10
x0_num = int((x0_idx + step)*Nsamples)
v0_num = int((v0_idx + step)*Nsamples)
x0_indices = np.random.choice(Nsamples, x0_num, replace=False)
v0_indices = np.random.choice(Nsamples, v0_num, replace=False)
rho0_indices = [Nsamples*(x_ii-1)+v_ii  for x_ii in x0_indices for v_ii in v0_indices]
x0_samples = [X0.T[ii,0] for ii in x0_indices] # Indices for x0 from the original data
v0_samples = [V0.T[ii,0] for ii in v0_indices] # Indices for v0 from the original data
assert(len(rho0_indices) == len(x0_indices)*len(v0_indices))
x0 = [[xi, vi] for xi in x0_samples for vi in v0_samples] # Combined x0 and v0
rho = [RHO[ii,0] for ii in rho0_indices]
assert(len(x0) == len(rho))

In [100]:
# Constructing function approximations using polyfit:
fname_approx_polyfit = "Approximations/multipolyfit_approx.pkl"
fname_approx_logreg = "Approximations/logreg_approx.pkl"

rsv_approxs_polyfit = dict() # List containing poly-fit functions that approximate the robust satisfaction value
rsv_approxs_logreg = dict() # List containing poly-fit functions that approximate the robust satisfaction value
    
def construct_approx_polyfit():
    for x0_idx in disc_z:
        for v0_idx in disc_z: 
            idx = ndisc*(x0_idx*10 - 1) + v0_idx*10
            x0_num = int((x0_idx + step)*Nsamples)
            v0_num = int((v0_idx + step)*Nsamples)
            assert(x0_num <= Nsamples)
            if(v0_num > Nsamples):
                print(v0_num)
                assert(v0_num <= Nsamples)
            x0_indices = np.random.choice(Nsamples, x0_num, replace=False)
            v0_indices = np.random.choice(Nsamples, v0_num, replace=False)
            rho0_indices = [Nsamples*(x_ii-1)+v_ii  for x_ii in x0_indices for v_ii in v0_indices]
            x0_samples = [X0.T[ii,0] for ii in x0_indices] # Indices for x0 from the original data
            v0_samples = [V0.T[ii,0] for ii in v0_indices] # Indices for v0 from the original data
            assert(len(rho0_indices) == len(x0_indices)*len(v0_indices))
            x0 = np.array([[xi, vi] for xi in x0_samples for vi in v0_samples]) # Combined x0 and v0
            rho = np.array([RHO[ii,0] for ii in rho0_indices])
            assert(len(x0) == len(rho))
            
            model = multipolyfit(x0, rho, DEG, model_out = True)
            print(model)
            rsv_approxs_polyfit[idx] = model
    # pkl.dump(rsv_approxs, open(fname_approx_polyfit, "wb"))

# Constructing function approximations via tree search:
def construct_approx_logreg():
    for x0_idx in disc_z:
        for v0_idx in disc_z:
            idx = ndisc*(x0_idx*10 - 1) + v0_idx*10
            x0_num = int((x0_idx + step)*Nsamples)
            v0_num = int((v0_idx + step)*Nsamples)
            assert(x0_num <= Nsamples)
            if(v0_num > Nsamples):
                print(v0_num)
                assert(v0_num <= Nsamples)
            x0_indices = np.random.choice(Nsamples, x0_num, replace=False)
            v0_indices = np.random.choice(Nsamples, v0_num, replace=False)
            rho0_indices = [Nsamples*(x_ii-1)+v_ii  for x_ii in x0_indices for v_ii in v0_indices]
            x0_samples = [X0.T[ii,0] for ii in x0_indices] # Indices for x0 from the original data
            v0_samples = [V0.T[ii,0] for ii in v0_indices] # Indices for v0 from the original data
            assert(len(rho0_indices) == len(x0_indices)*len(v0_indices))
            x0 = np.array([[xi, vi] for xi in x0_samples for vi in v0_samples]) # Combined x0 and v0
            rho = np.array([RHO[ii,0] for ii in rho0_indices])
            assert(len(x0) == len(rho))
            
            X_train, X_test, y_train, y_test = train_test_split(x0, rho)
            reg = classifiers[1]
            reg.fit(X_train, y_train)
            print(reg.score(X_test, y_test))
            rsv_approxs_logreg[idx] = reg
    # pkl.dump(rsv_approxs, open(fname_approx_logreg, "wb"))
    
            

In [101]:
# Calling functions to construct the models:
construct_approx_polyfit()
construct_approx_logreg()

  beta = linalg.lstsq(A, y)[0]


<function mk_model.<locals>.model at 0x7f4f46bef378>
<function mk_model.<locals>.model at 0x7f4f46bef7b8>
<function mk_model.<locals>.model at 0x7f4f46befb70>
<function mk_model.<locals>.model at 0x7f4f46bef840>
<function mk_model.<locals>.model at 0x7f4f46bef2f0>
<function mk_model.<locals>.model at 0x7f4f46bef620>
<function mk_model.<locals>.model at 0x7f4f46bef730>
<function mk_model.<locals>.model at 0x7f4f46beff28>
<function mk_model.<locals>.model at 0x7f4f46bef598>
<function mk_model.<locals>.model at 0x7f4f46bef158>
<function mk_model.<locals>.model at 0x7f4f46bef9d8>
<function mk_model.<locals>.model at 0x7f4f46befae8>
<function mk_model.<locals>.model at 0x7f4f46bef488>
<function mk_model.<locals>.model at 0x7f4f46befe18>
<function mk_model.<locals>.model at 0x7f4f46bef510>
<function mk_model.<locals>.model at 0x7f4f46befa60>
<function mk_model.<locals>.model at 0x7f4f46befd08>
<function mk_model.<locals>.model at 0x7f4f46bef950>
<function mk_model.<locals>.model at 0x7f4f46b

In [126]:
# Constructing function approximations of the robust satisfaction value at multiple fidelities:
# Use the polyfit function to create approximations:
# Using discrete fidelity bounds of 1-D fidelity: z = [0.0 0.1, 0.2, 0.3, ...., 0.9]. z \in [0,0.1) is evaluated at fidelity 0.0. 
# z = 1 is fidelity at ground truth value, and we run the simulator for that. not given with the approximations
def find_z_closest(z, fidel_dim):
    assert(len(z)==fidel_dim)
    z1 = 0 # Normalized fidelity for x0
    z2 = 0 # Normalized fidelity for  v0
    for ii in range(1, len(disc_z)):
        if(z[0]>=disc_z[ii-1] and z[0]<disc_z[ii]):
            z1 = disc_z[ii-1]
        if(z[1]>=disc_z[ii-1] and z[1]<disc_z[ii]):
            z2 = disc_z[ii-1]
    return z1, z2

def get_approx_polyfit(approx_id):
    rsv_approxs = rsv_approxs_polyfit.copy()
    return rsv_approxs[approx_id]

def get_approx_logreg(approx_id):
    rsv_approxs = rsv_approxs_logreg.copy()
    return rsv_approxs[approx_id]

def retrieve_approximation(z, fidel_dim, approx_type):
    z1, z2 = find_z_closest(z, fidel_dim)
    if (z1 < 1):
        z1_id = z1*10
    if (z2 < 1):
        z2_id = z2*10
    approx_id = ndisc*(z1-1)+z2
    if (approx_type == "logreg"):
        approximation = get_approx_logreg(approx_id)
    if (approx_type == "polyfit"):
        approximation = get_approx_logreg(approx_id)
    return approximation
        
   

In [None]:
# Testing 

In [130]:
def rsv_function(z,x, approx_type):
    # """ Computes the rsv function. """
    func_computation = 0
    if(approx_type == "logreg"):
        func_computation = rsv_function_logreg(x,z)
    if (approx_type == "polyfit"):
        func_computation = rsv_function_polyfit(x,z)
    return func_computation

def rsv_function_logreg(x, z):
    #""" Alternative form for the branin function. """
    approximation = retrieve_approximation(z, fidel_dim, "logreg")
    print(approximation)
    func_computation = approximation.predict(x)
    return -func_computation

def rsv_function_polyfit(x, z):
    #""" Alternative form for the branin function. """
    approximation = retrieve_approximation(z, fidel_dim, "polyfit")
    return branin_function(x, a, alpha[0], alpha[1], r, s, alpha[2])

def get_mf_rsv_function(fidel_dim, approx_type):
    #""" Returns the rsv function as a multifidelity function. ""
    
    def mf_rsv_obj(x, z, approx_type):
    #""" Wrapper for the MF rsv objective.""" 
        assert len(z) == fidel_dim
        return rsv_function(z,x,approx_type)
    
    # Other data
    opt_fidel = np.ones((fidel_dim)) - 0.1 # For now, exclude the ground truth
    fidel_bounds = [[0, 0.9]] * fidel_dim
    opt_pt = np.array([-0.52, -0.0297])
    domain_bounds = [[-1, 0.6], [-0.42, 0.42]] # for x = [x0_bound, v0_bound]
    return mf_rsv_obj, opt_pt, opt_fidel, fidel_bounds, domain_bounds
    
def get_mf_rsv_as_mfof(fidel_dim, approx_type):
#""" Wrapper for get_mf_branin_function which returns as a mfof. """
    mf_rsv_obj, opt_pt, opt_fidel, fidel_bounds, domain_bounds = get_mf_rsv_function(fidel_dim, approx_type)
    fidel_cost_function = _get_mf_cost_function(fidel_bounds, True) # Figure out this line
    opt_val = mf_rsv_obj(opt_fidel, opt_pt)
    return MFOptFunction(mf_rsv_obj, fidel_cost_function, fidel_bounds, domain_bounds,
                       opt_fidel, vectorised=False, opt_pt=opt_pt, opt_val=opt_val)

In [132]:
# Testing RSV function:
# Testing Brannin Function:
fidel_dim = 2
mf_rsv_obj, opt_pt, opt_fidel, fidel_bounds, domain_bounds = get_mf_rsv_function(fidel_dim, "logreg")
mf_rsv_obj([0.1, 0.1], [0.1, 0.2], "logreg")
mf_rsv_obj([-0.5, -0.3], [0.1, 0.2], "logreg")

KeyError: -8.8

In [None]:
# Quit the MATLAB engine:
eng.quit()