In [None]:
import sklearn.gaussian_process as gp
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.optimize import minimize

In [None]:
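# Bayesian-optimisation code adapted from github.com/thuijskens/bayesian-optimization

# sample_loss() trains a fastText model with one set of hyperparameters and evaluates it on the test file.
# The cell that calls bayesian_optimisation() runs the tuning; the final cell prints the results.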

In [None]:
def get_tuning_params(bounds, hyper_params):
    """Draw one random hyperparameter vector inside ``bounds``.

    Parameters
    ----------
    bounds : array-like of shape (n_params, 2)
        ``[lower, upper]`` for each hyperparameter, both inclusive for the
        integer-valued ones.
    hyper_params : list of str
        Hyperparameter names, in the same order as the rows of ``bounds``.

    Returns
    -------
    numpy.ndarray of shape (n_params,)
        Integer-valued hyperparameters are drawn with ``np.random.randint``,
        all others with ``np.random.uniform``.

    Notes
    -----
    When both ``minn`` and ``maxn`` are tuned, whichever comes second is
    constrained by the value already drawn for the other, so ``minn <= maxn``
    holds whenever the two ranges allow it. If they do not, the second
    parameter is clamped to the nearest end of its own range.
    """
    discrete = {"minn", "maxn", "wordNgrams", "epoch", "minCount", "minCountLabel"}
    sampled = {}  # name -> value drawn so far, used for the minn/maxn coupling
    params = []

    for index, (low, high) in enumerate(bounds):
        name = hyper_params[index]
        if name in discrete:
            low, high = int(low), int(high)
            if name == "minn" and "maxn" in sampled:
                # keep minn <= the maxn that was actually drawn, but never below minn's own lower bound
                high = max(low, min(high, sampled["maxn"]))
            elif name == "maxn" and "minn" in sampled:
                # keep maxn >= the minn that was actually drawn, but never above maxn's own upper bound
                low = min(high, max(low, sampled["minn"]))
            # randint's upper limit is exclusive, hence the +1
            value = np.random.randint(low=low, high=high + 1)
        else:
            value = np.random.uniform(low=low, high=high)

        sampled[name] = value
        params.append(value)

    return np.array(params)

In [None]:
def expected_improvement(x, gaussian_process, evaluated_loss, greater_is_better=False, n_params=1):
    """Negative expected improvement of the surrogate model at ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        Point(s) to evaluate; reshaped to ``(-1, n_params)`` before prediction.
    gaussian_process : object
        Fitted model whose ``predict(X, return_std=True)`` returns the
        predictive mean and standard deviation.
    evaluated_loss : array-like
        Scores observed so far; their max (or min) is the incumbent optimum.
    greater_is_better : bool
        ``True`` if the score is to be maximised, ``False`` if minimised.
    n_params : int
        Number of hyperparameters (columns of the reshaped ``x``).

    Returns
    -------
    numpy.ndarray
        ``-EI`` for each point, so that a minimiser maximises the expected
        improvement. Points with zero predictive standard deviation get 0.
    """
    x_to_predict = x.reshape(-1, n_params)

    mu, sigma = gaussian_process.predict(x_to_predict, return_std=True)

    if greater_is_better:
        loss_optimum = np.max(evaluated_loss)
    else:
        loss_optimum = np.min(evaluated_loss)

    # +1 when maximising, -1 when minimising, so `improvement` is positive
    # whenever the predicted mean beats the incumbent.
    scaling_factor = 1 if greater_is_better else -1
    improvement = scaling_factor * (mu - loss_optimum)

    # sigma == 0 yields inf (x/0) or nan (0/0); silence both warnings here and
    # overwrite the affected entries right after.
    with np.errstate(divide='ignore', invalid='ignore'):
        Z = improvement / sigma
        ei = improvement * norm.cdf(Z) + sigma * norm.pdf(Z)

    # The model is certain at these points, so no improvement is expected there.
    ei[sigma == 0.0] = 0.0

    return -1 * ei

In [None]:
def sample_next_hyperparameter(acquisition_func, gaussian_process, evaluated_loss,bounds, hyper_params,
                                   greater_is_better=False, n_restarts=25):
    """Propose the next point to evaluate by minimising the acquisition function.

    L-BFGS-B is started from ``n_restarts`` random points drawn with
    ``get_tuning_params`` and the best local optimum found is returned.

    Parameters
    ----------
    acquisition_func : callable
        Called as ``acquisition_func(x, gaussian_process, evaluated_loss,
        greater_is_better, n_params)``; lower values are better.
    gaussian_process : object
        Fitted surrogate model, forwarded to ``acquisition_func``.
    evaluated_loss : array-like
        Scores observed so far, forwarded to ``acquisition_func``.
    bounds : numpy.ndarray of shape (n_params, 2)
        ``[lower, upper]`` per hyperparameter.
    hyper_params : list of str
        Hyperparameter names matching the rows of ``bounds``.
    greater_is_better : bool
        Forwarded to ``acquisition_func``.
    n_restarts : int
        Number of random starting points.

    Returns
    -------
    numpy.ndarray of shape (n_params,)
        Minimiser of the acquisition function. If no restart produced a
        comparable (non-NaN) value, a random point inside ``bounds`` is
        returned instead.
    """
    best_x = None
    # Start from +inf so the first finite result is always accepted.
    best_acquisition_value = np.inf
    n_params = bounds.shape[0]

    for _ in range(n_restarts):
        starting_point = get_tuning_params(bounds=bounds, hyper_params=hyper_params)
        res = minimize(fun=acquisition_func,
                       # scipy documents x0 as a 1-D array of shape (n,)
                       x0=np.ravel(starting_point),
                       bounds=bounds,
                       method='L-BFGS-B',
                       args=(gaussian_process, evaluated_loss, greater_is_better, n_params))
        # res.fun may be a length-1 array when the objective returns one
        acquisition_value = np.ravel(res.fun)[0]
        if acquisition_value < best_acquisition_value:
            best_acquisition_value = acquisition_value
            best_x = res.x

    if best_x is None:
        # Every restart failed (e.g. NaN objective) or n_restarts == 0.
        best_x = get_tuning_params(bounds=bounds, hyper_params=hyper_params)

    return best_x

In [None]:
def _write_history_csv(xp, yp, hyper_params, path):
    """Write every evaluated point and its score (column ``y``) to ``path`` as CSV."""
    history = pd.DataFrame(np.column_stack((xp, yp)), columns=list(hyper_params) + ["y"])
    history.to_csv(path)


def bayesian_optimisation(n_iters, sample_loss, tuning_vars , n_pre_samples=5, alpha=1e-5, epsilon=1e-7, saved_df=None,
                          history_path="test.csv"):
    """Tune hyperparameters with Gaussian-process Bayesian optimisation.

    The score returned by ``sample_loss`` is maximised.

    Parameters
    ----------
    n_iters : int
        Number of optimisation iterations after the initial samples.
    sample_loss : callable
        Called as ``sample_loss(params, hyper_params)`` for the initial samples
        and ``sample_loss(params, hyper_params, iteration)`` in the main loop;
        must return a numeric score.
    tuning_vars : list of [name, lower, upper]
        One entry per hyperparameter to tune.
    n_pre_samples : int
        Number of random points evaluated before the first model fit.
        Ignored when ``saved_df`` is given.
    alpha : float
        Passed as ``alpha`` to ``GaussianProcessRegressor``.
    epsilon : float
        Two points closer than this in every coordinate count as duplicates.
    saved_df : pandas.DataFrame, optional
        Previously evaluated points: one row per point, hyperparameter columns
        in ``tuning_vars`` order, score in the last column. When reloading a
        CSV written by this function, read it with ``index_col=0`` so the index
        column is not mistaken for a hyperparameter.
    history_path : str
        CSV file rewritten with the full history after the initial samples and
        after every iteration.

    Returns
    -------
    (xp, yp) : tuple of numpy.ndarray
        All evaluated points, shape ``(n_points, n_params)``, and their scores.
    """
    hyper_params = [item[0] for item in tuning_vars]
    bounds = np.array([[item[1], item[2]] for item in tuning_vars])

    x_list = []
    y_list = []

    if saved_df is None:
        for _ in range(n_pre_samples):
            params = get_tuning_params(bounds=bounds, hyper_params=hyper_params)
            x_list.append(params)
            y_list.append(sample_loss(params, hyper_params))
    else:
        for saved_row in saved_df.values:
            x_list.append(saved_row[:-1].copy())
            y_list.append(float(saved_row[-1]))

    xp = np.array(x_list)
    yp = np.array(y_list)
    _write_history_csv(xp, yp, hyper_params, history_path)

    kernel = gp.kernels.Matern()
    model = gp.GaussianProcessRegressor(kernel=kernel,
                                        alpha=alpha,
                                        n_restarts_optimizer=10,
                                        normalize_y=True)

    for n in range(n_iters):

        model.fit(xp, yp)

        # Sample next hyperparameter
        next_sample = sample_next_hyperparameter(acquisition_func=expected_improvement, gaussian_process=model,
                                                 evaluated_loss=yp, greater_is_better=True, bounds=bounds,
                                                 hyper_params=hyper_params, n_restarts=100)

        # Duplicates will break the GP. A point is a duplicate only if ALL of its
        # coordinates match one previously evaluated row; comparing coordinates
        # individually would flag nearly every proposal (fixed or integer-valued
        # parameters repeat constantly). Fall back to a random point in that
        # case, or when no proposal was produced.
        if next_sample is None or np.any(np.all(np.abs(next_sample - xp) <= epsilon, axis=1)):
            next_sample = get_tuning_params(bounds=bounds, hyper_params=hyper_params)

        # Sample loss for new set of parameters
        cv_score = sample_loss(next_sample, hyper_params, n + 1)

        # Update lists
        x_list.append(next_sample)
        y_list.append(cv_score)

        # Update xp and yp
        xp = np.array(x_list)
        yp = np.array(y_list)

        # Persist progress after every iteration so an interrupted run can be resumed via saved_df.
        _write_history_csv(xp, yp, hyper_params, history_path)

    return xp, yp


In [None]:
import fastText
import os

def sample_loss(points, hyperparams,n=0):
    """Train a fastText classifier with the given hyperparameters and score it.

    Parameters
    ----------
    points : sequence of numbers
        Hyperparameter values; ``points[i]`` belongs to ``hyperparams[i]``.
    hyperparams : sequence of str
        Names of the hyperparameters being set. Supported names are
        ``minCount``, ``minCountLabel``, ``wordNgrams``, ``minn``, ``maxn``,
        ``lr``, ``dim`` and ``epoch``; anything else raises ``Exception``.
    n : int
        Iteration number, used only in the progress message.

    Returns
    -------
    Element 1 of ``model.test(test_data, 1)``
    (presumably precision@1 -- confirm against the installed fastText version).

    Training and test files are read from the directory named by the
    ``DATADIR`` environment variable (current directory if unset).
    """
    # Hyperparameters rounded to the nearest integer; "lr" is the only float-valued one.
    integer_valued = ("minCount", "minCountLabel", "wordNgrams", "minn", "maxn", "dim", "epoch")

    # Defaults for every exposed option (some fastText hyperparameters are not exposed here).
    settings = {
        "minCount": 1,
        "minCountLabel": 0,
        "wordNgrams": 1,
        "minn": 0,
        "maxn": 0,
        "lr": 0.1,
        "dim": 300,
        "epoch": 5,
    }

    data_dir = os.getenv("DATADIR", "")
    train_data = os.path.join(data_dir, "train_noc_1_level.txt")
    test_data = os.path.join(data_dir, "test_noc_1_level.txt")

    for position, key in enumerate(hyperparams):
        value = points[position]
        if key == "lr":
            # learning rate is kept to two decimals
            settings["lr"] = round(value, 2)
        elif key in integer_valued:
            settings[key] = int(round(value))
        else:
            raise Exception("Invalid Hyperparameter: ", key)

    model = fastText.train_supervised(input=train_data, **settings)

    print("Iteration", n, "complete")
    return model.test(test_data, 1)[1]


In [None]:

# Search space: one [name, lower bound, upper bound] entry per hyperparameter.
tune_variables = [
    ["minn", 1, 4],
    ["maxn", 1, 6],
    ["wordNgrams", 1, 5],
    ["dim", 100, 100],  # lower == upper, so dim stays fixed at 100
    ["epoch", 10, 100],
    ["lr", 0, 1],
    ["minCount", 0, 8],
]

# Run 20 optimisation iterations; xp holds the evaluated points, yp their scores.
xp, yp = bayesian_optimisation(
    n_iters=20,
    sample_loss=sample_loss,
    tuning_vars=tune_variables,
)

In [None]:
# Show every evaluated hyperparameter vector, then the matching scores.
print(xp, yp, sep="\n")