In [None]:
%matplotlib notebook
from matplotlib.backends.backend_pdf import PdfPages

import matplotlib.pyplot as plt
import boto
from boto.s3.connection import S3Connection
import pickle
import numpy as np
# from data_io import send_data_to_s3, get_data_from_s3
import pandas as pd
import os

# boto.connect_s3() itself builds an S3Connection from the default credential
# sources, so constructing a separate S3Connection() first only created an
# object that was overwritten on the next line.
conn = boto.connect_s3()
# validate=True asks boto to check the bucket when it is fetched instead of
# deferring any failure to the first read.
bucket = conn.get_bucket('poloczeks3', validate=True)

In [None]:
# Line width passed as `linewidth` to every errorbar series drawn by the
# plotting functions in this notebook.
linethickness = 2

def get_data_from_s3(bucket, key):
    """Fetch the object stored under ``key`` in ``bucket`` and unpickle it.

    Parameters
    ----------
    bucket : object
        Anything exposing ``get_key(name)``; in this notebook a boto S3 bucket.
    key : str
        Name of the key to read.

    Returns
    -------
    object
        The result of unpickling the key's contents.

    Raises
    ------
    ValueError
        If ``bucket.get_key(key)`` returns ``None``.
    """
    s3_key = bucket.get_key(key)
    if s3_key is None:
        # Name the missing key so the failure can be traced to its call site.
        raise ValueError("key not found: {0}".format(key))
    # NOTE(security): pickle.loads can execute code embedded in the payload;
    # only read keys that were written by trusted producers.
    return pickle.loads(s3_key.get_contents_as_string())

def send_data_to_s3(bucket, key, data):
    """Pickle ``data`` and store it under ``key`` in ``bucket``.

    An existing key is overwritten; a key that does not exist yet is created
    through ``bucket.new_key``. Returns ``None``.
    """
    target = bucket.get_key(key)
    # Only allocate a new key when the lookup found nothing.
    target = bucket.new_key(key) if target is None else target
    serialized = pickle.dumps(data)
    target.set_contents_from_string(serialized)

In [None]:
def process_s3_data(bucket, prefix, num_repl, num_steps):
    """Collect per-replication results stored under ``prefix + str(i)``.

    For ``i`` in ``range(num_repl)`` the key ``prefix + str(i)`` is read and
    unpickled. The payload is indexed with the keys ``'best'``, ``'cost'`` and
    ``'init_best_truth'``. A replication is skipped, with a message printed,
    when its key is missing or when ``len(d['best']) < num_steps``.

    Parameters
    ----------
    bucket : object exposing ``get_key(name)`` (a boto S3 bucket in this notebook)
    prefix : str
        Key prefix; the replication index is appended without a separator.
    num_repl : int
        Number of replication indices to try.
    num_steps : int
        Minimum number of entries ``d['best']`` must have to be kept.

    Returns
    -------
    tuple of three ``numpy.ndarray``
        ``(best, cost, baseline)`` with one entry per kept replication:
        ``best`` rows are ``init_best_truth`` followed by ``d['best']``,
        ``cost`` rows are ``0`` followed by ``d['cost']``, and ``baseline``
        holds each replication's ``init_best_truth``.
    """
    data_best, data_cost, data_baseline = [], [], []
    for i in range(num_repl):
        # Report the exact key that was looked up, not a reconstructed name.
        key_name = prefix + str(i)
        s3_key = bucket.get_key(key_name)
        if s3_key is None:
            print("{0} not exist".format(key_name))
            continue
        # NOTE(security): pickle.loads can execute code embedded in the payload;
        # only read keys written by trusted producers.
        d = pickle.loads(s3_key.get_contents_as_string())
        num_points = len(d['best'])
        if num_points < num_steps:
            print("{0} has {1} data points".format(key_name, num_points))
            continue
        # NOTE(review): runs longer than num_steps are kept at full length; if
        # kept runs differ in length the stacked result is not a regular 2-D
        # array — confirm all runs share one length.
        data_best.append(np.concatenate(([d['init_best_truth']], d['best'])))
        data_cost.append(np.concatenate(([0], d['cost'])))
        data_baseline.append(d['init_best_truth'])
    return np.array(data_best), np.array(data_cost), np.array(data_baseline)


def benchmark_plot(mkg_best, mkg_cost, mei_best, mei_cost, ego_best, ego_cost, pes_best, pes_cost, baseline, fig_name, loc='upper right', ub_ylim = 40, fixcost = 1.0, fixcost_ego = 1.0):
    """Plot mean best-so-far value against mean log(total cost) for four methods.

    Every ``*_best`` / ``*_cost`` argument is reduced along axis 0, so each is
    expected to be a 2-D array with one row per replication. Curves show the
    mean over rows; error bars in x and y are two standard errors
    (``2 * std / sqrt(number of rows)``).

    Parameters
    ----------
    mkg_best, mei_best, ego_best, pes_best : numpy.ndarray
        Best-so-far values, plotted under the labels misoKG, misoEI, EGO and MTBO+.
    mkg_cost, mei_cost, ego_cost, pes_cost : numpy.ndarray
        Cumulative costs matching the arrays above.
    baseline : unused by the active code; kept for call compatibility.
    fig_name : str
        Only used by the optional (commented-out) ``savefig`` call.
    loc : str
        Legend location.
    ub_ylim : number
        Only used by the optional (commented-out) ``ylim`` call.
    fixcost : float
        Constant added to the misoKG, misoEI and MTBO+ costs before the log.
    fixcost_ego : float
        Constant added to the EGO costs before the log.

    Returns
    -------
    None. Draws into a new pyplot figure and prints the number of rows
    (replications) plotted per method.
    """
    # (label, cost, best, cost offset, linestyle, color), in legend order.
    series = (
        ('misoKG', mkg_cost, mkg_best, fixcost, '-', 'b'),
        ('misoEI', mei_cost, mei_best, fixcost, '--', 'g'),
        ('EGO', ego_cost, ego_best, fixcost_ego, ':', 'c'),
        ('MTBO+', pes_cost, pes_best, fixcost, '-.', 'r'),
    )
    # Transform all costs before opening a figure so that bad input fails
    # without leaving an empty figure behind.
    log_costs = [np.log(cost + offset) for _, cost, _, offset, _, _ in series]
    plt.figure()
    for (label, _, best, _, linestyle, color), x in zip(series, log_costs):
        plt.errorbar(
            x.mean(axis=0), best.mean(axis=0),
            xerr=x.std(axis=0) / np.sqrt(x.shape[0]) * 2.0,
            yerr=best.std(axis=0) / np.sqrt(best.shape[0]) * 2.0,
            label=label, linestyle=linestyle, color=color, linewidth=linethickness)
#     plt.ylim(-1, ub_ylim)
    plt.xlabel("log(Total Cost)", fontsize=20)
    plt.ylabel("best so far", fontsize=20)
    ax = plt.gca()
    handles, labels = ax.get_legend_handles_labels()
    # Each errorbar handle is a container; element 0 is its data line, so the
    # legend shows plain line samples without error-bar glyphs.
    handles = [h[0] for h in handles]
    ax.legend(handles, labels, numpoints=1, loc=loc)
#     plt.savefig(fig_name+'.pdf') # enable it if you want to save to pdf
    print("mkg: {0}, mei: {1}, ego: {2}, pes: {3}".format(mkg_best.shape[0], mei_best.shape[0], ego_best.shape[0], pes_best.shape[0]))

In [None]:
# rbRemi from latest run
# Load up to num_repl replications per method (key = prefix + replication index);
# process_s3_data skips replications with fewer than 25 recorded steps.
# The resulting globals are reused by the "with fixcost" cell that follows.
num_repl = 100
mkg_best, mkg_cost, mkg_baseline = process_s3_data(bucket, "miso/result/mkg_rbRemi_repl_", num_repl, 25)
pes_best, pes_cost, pes_baseline = process_s3_data(bucket, "miso/result/pes_2_rbRemi_repl_", num_repl, 25)
ego_best, ego_cost, ego_baseline = process_s3_data(bucket, "miso/result/ego_rbRemi_repl_", num_repl, 25)
mei_best, mei_cost, mei_baseline = process_s3_data(bucket, "miso/result/mei_rbRemi_repl_", num_repl, 25)
# Only mkg_baseline is forwarded as the baseline argument; the other baselines are not used here.
benchmark_plot(mkg_best, mkg_cost, mei_best, mei_cost, ego_best, ego_cost, pes_best, pes_cost, mkg_baseline, "rbRemi")

In [None]:
# rbRemi with fixcost
# Re-plots the arrays loaded by the rbRemi cell above (run that cell first),
# adding a constant to every cost before the log transform.
# NOTE(review): the factors 5, 1000. and 1.0 are not explained in this notebook — confirm their meaning.
fixcost = 5 * (1000. + 1.0)
fixcost_ego = 5000.
benchmark_plot(mkg_best, mkg_cost, mei_best, mei_cost, ego_best, ego_cost, pes_best, pes_cost, mkg_baseline, "rbRemi", fixcost=fixcost, fixcost_ego=fixcost_ego)

In [None]:
# rbNew from latest run
# Same procedure as for rbRemi: load up to num_repl replications per method,
# skipping those with fewer than 25 recorded steps. This overwrites the
# mkg_/pes_/ego_/mei_ globals set by the rbRemi cells.
num_repl = 100
mkg_best, mkg_cost, mkg_baseline = process_s3_data(bucket, "miso/result/mkg_rbNew_repl_", num_repl, 25)
pes_best, pes_cost, pes_baseline = process_s3_data(bucket, "miso/result/pes_2_rbNew_repl_", num_repl, 25)
ego_best, ego_cost, ego_baseline = process_s3_data(bucket, "miso/result/ego_rbNew_repl_", num_repl, 25)
mei_best, mei_cost, mei_baseline = process_s3_data(bucket, "miso/result/mei_rbNew_repl_", num_repl, 25)
# Only mkg_baseline is forwarded as the baseline argument.
benchmark_plot(mkg_best, mkg_cost, mei_best, mei_cost, ego_best, ego_cost, pes_best, pes_cost, mkg_baseline, "rbNew")

In [None]:
# rbNew with fixcost
# Re-plots the arrays loaded by the rbNew cell above (run that cell first),
# adding a constant to every cost before the log transform.
# NOTE(review): the factors 5, 50. and 1. are not explained in this notebook — confirm their meaning.
fixcost = 5 * (50. + 1.)
fixcost_ego = 5 * 50.
benchmark_plot(mkg_best, mkg_cost, mei_best, mei_cost, ego_best, ego_cost, pes_best, pes_cost, mkg_baseline, "rbNew", fixcost=fixcost, fixcost_ego=fixcost_ego)

In [None]:
# atoext, 150 steps. misoEI is read from the mysql backup because its S3
# results are not available yet.
num_repl = 100
mkg_best, mkg_cost, mkg_baseline = process_s3_data(bucket, "miso/result/mkg_atoext_150steps_repl_", num_repl, 150)
pes_best, pes_cost, pes_baseline = process_s3_data(bucket, "miso/result/pes_2_atoext_150steps_repl_", num_repl, 150)
ego_best, ego_cost, ego_baseline = process_s3_data(bucket, "miso/result/ego_atoext_150steps_repl_", num_repl, 150)
# mei_best, mei_cost, mei_baseline = process_s3_data(bucket, "miso/result/mei_atoext_150steps_repl_", num_repl, 150)
mei_best = get_data_from_s3(bucket, "miso/mysql_backup/mei_atoext_best")
mei_cost = get_data_from_s3(bucket, "miso/mysql_backup/mei_atoext_cost")
num_mei_repl = mei_best.shape[0]
# Prepend a step-0 column so the backup data lines up with process_s3_data
# output: the misoKG baselines for `best`, zeros for `cost`.
# NOTE(review): np.hstack needs mkg_baseline to hold at least num_mei_repl entries — confirm.
mei_best = np.hstack((mkg_baseline[:num_mei_repl].reshape((-1,1)), mei_best))
mei_cost = np.hstack((np.zeros(num_mei_repl).reshape((-1,1)), mei_cost))
benchmark_plot(mkg_best, mkg_cost, mei_best, mei_cost, ego_best, ego_cost, pes_best, pes_cost, mkg_baseline, "atoext150steps")

In [None]:
# ATOext with fixcosts
# Re-plots the arrays prepared by the atoext cell above (run that cell first),
# adding a constant to every cost before the log transform.
# NOTE(review): the factors 20, 17.1, 3.9 and 0.5 are not explained in this notebook — confirm their meaning.
fixcost = 20 * (17.1 + 3.9 + 0.5)
fixcost_ego = 20 * 17.1
benchmark_plot(mkg_best, mkg_cost, mei_best, mei_cost, ego_best, ego_cost, pes_best, pes_cost, mkg_baseline, "atoext150steps", fixcost=fixcost, fixcost_ego=fixcost_ego)

In [None]:
################################################################################
# Temporary solution for the RB experiments from old runs; delete it once the new runs are available.
def gain(baseline, x, scaling=False):
    """Return the improvement of ``x`` relative to ``baseline``.

    Without scaling this is simply ``baseline - x``. With ``scaling=True`` the
    drop from the first element of ``x`` is rescaled linearly so that the first
    entry maps to 0 and the last entry maps to ``baseline - x[-1]``.
    """
    if not scaling:
        return baseline - x
    first, last = x[0], x[-1]
    return (baseline - last) * (first - x) / (first - last)
def benchmark_plot_old_standard(mkg_best, mkg_cost, mei_best, mei_cost, ego_best, ego_cost, pes_best, pes_cost, baseline, fig_name, loc='lower right', ub_ylim = 40, fixcost = 0.0, fixcost_ego = 0.0):
    """Plot scaled gain against mean log(total cost) for four methods (old-run data).

    For every method the y value is
    ``gain(baseline[0], best.mean(axis=0), scaling=True)`` and the x value is
    the mean over rows of ``log(cost + offset)``. Error bars are two standard
    errors (``2 * std / sqrt(number of rows)``) of the log costs and of the
    raw ``best`` values.

    Parameters
    ----------
    mkg_best, mei_best, ego_best, pes_best : numpy.ndarray
        Best-so-far values, reduced along axis 0; plotted under the labels
        misoKG, misoEI, EGO and MTBO+.
    mkg_cost, mei_cost, ego_cost, pes_cost : iterable of rows
        Costs; the offset is added row by row before taking the log.
    baseline : indexable
        Only ``baseline[0]`` is used, as the reference value for ``gain``.
    fig_name : str
        Only used by the optional (commented-out) ``savefig`` call.
    loc : str
        Legend location.
    ub_ylim : number
        Upper y-axis limit; the lower limit is fixed at 0.
    fixcost : float
        Constant added to the misoKG, misoEI and MTBO+ costs before the log.
    fixcost_ego : float
        Constant added to the EGO costs before the log.

    Returns
    -------
    None. Draws into a new pyplot figure and prints the number of rows
    (replications) plotted per method.
    """
    # (label, cost, best, cost offset, linestyle, color), in legend order.
    series = (
        ('misoKG', mkg_cost, mkg_best, fixcost, '-', 'b'),
        ('misoEI', mei_cost, mei_best, fixcost, '--', 'g'),
        ('EGO', ego_cost, ego_best, fixcost_ego, ':', 'c'),
        ('MTBO+', pes_cost, pes_best, fixcost, '-.', 'r'),
    )
    # The offset is added per row (not to the container as a whole), then the
    # rows are stacked by np.log. All costs are transformed before the figure
    # is opened so bad input fails without leaving an empty figure behind.
    log_costs = [np.log([(row + offset) for row in cost]) for _, cost, _, offset, _, _ in series]
    plt.figure()
    for (label, _, best, _, linestyle, color), x in zip(series, log_costs):
        # NOTE(review): yerr comes from the unscaled `best` values while the
        # plotted y is the scaled gain — confirm this mismatch is intended.
        plt.errorbar(
            x.mean(axis=0), gain(baseline[0], best.mean(axis=0), scaling=True),
            xerr=x.std(axis=0) / np.sqrt(x.shape[0]) * 2.0,
            yerr=best.std(axis=0) / np.sqrt(best.shape[0]) * 2.0,
            label=label, linestyle=linestyle, color=color, linewidth=linethickness)
    plt.ylim(0, ub_ylim)
    plt.xlabel("log(Total Cost)", fontsize=20)
    plt.ylabel("Gain", fontsize=20)
    ax = plt.gca()
    handles, labels = ax.get_legend_handles_labels()
    # Each errorbar handle is a container; element 0 is its data line, so the
    # legend shows plain line samples without error-bar glyphs.
    handles = [h[0] for h in handles]
    ax.legend(handles, labels, numpoints=1, loc=loc)
#     plt.savefig(fig_name+'.pdf') # enable it if you want to save to pdf
    print("mkg: {0}, mei: {1}, ego: {2}, pes: {3}".format(mkg_best.shape[0], mei_best.shape[0], ego_best.shape[0], pes_best.shape[0]))

In [None]:
# rbRemi from mysql data

In [None]:
# Load the pickled best/cost data of each method for rbRemi from the mysql backup keys.
num_repl = 100
ego_best_rbRemi = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_ego_1_best")
ego_cost_rbRemi = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_ego_1_cost")
mei_best_rbRemi = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_multiEI_1_best")
mei_cost_rbRemi = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_multiEI_1_cost")
mkg_best_rbRemi = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_multiKG_noisefree_best")
mkg_cost_rbRemi = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_multiKG_noisefree_cost")
pes_best_rbRemi = get_data_from_s3(bucket, "miso/mysql_backup/pes_rbpes_best")
pes_cost_rbRemi = get_data_from_s3(bucket, "miso/mysql_backup/pes_rbpes_cost")
# Common baseline: the largest of the four methods' mean first-column values,
# repeated num_repl times.
baseline_rbRemi = np.amax([np.mean(ego_best_rbRemi[:,0]), np.mean(mei_best_rbRemi[:,0]), np.mean(mkg_best_rbRemi[:,0]), np.mean(pes_best_rbRemi[:,0])]) * np.ones(num_repl)

In [None]:
# rbRemi without fixcost
# Uses the function defaults fixcost=0.0 and fixcost_ego=0.0, i.e. the log of the raw costs.
benchmark_plot_old_standard(mkg_best_rbRemi, mkg_cost_rbRemi, mei_best_rbRemi, mei_cost_rbRemi, ego_best_rbRemi, ego_cost_rbRemi, pes_best_rbRemi, pes_cost_rbRemi, baseline_rbRemi, "rbRemi")

In [None]:
# rbRemi with fixcost
# Same data as the previous plot, with a constant added to every cost before the log.
# NOTE(review): the factors 5, 1000. and 1.0 are not explained in this notebook — confirm their meaning.
fixcost = 5 * (1000. + 1.0) # np.e**2
fixcost_ego = 5000.
benchmark_plot_old_standard(mkg_best_rbRemi, mkg_cost_rbRemi, mei_best_rbRemi, mei_cost_rbRemi, ego_best_rbRemi, ego_cost_rbRemi, pes_best_rbRemi, pes_cost_rbRemi, baseline_rbRemi, "rbRemi", fixcost=fixcost, fixcost_ego=fixcost_ego)

In [None]:
# rbNew from mysql data

In [None]:
# Load the pickled best/cost data of each method for rbNew from the mysql backup keys.
num_repl = 100
ego_best_rbNew = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_ego_2_best")
ego_cost_rbNew = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_ego_2_cost")
mei_best_rbNew = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_multiEI_2_best")
mei_cost_rbNew = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_multiEI_2_cost")
mkg_best_rbNew = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_multiKG_noisefree_2_best")
mkg_cost_rbNew = get_data_from_s3(bucket, "miso/mysql_backup/rosenbrock_multiKG_noisefree_2_cost")
pes_best_rbNew = get_data_from_s3(bucket, "miso/mysql_backup/pes_rbnewpes_best")
pes_cost_rbNew = get_data_from_s3(bucket, "miso/mysql_backup/pes_rbnewpes_cost")
# Common baseline: the largest of the four methods' mean first-column values,
# repeated num_repl times.
baseline_rbNew = np.amax([np.mean(ego_best_rbNew[:,0]), np.mean(mei_best_rbNew[:,0]), np.mean(mkg_best_rbNew[:,0]), np.mean(pes_best_rbNew[:,0])]) * np.ones(num_repl)

In [None]:
# rbNew without fixcost
# fixcost is passed explicitly as 0.0; fixcost_ego keeps its default of 0.0.
fixcost = 0.0
benchmark_plot_old_standard(mkg_best_rbNew, mkg_cost_rbNew, mei_best_rbNew, mei_cost_rbNew, ego_best_rbNew, ego_cost_rbNew, pes_best_rbNew, pes_cost_rbNew, baseline_rbNew, "rbNew", fixcost=fixcost)

In [None]:
# rbNew with fixcost
# Same data as the previous plot, with a constant added to every cost before the log.
# NOTE(review): the factors 5, 50. and 1. are not explained in this notebook — confirm their meaning.
fixcost = 5 * (50. + 1.)
fixcost_ego = 5 * 50.
benchmark_plot_old_standard(mkg_best_rbNew, mkg_cost_rbNew, mei_best_rbNew, mei_cost_rbNew, ego_best_rbNew, ego_cost_rbNew, pes_best_rbNew, pes_cost_rbNew, baseline_rbNew, "rbNew", fixcost=fixcost, fixcost_ego=fixcost_ego)

In [None]:
# atoext 50steps from mysql data

In [None]:
# num_repl = 120
# ego_best_atoext_old = get_data_from_s3(bucket, "miso/mysql_backup/ego_atoext_best")
# ego_cost_atoext_old = get_data_from_s3(bucket, "miso/mysql_backup/ego_atoext_cost")
# mei_best_atoext_old = get_data_from_s3(bucket, "miso/mysql_backup/mei_atoext_best")
# mei_cost_atoext_old = get_data_from_s3(bucket, "miso/mysql_backup/mei_atoext_cost")
# mkg_best_atoext_old = get_data_from_s3(bucket, "miso/mysql_backup/vkg_atoext_best")
# mkg_cost_atoext_old = get_data_from_s3(bucket, "miso/mysql_backup/vkg_atoext_cost")
# pes_best_atoext_old = get_data_from_s3(bucket, "miso/mysql_backup/pes_atoext_best")
# pes_cost_atoext_old = get_data_from_s3(bucket, "miso/mysql_backup/pes_atoext_cost")
# baseline_atoext_old = np.amax([np.mean(ego_best_atoext_old[:,0]), np.mean(mei_best_atoext_old[:,0]), np.mean(mkg_best_atoext_old[:,0]), np.mean(pes_best_atoext_old[:,0])]) * np.ones(num_repl)

In [None]:
# benchmark_plot(mkg_best_atoext_old, mkg_cost_atoext_old, mei_best_atoext_old, mei_cost_atoext_old, ego_best_atoext_old, ego_cost_atoext_old, pes_best_atoext_old, pes_cost_atoext_old, baseline_atoext_old, "atoext_old")

In [None]:
# for i in range(100):
#     k = "miso/result/ego_rbNew_repl_{0}".format(i)
#     key = bucket.get_key(k)
#     if key is None:
#         print k

In [None]:
# get_data_from_s3(bucket, 'miso/hyper/pes_atoext')

In [None]:
# a=pd.read_csv("/Users/jialeiwang/Desktop/for_s3/mei_hyper_atoext.csv")

In [None]:
# d = get_data_from_s3(bucket, "miso/result/mei_rbRemi_repl_0")
# d

In [None]:
# dd={0:d['hyperparam'][0,:9], 1:d['hyperparam'][0,9:18], 2:d['hyperparam'][0,18:]}

In [None]:
# Print the name of every key in the bucket whose name contains 'miso'
# (substring match anywhere in the name, not only as a prefix).
for k in bucket.list():
    if 'miso' in k.key:
        # Parenthesised single-argument form prints the same text on Python 2 and 3.
        print(k.key)

In [None]:
# prefix1 = "miso/result/mkg_atoext_150steps_repl_"
# prefix2 = "miso/result/mei_atoext_150steps_repl_"
# for i in range(100):
#     try:
#         d1 = get_data_from_s3(bucket, prefix1+str(i))
#         d2 = get_data_from_s3(bucket, prefix2+str(i))
#         d2['init_best_truth'] = d1['init_best_truth']
#         send_data_to_s3(bucket, prefix2+str(i), d2)
#     except:
#         print i
#     #     t1.append(d1['init_best_truth'])
# #     t2.append(d2['init_best_truth'])
# # print np.mean(t1)
# # print np.mean(t2)
# # for i in range(num_repl):
# #         d = get_data_from_s3(bucket, prefix+str(i))
# #         if len(d['best']) < num_steps:
# #             print "{0}_{1} has {2} data points".format(prefix, i, len(d['best']))
# #         else:
# #             data_best.append(np.concatenate(([d['init_best_truth']], d['best'])))
# #             data_cost.append(np.concatenate(([0], d['cost'])))
# #             data_baseline.append(d['init_best_truth'])

In [None]:
# Scratch inspection: load one stored data object so the next cell can display it.
d = get_data_from_s3(bucket, "miso/data/rbRemi_IS_1_5_points_repl_0")

In [None]:
# Display the object loaded in the previous cell (the whole object is rendered).
d

In [None]:
# NOTE(review): absolute path on one developer's machine — this cell only runs there.
# pickle.load can execute code embedded in the file, so only open trusted files.
with open("/Users/jialeiwang/Documents/miso/rbpes_IS_0_1_5_points_each_repl_0.pickle",'rb') as f:
    dd = pickle.load(f)

In [None]:
# Show the stored 'vals' shifted by a constant.
# NOTE(review): the meaning of 456.3 is not shown anywhere in this notebook — confirm.
np.array(dd['vals'])-456.3