In [None]:
%matplotlib notebook
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import boto
from boto.s3.connection import S3Connection
from data_io import get_data_from_s3, send_data_to_s3
# One S3 connection is enough: boto.connect_s3() already returns an
# S3Connection, so the extra S3Connection() whose result was immediately
# overwritten has been dropped.
conn = boto.connect_s3()
# `bucket` is read as a module-level global by summary_best.
# validate=True asks boto to check the bucket when it is looked up (see boto docs).
bucket = conn.get_bucket('poloczeks3', validate=True)

In [None]:
def summary_best(filename, num_data, num_repl):
    """Collect the 'best' trajectory of every replication into one 2-D array.

    For each replication index in ``range(num_repl)`` the object
    ``coldstart/result/<filename>_repl_<index>`` is fetched from the
    module-level ``bucket`` via ``get_data_from_s3``.

    Args:
        filename: Result name prefix used to build the S3 key.
        num_data: Required length of each replication's ``'best'`` sequence.
        num_repl: Number of replications to load.

    Returns:
        A numpy array with one row per replication. Each row is
        ``init_best_truth`` followed by the ``num_data`` entries of ``'best'``,
        so the shape is ``(num_repl, num_data + 1)`` when ``num_repl > 0``.

    Raises:
        RuntimeError: If a replication's ``'best'`` sequence does not have
            exactly ``num_data`` entries. The message starts with
            ``"data loss"`` and names the offending key and the two lengths.
    """
    data_best = []
    for repl_idx in range(num_repl):
        key = "coldstart/result/{0}_repl_{1}".format(filename, repl_idx)
        d = get_data_from_s3(bucket, key)
        num_found = len(d['best'])
        if num_found != num_data:
            # Name the key and both counts so the broken replication can be
            # found without re-running the whole loop by hand.
            raise RuntimeError(
                "data loss: {0} has {1} entries in 'best', expected {2}".format(
                    key, num_found, num_data))
        # Prepend the initial value so that column 0 is the starting point.
        # tolist() keeps rows as plain Python scalars, leaving dtype inference
        # to the final np.array call.
        data_best.append(np.concatenate(([d['init_best_truth']], d['best'])).tolist())
    return np.array(data_best)

In [None]:
def make_plot(ego_name, mkg_name, kg_name, mtbo_name, num_data, num_repl=100):
    """Plot mean gain per iteration, with error bars, for four result sets.

    Each result set is loaded with ``summary_best`` and drawn as one curve on
    a new figure. The curves are labelled "EGO", "wsKG", "KG" and "MTBO+",
    in that order.

    For every curve the plotted value is ``baseline - mean(best, axis=0)``,
    where ``baseline`` is the EGO mean at index 0 (the initial value that
    ``summary_best`` prepends). The error bar is
    ``2 * std(best, axis=0) / sqrt(number of rows)``, using numpy's default
    (population) standard deviation.

    NOTE(review): the calls in this notebook pass ``'pes_*'`` result names as
    ``mtbo_name`` while the curve is labelled "MTBO+" - confirm the label.

    Args:
        ego_name: Result name prefix for the "EGO" curve (also the baseline).
        mkg_name: Result name prefix for the "wsKG" curve.
        kg_name: Result name prefix for the "KG" curve.
        mtbo_name: Result name prefix for the "MTBO+" curve.
        num_data: Forwarded to ``summary_best`` for every result set.
        num_repl: Forwarded to ``summary_best`` for every result set.

    Returns:
        None. The figure is left open for the notebook backend to display.
    """
    # Load everything before touching matplotlib: summary_best raises on
    # incomplete data, and creating the figure first would leave an empty
    # figure behind whenever a load fails.
    series = [
        ("EGO", summary_best(ego_name, num_data, num_repl)),
        ("wsKG", summary_best(mkg_name, num_data, num_repl)),
        ("KG", summary_best(kg_name, num_data, num_repl)),
        ("MTBO+", summary_best(mtbo_name, num_data, num_repl)),
    ]
    # All four curves share one reference point: the EGO mean at index 0.
    baseline = np.mean(series[0][1], axis=0)[0]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for label, best in series:
        mean_best = np.mean(best, axis=0)
        half_width = np.std(best, axis=0) * 2.0 / np.sqrt(best.shape[0])
        ax.errorbar(range(best.shape[1]), baseline - mean_best,
                    yerr=half_width, label=label)
    ax.legend(loc='lower right')
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Gain")

In [None]:
# rb_van

In [None]:
# Gain curves for the rb_van result files, 25 data points per replication.
make_plot(
    ego_name='ego_rbCvanN',
    mkg_name='mkg_rbCvanN',
    kg_name='kg_rbCvanN',
    mtbo_name='pes_rbCvanN',
    num_data=25,
)

In [None]:
# rb_sin

In [None]:
# Gain curves for the rb_sin result files, 25 data points per replication.
make_plot(
    ego_name='ego_rbCsinN',
    mkg_name='mkg_rbCsinN',
    kg_name='kg_rbCsinN',
    mtbo_name='pes_rbCsinN',
    num_data=25,
)

In [None]:
# rb_bias

In [None]:
# Gain curves for the rb_bias result files, 25 data points per replication.
make_plot(
    ego_name='ego_rbCbiasN',
    mkg_name='mkg_rbCbiasN',
    kg_name='kg_rbCbiasN',
    mtbo_name='pes_rbCbiasN',
    num_data=25,
)

In [None]:
# ato_van

In [None]:
# Gain curves for the ato_van result files, 50 data points per replication.
make_plot(
    ego_name='ego_atoC_vanilla',
    mkg_name='mkg_atoC_vanilla',
    kg_name='kg_atoC_vanilla',
    mtbo_name='pes_atoC_vanilla',
    num_data=50,
)

In [None]:
# ato_var2

In [None]:
# Gain curves for the ato_var2 result files, 50 data points per replication.
make_plot(
    ego_name='ego_atoC_var2',
    mkg_name='mkg_atoC_var2',
    kg_name='kg_atoC_var2',
    mtbo_name='pes_atoC_var2',
    num_data=50,
)

In [None]:
# ato_var3

In [None]:
# Gain curves for the ato_var3 result files, 50 data points per replication.
make_plot(
    ego_name='ego_atoC_var3',
    mkg_name='mkg_atoC_var3',
    kg_name='kg_atoC_var3',
    mtbo_name='pes_atoC_var3',
    num_data=50,
)

In [None]:
# ato_var4

In [None]:
# Gain curves for the ato_var4 result files, 50 data points per replication.
make_plot(
    ego_name='ego_atoC_var4',
    mkg_name='mkg_atoC_var4',
    kg_name='kg_atoC_var4',
    mtbo_name='pes_atoC_var4',
    num_data=50,
)

In [None]:
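# Disabled earlier variant of plot_cor, kept for reference only. It read the
# raw "plot/<func_name>_plot_data.pickle", averaged 'cor_is' and 'cor_delta'
# over axis 0, wrote the two PDFs, and saved "plot/<func_name>_for_plot.pickle",
# which is the file the active plot_cor reads.
# def plot_cor(func_name, dim):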
#     with open("plot/{0}_plot_data.pickle".format(func_name), "rb") as f:
#         data = pickle.load(f)
#     new_data = {'x': data['x'], 'cor_is': np.zeros((len(data['x']), dim)), 'cor_delta': np.zeros((len(data['x']), dim))}
#     with PdfPages("plot/{0}_cor_is.pdf".format(func_name)) as pdf:
#         for which_dim in range(dim):
#             plt.figure()
#             plt.plot(data['x'], np.mean(data['cor_is'][:,:,which_dim], axis=0))
#             plt.title("dim {0}".format(which_dim))
#             pdf.savefig()
#             plt.close()
#             new_data['cor_is'][:, which_dim] = np.mean(data['cor_is'][:,:,which_dim], axis=0)
#     with PdfPages("plot/{0}_cor_delta_gp.pdf".format(func_name)) as pdf:
#         for which_dim in range(dim):
#             plt.figure()
#             plt.plot(data['x'], np.mean(data['cor_delta'][:,:,which_dim], axis=0))
#             plt.title("dim {0}".format(which_dim))
#             pdf.savefig()
#             plt.close()
#             new_data['cor_delta'][:, which_dim] = np.mean(data['cor_delta'][:,:,which_dim], axis=0)
#     with open("plot/{0}_for_plot.pickle".format(func_name), "wb") as f:
#         pickle.dump(new_data, f)
    

In [None]:
def plot_cor(func_name, dim):
    """Write one PDF page per dimension for the 'cor_is' and 'cor_delta' data.

    Reads ``plot/<func_name>_for_plot.pickle`` and produces two files:
    ``plot/<func_name>_cor_is.pdf`` (from ``data['cor_is']``) and
    ``plot/<func_name>_cor_delta_gp.pdf`` (from ``data['cor_delta']``).
    Page ``k`` of each file plots column ``k`` of the matching array against
    ``data['x']`` and is titled ``"dim k"``.

    Args:
        func_name: Name used to build the input and output paths under
            ``plot/``.
        dim: Number of columns to plot; columns ``0 .. dim - 1`` are indexed.

    Returns:
        None.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on pickle files produced by a trusted source.
    with open("plot/{0}_for_plot.pickle".format(func_name), "rb") as f:
        data = pickle.load(f)

    # (data key, output path) pairs, in the order the files are written.
    outputs = (
        ('cor_is', "plot/{0}_cor_is.pdf".format(func_name)),
        ('cor_delta', "plot/{0}_cor_delta_gp.pdf".format(func_name)),
    )
    for key, pdf_path in outputs:
        with PdfPages(pdf_path) as pdf:
            for which_dim in range(dim):
                fig = plt.figure()
                try:
                    plt.plot(data['x'], data[key][:, which_dim])
                    plt.title("dim {0}".format(which_dim))
                    pdf.savefig(fig)
                finally:
                    # Close even when plotting or saving fails, so an error
                    # does not leave an open figure behind.
                    plt.close(fig)

In [None]:
# plot_cor('rb', 2)
# plot_cor('ato', 8)

In [None]:
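# Disabled one-off upload, kept for reference only (uses the Python 2 print
# statement). It sent every local file under tmp/coldstart/result whose name
# contains "pickle" to S3 as "coldstart/result/<file name up to the first dot>".
# for filename in os.listdir('tmp/coldstart/result'):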
#     if "pickle" in filename:
#         with open("tmp/coldstart/result/"+filename, 'rb') as f:
#             d = pickle.load(f)
#             key_name = "coldstart/result/"+filename.split('.')[0]
#             print key_name
#             send_data_to_s3(bucket, key_name, d)