In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle 
from tabulate import tabulate



In [17]:
ga_path = '../pickles/ga/30runs/'
kbga_path = '../pickles/kbga/30runs/'

In [18]:
ga_files = ['30_run_az_ga_ts.pkl', 
            '30_run_imdb_ga_ts.pkl',
            '30_run_yelp_ga_ts.pkl',]

In [19]:
kbga_files = ['30_run_az_kbga_vp_ts.pkl',
              '30_run_imdb_kbga_vp_ts.pkl',
              '30_run_yelp_kbga_vp_ts.pkl',]

In [12]:
[i for i in map("{:.5f}".format, [1.123, 2.123, 3.234])]

['1.12300', '2.12300', '3.23400']

In [109]:
def average_best_fitness_value(az, imdb, yelp):
    cols = ["gen_"+str(i) for i in [1, 25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    data = [
        az.iloc[-1, [0, 24, 49, 74, 99]],
        imdb.iloc[-1, [0, 24, 49, 74, 99]],
        yelp.iloc[-1, [0, 24, 49, 74, 99]],
        ]
    
    abfv = pd.DataFrame(data, columns=cols)
    abfv = abfv.append(abfv.mean(), ignore_index=True)

    abfv.index = index_col
    abfv.index.name = "Datasets"
    
    return abfv

In [110]:
def average_best_of_generation(az, imdb, yelp):
    cols = ["1-25", "26-50", "51-75", "76-100"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    data = [
        [az.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
        [imdb.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
        [yelp.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
    ]

    abog = pd.DataFrame(data, columns=cols)
    abog = abog.append(abog.mean(), ignore_index=True)

    abog.index = index_col
    abog.index.name = "Datasets"
    return abog

In [111]:
def optimization_accuracy(az, imdb, yelp):
    cols = ["gen_"+str(i) for i in [1, 25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    az_mins = min(az.min().to_list())
    az_maxs = max(az.max().to_list())
    imdb_mins = min(imdb.min().to_list())
    imdb_maxs = max(imdb.max().to_list())
    yelp_mins = min(yelp.min().to_list())
    yelp_maxs = max(yelp.max().to_list())

    data = [
        list(map(lambda i: (i-az_mins)/(az_maxs-az_mins), az.iloc[-1, [0, 24, 49, 74, 99]])),
        list(map(lambda i: (i-imdb_mins)/(imdb_maxs-imdb_mins), imdb.iloc[-1, [0, 24, 49, 74, 99]])),
        list(map(lambda i: (i-yelp_mins)/(yelp_maxs-yelp_mins), yelp.iloc[-1, [0, 24, 49, 74, 99]])),
    ]

    oa = pd.DataFrame(data, columns=cols)
    oa = oa.append(oa.mean(), ignore_index=True)

    oa.index = index_col
    oa.index.name = "Datasets"
    return oa

In [112]:
def evolutionary_leap(base, runs):
    leap = [[0 for i in range(runs)]]
    for i in range(1, 100):
        # print(ga_az.iloc[:, i] - ga_az.iloc[:, i-1])
        leap.append(list(map(lambda x: 1 if x != 0 else 0, base.iloc[:-1, i] - base.iloc[:-1, i-1])))
    leap = np.transpose(leap)

    indexes = ["run_"+str(i) for i in range(1, runs+1)]
    leap_df = pd.DataFrame(leap, columns=base.columns[:-1])
    leap_df.index = indexes

    data = {
        "gen_25": leap_df.iloc[:, :25].sum(axis=1),
        "gen_50": leap_df.iloc[:, 25:50].sum(axis=1),
        "gen_75": leap_df.iloc[:, 50:75].sum(axis=1),
        "gen_100": leap_df.iloc[:, 75:].sum(axis=1)
    }
    
    leap_count = pd.DataFrame(data)
    leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
    
    indexes = ["run_"+str(i) for i in range(1, runs+1)] + ['Average']
    leap_count.index = indexes
    leap_count.index.name = 'runs'
    
    return leap_count


def likelihood_of_evolution_leap(az, imdb, yelp, runs):
    cols = ["gen_"+str(i) for i in [25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    az_leaps = evolutionary_leap(az, runs)
    imdb_leaps = evolutionary_leap(imdb, runs)
    yelp_leaps = evolutionary_leap(yelp, runs)

    data = [
        az_leaps.iloc[-1]/runs,
        imdb_leaps.iloc[-1]/runs,
        yelp_leaps.iloc[-1]/runs,
    ]

    el = pd.DataFrame(data, columns=cols)
    el = el.append(el.mean(), ignore_index=True)

    el.index = index_col
    el.index.name = "Datasets"

    return el

In [113]:
def probability_of_convergence(az, imdb, yelp, success_thresh, runs):
    az_count = 0
    imdb_count = 0
    yelp_count = 0

    for i in range(runs):
        if az.iloc[i, -2] >= success_thresh:
            az_count += 1
        if imdb.iloc[i, -2] >= success_thresh:
            imdb_count += 1
        if yelp.iloc[i, -2] >= success_thresh:
            yelp_count += 1

    cols = ["P"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        az_count/runs,
        imdb_count/runs,
        yelp_count/runs
    ]

    pc = pd.DataFrame(data, columns=cols)
    pc = pc.append(pc.mean(), ignore_index=True)

    pc.index = index_col
    pc.index.name = "Datasets"

    return pc

In [114]:
def function_evaluations(base, success_thresh, runs):
    evolutions = 0

    mask = base.iloc[:, :-1] >= success_thresh
    for i in range(runs):
        try:
            # print(base[mask].iloc[i].dropna())
            evolutions += int(base[mask].iloc[i].dropna().index[0].split("_")[1]) 
        except:
            evolutions += 0

    return evolutions


def average_no_of_function_evaluations(az, imdb, yelp, success_thresh, runs):
    az_eval = function_evaluations(az, success_thresh, runs)
    imdb_eval = function_evaluations(imdb, success_thresh, runs)
    yelp_eval = function_evaluations(yelp, success_thresh, runs)

    cols = ["AFES"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        az_eval/runs,
        imdb_eval/runs,
        yelp_eval/runs
    ]

    afes = pd.DataFrame(data, columns=cols)
    afes = afes.append(afes.mean(), ignore_index=True)

    afes.index = index_col
    afes.index.name = "Datasets"

    return afes

In [115]:
def successful_performance(az, imdb, yelp, success_thresh, runs):
    afes = average_no_of_function_evaluations(az, imdb, yelp, success_thresh, runs)
    p = probability_of_convergence(az, imdb, yelp, success_thresh, runs)

    cols = ["SP"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        afes.iloc[0, 0]/p.iloc[0, 0],
        afes.iloc[1, 0]/p.iloc[1, 0],
        afes.iloc[2, 0]/p.iloc[2, 0]
    ]

    sp = pd.DataFrame(data, columns=cols)
    sp = sp.append(sp.mean(), ignore_index=True)

    sp.index = index_col
    sp.index.name = "Datasets"

    return sp

In [116]:
from scipy import stats

def one_tailed_t_test(abfv, abog, oa, el):
    h0 = False
    alpha = 0.05


    t_value, p_value = stats.ttest_1samp(abfv.iloc[:-1, :5], abfv.iloc[:-1, 5:])
    p_df = pd.DataFrame(p_value).astype()
    print(p_df)
        

In [20]:
one_tailed_t_test(abfv, abog, oa, el)

NameError: name 'abfv' is not defined

In [118]:
def tabulate_runs(save_path, runs):
    with open(save_path, 'rb') as gf:
        data = pickle.load(gf)
        chromos = []
        scores = []
        exec_time = []
        for run in data:
            chromos.append(run[0])
            scores.append(run[1])
            exec_time.append(run[2])

    df = pd.DataFrame()
    cols = ["gen_" + str(i) for i in range(1, 101)]
    df[cols] = pd.DataFrame(scores)
    
    df = pd.concat([df, pd.DataFrame({'exec_time': exec_time})], axis=1)
    # df = pd.concat([df, pd.DataFrame([df.mean().tolist()], columns=cols+["exec_time"])], axis=0, ignore_index=True)
    df = df.append(df.mean(), ignore_index=True)

    indexes = ["run_"+str(i) for i in range(1, runs+1)] + ['Average']
    df.index = indexes
    df.index.name = 'runs'
    
    return df

In [119]:
runs = 30
success_thresh = 0.8

ga_az = tabulate_runs(ga_path+ga_files[0], runs)
ga_imdb = tabulate_runs(ga_path+ga_files[1], runs)
ga_yelp = tabulate_runs(ga_path+ga_files[2], runs)

kbga_az = tabulate_runs(kbga_path+kbga_files[0], runs)
kbga_imdb = tabulate_runs(kbga_path+kbga_files[1], runs)
kbga_yelp = tabulate_runs(kbga_path+kbga_files[2], runs)

  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)


In [None]:
# base_ga = pd.concat([ga_az.iloc[-1, :], ga_imdb.iloc[-1, :], ga_yelp.iloc[-1, :]], axis=1)
# base_ga.columns = ['Amazon', "IMDB", "Yelp"]
# base_ga

## Measures

In [120]:
abfv_ga = average_best_fitness_value(ga_az, ga_imdb, ga_yelp)
abfv_kbga = average_best_fitness_value(kbga_az, kbga_imdb, kbga_yelp)
abfv = pd.concat([abfv_ga, abfv_kbga], axis=1)
abfv

  abfv = abfv.append(abfv.mean(), ignore_index=True)
  abfv = abfv.append(abfv.mean(), ignore_index=True)


Unnamed: 0_level_0,gen_1,gen_25,gen_50,gen_75,gen_100,gen_1,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amazon,0.644933,0.732933,0.7632,0.777067,0.789067,0.688667,0.834267,0.870667,0.885467,0.892267
IMDB,0.646257,0.737701,0.775223,0.796702,0.810873,0.700535,0.827718,0.877094,0.903209,0.917736
Yelp,0.488,0.713867,0.7456,0.7664,0.7784,0.646933,0.7888,0.8316,0.851067,0.8608
Average,0.593063,0.728167,0.761341,0.780056,0.79278,0.678712,0.816928,0.859787,0.879914,0.890268


In [121]:
abog_ga = average_best_of_generation(ga_az, ga_imdb, ga_yelp)
abog_kbga = average_best_of_generation(kbga_az, kbga_imdb, kbga_yelp)
abog = pd.concat([abog_ga, abog_kbga], axis=1)
abog

  abog = abog.append(abog.mean(), ignore_index=True)
  abog = abog.append(abog.mean(), ignore_index=True)


Unnamed: 0_level_0,1-25,26-50,51-75,76-100,1-25,26-50,51-75,76-100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amazon,0.700603,0.751435,0.771003,0.783893,0.78184,0.856693,0.879093,0.889339
IMDB,0.702684,0.759155,0.787269,0.804449,0.776873,0.855918,0.891547,0.911127
Yelp,0.663936,0.732795,0.756971,0.772997,0.733035,0.813419,0.842624,0.856709
Average,0.689074,0.747795,0.771747,0.787113,0.763916,0.84201,0.871088,0.885725


In [124]:
oa_ga = optimization_accuracy(ga_az, ga_imdb, ga_yelp)
oa_kbga = optimization_accuracy(kbga_az, kbga_imdb, kbga_yelp)
oa = pd.concat([oa_ga, oa_kbga], axis=1)
oa

  oa = oa.append(oa.mean(), ignore_index=True)
  az_mins = min(az.min().to_list())
  az_maxs = max(az.max().to_list())
  imdb_mins = min(imdb.min().to_list())
  imdb_maxs = max(imdb.max().to_list())
  yelp_mins = min(yelp.min().to_list())
  yelp_maxs = max(yelp.max().to_list())
  oa = oa.append(oa.mean(), ignore_index=True)


Unnamed: 0_level_0,gen_1,gen_25,gen_50,gen_75,gen_100,gen_1,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amazon,2e-06,7e-06,9e-06,1e-05,1.1e-05,0.101093,0.697814,0.846995,0.90765,0.935519
IMDB,1e-06,7e-06,1e-05,1.1e-05,1.2e-05,0.080808,0.561279,0.747811,0.846465,0.901347
Yelp,0.0,1.1e-05,1.3e-05,1.4e-05,1.4e-05,0.105208,0.659375,0.826562,0.902604,0.940625
Average,1e-06,8e-06,1e-05,1.2e-05,1.2e-05,0.095703,0.63949,0.807123,0.885573,0.92583


In [125]:
el_ga = likelihood_of_evolution_leap(ga_az, ga_imdb, ga_yelp, runs)
el_kbga = likelihood_of_evolution_leap(kbga_az, kbga_imdb, kbga_yelp, runs)
el = pd.concat([el_ga, el_kbga], axis=1)
el

  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  el = el.append(el.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  el = el.append(el.mean(), ignore_index=True)


Unnamed: 0_level_0,gen_25,gen_50,gen_75,gen_100,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amazon,0.314444,0.176667,0.082222,0.083333,0.508889,0.25,0.118889,0.056667
IMDB,0.402222,0.321111,0.231111,0.154444,0.546667,0.414444,0.278889,0.171111
Yelp,0.414444,0.207778,0.145556,0.085556,0.51,0.301111,0.154444,0.078889
Average,0.377037,0.235185,0.152963,0.107778,0.521852,0.321852,0.184074,0.102222


In [126]:
pc_ga = probability_of_convergence(ga_az, ga_imdb, ga_yelp, success_thresh, runs)
pc_kbga = probability_of_convergence(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
pc = pd.concat([pc_ga, pc_kbga], axis=1)
pc

  pc = pc.append(pc.mean(), ignore_index=True)
  pc = pc.append(pc.mean(), ignore_index=True)


Unnamed: 0_level_0,P,P
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,0.366667,1.0
IMDB,0.766667,1.0
Yelp,0.1,1.0
Average,0.411111,1.0


In [127]:
afes_ga = average_no_of_function_evaluations(ga_az, ga_imdb, ga_yelp, success_thresh, runs)
afes_kbga = average_no_of_function_evaluations(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
afes = pd.concat([afes_ga, afes_kbga], axis=1)
afes

  afes = afes.append(afes.mean(), ignore_index=True)
  afes = afes.append(afes.mean(), ignore_index=True)


Unnamed: 0_level_0,AFES,AFES
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,28.5,14.7
IMDB,57.8,17.466667
Yelp,6.766667,29.933333
Average,31.022222,20.7


In [129]:
sp_ga = successful_performance(ga_az, ga_imdb, ga_yelp, success_thresh, runs)
sp_kbga = successful_performance(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
sp = pd.concat([sp_ga, sp_kbga], axis=1)
sp

  afes = afes.append(afes.mean(), ignore_index=True)
  pc = pc.append(pc.mean(), ignore_index=True)
  sp = sp.append(sp.mean(), ignore_index=True)
  afes = afes.append(afes.mean(), ignore_index=True)
  pc = pc.append(pc.mean(), ignore_index=True)
  sp = sp.append(sp.mean(), ignore_index=True)


Unnamed: 0_level_0,SP,SP
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,77.727273,14.7
IMDB,75.391304,17.466667
Yelp,67.666667,29.933333
Average,73.595081,20.7


## Save as CSV

In [149]:
def f(r):
    return r.map('{:.3f}'.format)

with open('report.csv', 'wb') as rf:
    for i in [abfv, abog, oa, el, pc, afes, sp]:
        i = i.apply(f, axis=1)
        i.to_csv(rf, header=True)