In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle 
from tabulate import tabulate

In [2]:
ga_path = '../pickles/ga/'
kbga_path = '../pickles/kbga/30runs/'

In [3]:
ga_files = ['n_run_az_100fc_ts.pkl', 
            "n_run_az_ga_100fc_copt_avg.pkl"]

In [5]:
kbga_files = ['30_run_az_kbga_vp_ts.pkl',
              '30_run_imdb_kbga_vp_ts.pkl',
              'n_run_az_kbga_es50_100.pkl',
              'n_run_az_kbga_vp.pkl',
              'n_run_az_kbga_es10_100.pkl',
              'n_run_az_kbga_nokbps.pkl', 
              'n_run_az_kbga_cmp.pkl']

In [6]:
def average_best_fitness_value(az, imdb, yelp):
    cols = ["gen_"+str(i) for i in [1, 25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    data = [
        az.iloc[-1, [0, 24, 49, 74, 99]],
        imdb.iloc[-1, [0, 24, 49, 74, 99]],
        yelp.iloc[-1, [0, 24, 49, 74, 99]],
        ]
    
    abfv = pd.DataFrame(data, columns=cols)
    abfv = abfv.append(abfv.mean(), ignore_index=True)

    abfv.index = index_col
    abfv.index.name = "Datasets"
    
    return abfv

In [7]:
def average_best_of_generation(az, imdb, yelp):
    cols = ["1-25", "26-50", "51-75", "76-100"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    data = [
        [az.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
        [imdb.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
        [yelp.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
    ]

    abog = pd.DataFrame(data, columns=cols)
    abog = abog.append(abog.mean(), ignore_index=True)

    abog.index = index_col
    abog.index.name = "Datasets"
    return abog

In [8]:
def optimization_accuracy(az, imdb, yelp):
    cols = ["gen_"+str(i) for i in [1, 25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    mins = min(base_az.min().to_list())
    maxs = max(base_az.max().to_list())
    
    data = [
        list(map(lambda i: (i-mins)/(maxs-mins), az.iloc[-1, [0, 24, 49, 74, 99]])),
        list(map(lambda i: (i-mins)/(maxs-mins), imdb.iloc[-1, [0, 24, 49, 74, 99]])),
        list(map(lambda i: (i-mins)/(maxs-mins), yelp.iloc[-1, [0, 24, 49, 74, 99]])),
    ]

    oa = pd.DataFrame(data, columns=cols)
    oa = oa.append(oa.mean(), ignore_index=True)

    oa.index = index_col
    oa.index.name = "Datasets"
    return oa

In [60]:
def evolutionary_leap(base, runs):
    leap = [[0 for i in range(runs)]]
    for i in range(1, 100):
        # print(ga_az.iloc[:, i] - ga_az.iloc[:, i-1])
        leap.append(list(map(lambda x: 1 if x != 0 else 0, base.iloc[:-1, i] - base.iloc[:-1, i-1])))
    leap = np.transpose(leap)

    indexes = ["run_"+str(i) for i in range(1, runs+1)]
    leap_df = pd.DataFrame(leap, columns=base.columns[:-1])
    leap_df.index = indexes

    data = {
        "gen_25": leap_df.iloc[:, :25].sum(axis=1),
        "gen_50": leap_df.iloc[:, 25:50].sum(axis=1),
        "gen_75": leap_df.iloc[:, 50:75].sum(axis=1),
        "gen_100": leap_df.iloc[:, 75:].sum(axis=1)
    }
    
    leap_count = pd.DataFrame(data)
    leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
    
    indexes = ["run_"+str(i) for i in range(1, runs+1)] + ['Average']
    leap_count.index = indexes
    leap_count.index.name = 'runs'
    
    return leap_count


def likelihood_of_evolution_leap(az, imdb, yelp, runs):
    cols = ["gen_"+str(i) for i in [25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    az_leaps = evolutionary_leap(az, runs)
    imdb_leaps = evolutionary_leap(imdb, runs)
    yelp_leaps = evolutionary_leap(yelp, runs)

    data = [
        az_leaps.iloc[-1]/runs,
        imdb_leaps.iloc[-1]/runs,
        yelp_leaps.iloc[-1]/runs,
    ]

    el = pd.DataFrame(data, columns=cols)
    el = el.append(el.mean(), ignore_index=True)

    el.index = index_col
    el.index.name = "Datasets"

    return el

In [10]:
def probability_of_convergence(az, imdb, yelp, success_thresh, runs):
    az_count = 0
    imdb_count = 0
    yelp_count = 0

    for i in range(runs):
        if az.iloc[i, -2] >= success_thresh:
            az_count += 1
        if imdb.iloc[i, -2] >= success_thresh:
            imdb_count += 1
        if yelp.iloc[i, -2] >= success_thresh:
            yelp_count += 1

    cols = ["P"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        az_count/runs,
        imdb_count/runs,
        yelp_count/runs
    ]

    pc = pd.DataFrame(data, columns=cols)
    pc = pc.append(pc.mean(), ignore_index=True)

    pc.index = index_col
    pc.index.name = "Datasets"

    return pc

In [11]:
def function_evaluations(base, success_thresh, runs):
    evolutions = 0

    mask = base.iloc[:, :-1] >= success_thresh
    for i in range(runs):
        try:
            # print(base[mask].iloc[i].dropna())
            evolutions += int(base[mask].iloc[i].dropna().index[0].split("_")[1]) 
        except:
            evolutions += 0

    return evolutions


def average_no_of_function_evaluations(az, imdb, yelp, success_thresh, runs):
    az_eval = function_evaluations(az, success_thresh, runs)
    imdb_eval = function_evaluations(imdb, success_thresh, runs)
    yelp_eval = function_evaluations(yelp, success_thresh, runs)

    cols = ["AFES"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        az_eval/runs,
        imdb_eval/runs,
        yelp_eval/runs
    ]

    afes = pd.DataFrame(data, columns=cols)
    afes = afes.append(afes.mean(), ignore_index=True)

    afes.index = index_col
    afes.index.name = "Datasets"

    return afes

In [13]:
def successful_performance(az, imdb, yelp, success_thresh, runs):
    afes = average_no_of_function_evaluations(az, imdb, yelp, success_thresh, runs)
    p = probability_of_convergence(az, imdb, yelp, success_thresh, runs)

    cols = ["SP"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        afes.iloc[0, 0]/p.iloc[0, 0],
        afes.iloc[1, 0]/p.iloc[1, 0],
        afes.iloc[2, 0]/p.iloc[2, 0]
    ]

    sp = pd.DataFrame(data, columns=cols)
    sp = sp.append(sp.mean(), ignore_index=True)

    sp.index = index_col
    sp.index.name = "Datasets"

    return sp

In [12]:
from scipy import stats

def one_tailed_t_test(abfv, abog, oa, el):
    h0 = False
    alpha = 0.05


    t_value, p_value = stats.ttest_1samp(abfv.iloc[:-1, :5], abfv.iloc[:-1, 5:])
    p_df = pd.DataFrame(p_value).astype()
    print(p_df)
        

In [14]:
one_tailed_t_test(abfv, abog, oa, el)

NameError: name 'abfv' is not defined

In [15]:
def tabulate_runs(save_path, runs):
    with open(save_path, 'rb') as gf:
        data = pickle.load(gf)
        chromos = []
        scores = []
        exec_time = []
        for run in data:
            chromos.append(run[0])
            scores.append(run[1])
            exec_time.append(run[2])

    df = pd.DataFrame()
    cols = ["gen_" + str(i) for i in range(1, 101)]
    df[cols] = pd.DataFrame(scores)

    df = pd.concat([df, pd.DataFrame({'exec_time': exec_time})], axis=1)
    df = df.append(df.mean(), ignore_index=True)

    indexes = ["run_"+str(i) for i in range(1, runs+1)] + ['Average']
    df.index = indexes
    df.index.name = 'runs'
    
    return df

In [44]:
runs = 30
success_thresh = 0.8

# ga_az = tabulate_runs(ga_path+"corrected/"+ga_files[0], runs)
ga_az = tabulate_runs(kbga_path+kbga_files[0], runs)
ga_imdb = ga_az
ga_yelp = ga_az

kbga_az = tabulate_runs(kbga_path+kbga_files[0], runs)
kbga_imdb = tabulate_runs(kbga_path+kbga_files[1], runs)
kbga_yelp = kbga_az

base_az = pd.concat([ga_az.iloc[:, :-1], kbga_az.iloc[:, :-1]], axis=1)
base_az

  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)
  df = df.append(df.mean(), ignore_index=True)


Unnamed: 0_level_0,gen_1,gen_2,gen_3,gen_4,gen_5,gen_6,gen_7,gen_8,gen_9,gen_10,...,gen_91,gen_92,gen_93,gen_94,gen_95,gen_96,gen_97,gen_98,gen_99,gen_100
runs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
run_1,0.688,0.688,0.724,0.732,0.732,0.748,0.748,0.76,0.764,0.768,...,0.892,0.892,0.892,0.892,0.892,0.892,0.892,0.896,0.896,0.896
run_2,0.692,0.704,0.704,0.724,0.728,0.728,0.74,0.764,0.764,0.764,...,0.904,0.904,0.904,0.904,0.904,0.904,0.904,0.908,0.908,0.908
run_3,0.696,0.704,0.732,0.732,0.74,0.748,0.76,0.76,0.764,0.784,...,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9
run_4,0.684,0.684,0.7,0.712,0.752,0.752,0.764,0.772,0.776,0.784,...,0.904,0.904,0.904,0.904,0.904,0.904,0.904,0.904,0.904,0.904
run_5,0.712,0.724,0.72,0.728,0.728,0.752,0.752,0.76,0.76,0.76,...,0.884,0.884,0.884,0.884,0.884,0.884,0.884,0.884,0.888,0.888
run_6,0.7,0.712,0.712,0.736,0.74,0.772,0.776,0.792,0.792,0.792,...,0.904,0.904,0.904,0.904,0.904,0.904,0.904,0.904,0.904,0.904
run_7,0.684,0.7,0.72,0.724,0.74,0.752,0.756,0.768,0.776,0.788,...,0.884,0.884,0.884,0.884,0.884,0.884,0.884,0.884,0.884,0.884
run_8,0.676,0.68,0.68,0.692,0.708,0.724,0.74,0.756,0.76,0.764,...,0.888,0.888,0.888,0.888,0.888,0.888,0.888,0.888,0.888,0.888
run_9,0.684,0.688,0.732,0.772,0.764,0.764,0.764,0.768,0.796,0.796,...,0.892,0.896,0.896,0.896,0.896,0.896,0.896,0.896,0.896,0.896
run_10,0.676,0.696,0.696,0.716,0.728,0.728,0.748,0.752,0.756,0.78,...,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9


## Measures

In [45]:
abfv_ga = average_best_fitness_value(ga_az, ga_imdb, ga_yelp)
abfv_kbga = average_best_fitness_value(kbga_az, kbga_imdb, kbga_yelp)
abfv = pd.concat([abfv_ga, abfv_kbga], axis=1)
abfv

  abfv = abfv.append(abfv.mean(), ignore_index=True)
  abfv = abfv.append(abfv.mean(), ignore_index=True)


Unnamed: 0_level_0,gen_1,gen_25,gen_50,gen_75,gen_100,gen_1,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amazon,0.688667,0.834267,0.870667,0.885467,0.892267,0.688667,0.834267,0.870667,0.885467,0.892267
IMDB,0.688667,0.834267,0.870667,0.885467,0.892267,0.700535,0.827718,0.877094,0.903209,0.917736
Yelp,0.688667,0.834267,0.870667,0.885467,0.892267,0.688667,0.834267,0.870667,0.885467,0.892267
Average,0.688667,0.834267,0.870667,0.885467,0.892267,0.692623,0.832084,0.872809,0.891381,0.900757


In [19]:
abog_ga = average_best_of_generation(ga_az, ga_imdb, ga_yelp)
abog_kbga = average_best_of_generation(kbga_az, kbga_imdb, kbga_yelp)
abog = pd.concat([abog_ga, abog_kbga], axis=1)
abog

  abog = abog.append(abog.mean(), ignore_index=True)
  abog = abog.append(abog.mean(), ignore_index=True)


Unnamed: 0_level_0,1-25,26-50,51-75,76-100,1-25,26-50,51-75,76-100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amazon,0.78184,0.856693,0.879093,0.889339,0.78184,0.856693,0.879093,0.889339
IMDB,0.78184,0.856693,0.879093,0.889339,0.78184,0.856693,0.879093,0.889339
Yelp,0.78184,0.856693,0.879093,0.889339,0.78184,0.856693,0.879093,0.889339
Average,0.78184,0.856693,0.879093,0.889339,0.78184,0.856693,0.879093,0.889339


In [46]:
oa_ga = optimization_accuracy(ga_az, ga_imdb, ga_yelp)
oa_kbga = optimization_accuracy(kbga_az, kbga_imdb, kbga_yelp)
oa = pd.concat([oa_ga, oa_kbga], axis=1)
oa

  oa = oa.append(oa.mean(), ignore_index=True)
  oa = oa.append(oa.mean(), ignore_index=True)


Unnamed: 0_level_0,gen_1,gen_25,gen_50,gen_75,gen_100,gen_1,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amazon,0.101093,0.697814,0.846995,0.90765,0.935519,0.101093,0.697814,0.846995,0.90765,0.935519
IMDB,0.101093,0.697814,0.846995,0.90765,0.935519,0.149733,0.670977,0.873338,0.980363,1.039902
Yelp,0.101093,0.697814,0.846995,0.90765,0.935519,0.101093,0.697814,0.846995,0.90765,0.935519
Average,0.101093,0.697814,0.846995,0.90765,0.935519,0.117306,0.688868,0.855776,0.931888,0.970314


In [61]:
el_ga = likelihood_of_evolution_leap(ga_az, ga_imdb, ga_yelp, runs)
el_kbga = likelihood_of_evolution_leap(kbga_az, kbga_imdb, kbga_yelp, runs)
el = pd.concat([el_ga, el_kbga], axis=1)
el

  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  el = el.append(el.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
  el = el.append(el.mean(), ignore_index=True)


Unnamed: 0_level_0,gen_25,gen_50,gen_75,gen_100,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amazon,0.508889,0.25,0.118889,0.056667,0.508889,0.25,0.118889,0.056667
IMDB,0.508889,0.25,0.118889,0.056667,0.546667,0.414444,0.278889,0.171111
Yelp,0.508889,0.25,0.118889,0.056667,0.508889,0.25,0.118889,0.056667
Average,0.508889,0.25,0.118889,0.056667,0.521481,0.304815,0.172222,0.094815


In [25]:
pc_ga = probability_of_convergence(ga_az, ga_imdb, ga_yelp, success_thresh, runs)
pc_kbga = probability_of_convergence(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
pc = pd.concat([pc_ga, pc_kbga], axis=1)
pc

  pc = pc.append(pc.mean(), ignore_index=True)
  pc = pc.append(pc.mean(), ignore_index=True)


Unnamed: 0_level_0,P,P
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,1.0,1.0
IMDB,1.0,1.0
Yelp,1.0,1.0
Average,1.0,1.0


In [24]:
afes_ga = average_no_of_function_evaluations(ga_az, ga_imdb, ga_yelp, success_thresh, runs)
afes_kbga = average_no_of_function_evaluations(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
afes = pd.concat([afes_ga, afes_kbga], axis=1)
afes

  afes = afes.append(afes.mean(), ignore_index=True)
  afes = afes.append(afes.mean(), ignore_index=True)


Unnamed: 0_level_0,AFES,AFES
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,14.7,14.7
IMDB,14.7,17.466667
Yelp,14.7,14.7
Average,14.7,15.622222


In [23]:
sp_ga = successful_performance(ga_az, ga_imdb, ga_yelp, success_thresh, runs)
sp_kbga = successful_performance(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
sp = pd.concat([sp_ga, sp_kbga], axis=1)
sp

  afes = afes.append(afes.mean(), ignore_index=True)
  pc = pc.append(pc.mean(), ignore_index=True)
  sp = sp.append(sp.mean(), ignore_index=True)
  afes = afes.append(afes.mean(), ignore_index=True)
  pc = pc.append(pc.mean(), ignore_index=True)
  sp = sp.append(sp.mean(), ignore_index=True)


Unnamed: 0_level_0,SP,SP
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,14.7,14.7
IMDB,14.7,17.466667
Yelp,14.7,14.7
Average,14.7,15.622222


## Save as CSV

In [62]:
with open('report.csv', 'wb') as rf:
    base_az.to_csv(rf)

with open('report.csv', 'ab') as rf:
    for i in [abfv, abog, oa, el, pc, afes, sp]:
        i.to_csv(rf, header=True)