In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from tabulate import tabulate

In [None]:
# Directories (relative paths) that the pickled run results are read from.
ga_path = "../pickles/ga/"
kbga_path = "../pickles/kbga/corrected/"

In [None]:
# Pickle file names for the GA runs; joined onto ga_path when loading.
ga_files = [
    "n_run_az_100fc_ts.pkl",
    "n_run_az_ga_100fc_copt_avg.pkl",
]

In [None]:
# Pickle file names for the KBGA runs; joined onto kbga_path when loading.
kbga_files = [
    "n_run_az_kbga_es50_100.pkl",
    "n_run_az_kbga_vp.pkl",
    "n_run_az_kbga_es10_100.pkl",
    "n_run_az_kbga_nokbps.pkl",
    "n_run_az_kbga_cmp.pkl",
]

In [None]:
def average_best_fitness_value(az, imdb, yelp):
    """Tabulate the last-row scores at generations 1, 25, 50, 75 and 100.

    For each of the three frames the last row is read at column positions
    0, 24, 49, 74 and 99; a fourth row holds the column-wise mean of the
    three dataset rows.

    Parameters
    ----------
    az, imdb, yelp : pandas.DataFrame
        Frames with at least 100 columns. Only the last row is used
        (presumably the 'Average' row produced by ``tabulate_runs`` --
        verify against the caller).

    Returns
    -------
    pandas.DataFrame
        4 x 5 frame indexed by "Amazon", "IMDB", "Yelp", "Average"
        (index name "Datasets") with columns gen_1, gen_25, ..., gen_100.
    """
    generations = [1, 25, 50, 75, 100]
    cols = ["gen_" + str(g) for g in generations]
    positions = [g - 1 for g in generations]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    # Use plain arrays so values are placed by position, not by the input
    # frames' column labels (label alignment would yield NaN on a mismatch).
    data = [frame.iloc[-1, positions].to_numpy() for frame in (az, imdb, yelp)]

    abfv = pd.DataFrame(data, columns=cols)
    # DataFrame.append was removed in pandas 2.0; concat the mean row instead.
    abfv = pd.concat([abfv, abfv.mean().to_frame().T], ignore_index=True)

    abfv.index = index_col
    abfv.index.name = "Datasets"

    return abfv

In [None]:
def average_best_of_generation(az, imdb, yelp):
    """Tabulate the mean last-row score over four 25-generation windows.

    For each frame the last row is averaged over column positions
    0-24, 25-49, 50-74 and 75-99; a fourth row holds the column-wise mean
    of the three dataset rows.

    Parameters
    ----------
    az, imdb, yelp : pandas.DataFrame
        Frames with at least 100 columns; only the last row is used.

    Returns
    -------
    pandas.DataFrame
        4 x 4 frame indexed by "Amazon", "IMDB", "Yelp", "Average"
        (index name "Datasets") with columns "1-25" ... "76-100".
    """
    cols = ["1-25", "26-50", "51-75", "76-100"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    # Half-open positional windows: [0, 25), [25, 50), [50, 75), [75, 100).
    windows = [(stop - 25, stop) for stop in range(25, 101, 25)]
    data = [
        [frame.iloc[-1, start:stop].mean() for start, stop in windows]
        for frame in (az, imdb, yelp)
    ]

    abog = pd.DataFrame(data, columns=cols)
    # DataFrame.append was removed in pandas 2.0; concat the mean row instead.
    abog = pd.concat([abog, abog.mean().to_frame().T], ignore_index=True)

    abog.index = index_col
    abog.index.name = "Datasets"
    return abog

In [None]:
def optimization_accuracy(az, imdb, yelp, base=None):
    """Min-max normalise the last-row scores at generations 1, 25, 50, 75, 100.

    Each selected score ``s`` is mapped to ``(s - mins) / (maxs - mins)``,
    where ``mins``/``maxs`` are the global minimum/maximum over every cell of
    the reference frame. A fourth row holds the column-wise mean of the three
    dataset rows.

    Parameters
    ----------
    az, imdb, yelp : pandas.DataFrame
        Frames with at least 100 columns; only the last row is used.
    base : pandas.DataFrame, optional
        Reference frame supplying the normalisation bounds. When omitted the
        notebook-level ``base_az`` is used, as before, which requires that
        variable to have been defined by an earlier-executed cell.

    Returns
    -------
    pandas.DataFrame
        4 x 5 frame indexed by "Amazon", "IMDB", "Yelp", "Average"
        (index name "Datasets") with columns gen_1, gen_25, ..., gen_100.

    Raises
    ------
    ZeroDivisionError
        If the reference frame's maximum equals its minimum.
    """
    if base is None:
        # Backward-compatible fallback on the notebook global.
        base = base_az

    generations = [1, 25, 50, 75, 100]
    cols = ["gen_" + str(g) for g in generations]
    positions = [g - 1 for g in generations]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    mins = min(base.min().to_list())
    maxs = max(base.max().to_list())
    span = maxs - mins
    if span == 0:
        raise ZeroDivisionError(
            "cannot normalise: reference frame has identical minimum and maximum"
        )

    data = [
        ((frame.iloc[-1, positions] - mins) / span).to_numpy()
        for frame in (az, imdb, yelp)
    ]

    oa = pd.DataFrame(data, columns=cols)
    # DataFrame.append was removed in pandas 2.0; concat the mean row instead.
    oa = pd.concat([oa, oa.mean().to_frame().T], ignore_index=True)

    oa.index = index_col
    oa.index.name = "Datasets"
    return oa

In [None]:
def evolutionary_leap(base_az, runs):
    """Count, per run, the generations whose score differs from the previous one.

    The last row (summary row) and the last column of ``base_az`` are ignored.
    For every remaining row, a generation counts as a "leap" when its value
    differs from the value in the preceding column; the first generation is
    never a leap. Leaps are then summed over column positions 0-24, 25-49,
    50-74 and 75-end, and a final "Average" row holds the per-bucket mean
    across runs.

    Parameters
    ----------
    base_az : pandas.DataFrame
        One row per run followed by one summary row; generation columns
        followed by one trailing column (``exec_time`` in frames built by
        ``tabulate_runs``).
    runs : int
        Number of run rows; must equal ``len(base_az) - 1``.

    Returns
    -------
    pandas.DataFrame
        ``runs + 1`` rows labelled run_1..run_N and "Average" (index name
        "runs"), columns gen_25, gen_50, gen_75, gen_100.

    Raises
    ------
    ValueError
        If ``runs`` does not match the number of run rows in ``base_az``.
    """
    # Drop the trailing summary row and the trailing non-generation column.
    scores = base_az.iloc[:-1, :-1]
    if scores.shape[0] != runs:
        raise ValueError(
            "runs=%d does not match the %d run rows in the frame" % (runs, scores.shape[0])
        )

    values = scores.to_numpy()
    leap = np.zeros(values.shape, dtype=int)
    if values.shape[1] > 1:
        # 1 where a generation's score differs from the previous generation's.
        leap[:, 1:] = np.diff(values, axis=1) != 0

    run_labels = ["run_" + str(i) for i in range(1, runs + 1)]
    leap_df = pd.DataFrame(leap, columns=scores.columns, index=run_labels)

    data = {
        "gen_25": leap_df.iloc[:, :25].sum(axis=1),
        "gen_50": leap_df.iloc[:, 25:50].sum(axis=1),
        "gen_75": leap_df.iloc[:, 50:75].sum(axis=1),
        "gen_100": leap_df.iloc[:, 75:].sum(axis=1),
    }

    leap_count = pd.DataFrame(data)
    # DataFrame.append was removed in pandas 2.0; concat the mean row instead.
    leap_count = pd.concat(
        [leap_count, leap_count.mean().to_frame().T], ignore_index=True
    )

    leap_count.index = run_labels + ["Average"]
    leap_count.index.name = "runs"

    return leap_count

In [None]:
# Ad-hoc check of evolutionary_leap on the GA frame, with the run count hard-coded to 5.
# NOTE(review): `ga_az` is only assigned in a later cell (the one that calls tabulate_runs),
# so this cell raises NameError on Restart Kernel -> Run All unless that cell ran first.
leaps = evolutionary_leap(ga_az, 5)
leaps

In [None]:
# Last row of `leaps` (the row evolutionary_leap labels 'Average') divided by the hard-coded 5.
# NOTE(review): relies on `leaps` from the previous cell having been computed.
leaps.iloc[-1]/5

In [None]:
def likelihood_of_evolution_leap(az, imdb, yelp, runs):
    """Tabulate the per-bucket average leap count divided by ``runs``.

    ``evolutionary_leap`` is evaluated on each frame; its last ("Average")
    row is divided by ``runs`` and becomes that dataset's row. A fourth row
    holds the column-wise mean of the three dataset rows.

    Parameters
    ----------
    az, imdb, yelp : pandas.DataFrame
        Frames accepted by ``evolutionary_leap``.
    runs : int
        Number of run rows in each frame; also used as the divisor.

    Returns
    -------
    pandas.DataFrame
        4 x 4 frame indexed by "Amazon", "IMDB", "Yelp", "Average"
        (index name "Datasets") with columns gen_25, gen_50, gen_75, gen_100.
    """
    cols = ["gen_" + str(i) for i in [25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    # NOTE(review): the 'Average' row is already a mean over runs and is divided
    # by `runs` once more here -- confirm this is the intended definition.
    data = [
        evolutionary_leap(frame, runs).iloc[-1] / runs
        for frame in (az, imdb, yelp)
    ]

    el = pd.DataFrame(data, columns=cols)
    # DataFrame.append was removed in pandas 2.0; concat the mean row instead.
    el = pd.concat([el, el.mean().to_frame().T], ignore_index=True)

    el.index = index_col
    el.index.name = "Datasets"

    return el

In [None]:
def probability_of_convergence():
    """Placeholder, not implemented yet: takes no arguments and returns None."""
    return None

In [None]:
def average_no_of_function_evaluations():
    """Placeholder, not implemented yet: takes no arguments and returns None."""
    return None

In [None]:
def successful_performance(az, imdb, yelp):
    """Placeholder, not implemented yet: ignores its three arguments and returns None."""
    return None

In [None]:
def t_test():
    """Placeholder, not implemented yet: takes no arguments and returns None."""
    return None

In [None]:
def tabulate_runs(save_path, runs):
    """Load pickled run results and tabulate per-generation scores per run.

    The pickle must hold an iterable of runs, each indexable so that
    ``run[1]`` is the sequence of per-generation scores and ``run[2]`` the
    execution time (``run[0]`` is not used here).

    Parameters
    ----------
    save_path : str or path-like
        Path of the pickle file to read.
    runs : int
        Number of runs expected in the file; used for the row labels.

    Returns
    -------
    pandas.DataFrame
        ``runs + 1`` rows labelled run_1..run_N and "Average" (index name
        "runs"); columns gen_1..gen_G (G = number of scores per run) followed
        by ``exec_time``. The "Average" row is the column-wise mean.

    Raises
    ------
    ValueError
        If the file does not contain exactly ``runs`` runs.
    """
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # files from a trusted source.
    with open(save_path, 'rb') as gf:
        data = pickle.load(gf)

    scores = []
    exec_time = []
    for run in data:
        scores.append(run[1])
        exec_time.append(run[2])

    if len(scores) != runs:
        raise ValueError(
            "expected %d runs in %r, found %d" % (runs, save_path, len(scores))
        )

    df = pd.DataFrame(scores)
    # Label generations 1..G from the data instead of assuming exactly 100.
    df.columns = ["gen_" + str(i) for i in range(1, df.shape[1] + 1)]
    df["exec_time"] = exec_time

    # DataFrame.append was removed in pandas 2.0; concat the mean row instead.
    df = pd.concat([df, df.mean().to_frame().T], ignore_index=True)

    df.index = ["run_" + str(i) for i in range(1, runs + 1)] + ['Average']
    df.index.name = 'runs'

    return df

In [None]:
# Number of independent runs stored in each pickle file.
runs = 5

# Per-run score tables for the GA and KBGA results.
ga_az = tabulate_runs(f"{ga_path}corrected/{ga_files[0]}", runs)
kbga_az = tabulate_runs(f"{kbga_path}{kbga_files[1]}", runs)

# Side-by-side scores of both tables, each without its last column.
base_az = pd.concat(
    [frame.iloc[:, :-1] for frame in (ga_az, kbga_az)],
    axis="columns",
)
base_az

## Measures

In [None]:
# Average best fitness value, GA columns first, then KBGA.
# The same frame fills all three dataset slots, so the rows repeat its values.
abfv_ga = average_best_fitness_value(az=ga_az, imdb=ga_az, yelp=ga_az)
abfv_kbga = average_best_fitness_value(az=kbga_az, imdb=kbga_az, yelp=kbga_az)
abfv = pd.concat((abfv_ga, abfv_kbga), axis="columns")
abfv

In [None]:
# Average best-of-generation, GA columns first, then KBGA.
# The same frame fills all three dataset slots, so the rows repeat its values.
abog_ga = average_best_of_generation(az=ga_az, imdb=ga_az, yelp=ga_az)
abog_kbga = average_best_of_generation(az=kbga_az, imdb=kbga_az, yelp=kbga_az)
abog = pd.concat((abog_ga, abog_kbga), axis="columns")
abog

In [None]:
# Optimization accuracy, GA columns first, then KBGA.
# The same frame fills all three dataset slots, so the rows repeat its values.
oa_ga = optimization_accuracy(az=ga_az, imdb=ga_az, yelp=ga_az)
oa_kbga = optimization_accuracy(az=kbga_az, imdb=kbga_az, yelp=kbga_az)
oa = pd.concat((oa_ga, oa_kbga), axis="columns")
oa

In [None]:
# Likelihood of evolution leap, GA columns first, then KBGA.
# The same frame fills all three dataset slots, so the rows repeat its values.
el_ga = likelihood_of_evolution_leap(az=ga_az, imdb=ga_az, yelp=ga_az, runs=runs)
el_kbga = likelihood_of_evolution_leap(az=kbga_az, imdb=kbga_az, yelp=kbga_az, runs=runs)
el = pd.concat((el_ga, el_kbga), axis="columns")
el

## Save as CSV

In [None]:
# Write all tables into one CSV file: the combined score frame first, then each
# measure table, every one preceded by its own header row.
with open('report.csv', 'wb') as rf:
    for table in (base_az, abfv, abog, oa, el):
        table.to_csv(rf)