In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from tabulate import tabulate

In [6]:
# Directories (relative to this notebook) that hold the pickled run results.
ga_path = "../pickles/ga/"
kbga_path = "../pickles/kbga/corrected/"

In [7]:
# Result pickles for the GA runs (file names only; a directory is prepended
# when a file is loaded).
ga_files = [
    "n_run_az_100fc_ts.pkl",
    "n_run_az_ga_100fc_copt_avg.pkl",
]

In [8]:
# Result pickles for the KBGA runs (file names only; a directory is prepended
# when a file is loaded).
kbga_files = [
    "n_run_az_kbga_es50_100.pkl",
    "n_run_az_kbga_vp.pkl",
    "n_run_az_kbga_es10_100.pkl",
    "n_run_az_kbga_nokbps.pkl",
    "n_run_az_kbga_cmp.pkl",
]

In [162]:
def average_best_fitness_value(az, imdb, yelp):
    """Tabulate each dataset's last-row value at generations 1, 25, 50, 75 and 100.

    Args:
        az, imdb, yelp: DataFrames with at least 100 columns. Only the final
            row of each frame is read, at column positions 0, 24, 49, 74, 99.

    Returns:
        DataFrame with columns ``gen_1, gen_25, gen_50, gen_75, gen_100`` and
        rows ``Amazon, IMDB, Yelp, Average`` (index named ``Datasets``). The
        ``Average`` row is the column-wise mean of the three dataset rows.
    """
    cols = ["gen_"+str(i) for i in [1, 25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    positions = [0, 24, 49, 74, 99]

    # Read by position and hand plain lists to the constructor, so the result
    # does not depend on how the input columns happen to be labelled.
    data = [frame.iloc[-1, positions].tolist() for frame in (az, imdb, yelp)]

    abfv = pd.DataFrame(data, columns=cols)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    abfv = pd.concat([abfv, abfv.mean().to_frame().T], ignore_index=True)

    abfv.index = index_col
    abfv.index.name = "Datasets"

    return abfv

In [163]:
def average_best_of_generation(az, imdb, yelp):
    """Average each dataset's last-row values over four 25-generation windows.

    Args:
        az, imdb, yelp: DataFrames with at least 100 columns. Only the final
            row of each frame is read, over column positions 0-24, 25-49,
            50-74 and 75-99.

    Returns:
        DataFrame with columns ``1-25, 26-50, 51-75, 76-100`` and rows
        ``Amazon, IMDB, Yelp, Average`` (index named ``Datasets``). The
        ``Average`` row is the column-wise mean of the three dataset rows.
    """
    cols = ["1-25", "26-50", "51-75", "76-100"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    # Half-open positional windows: [0, 25), [25, 50), [50, 75), [75, 100).
    windows = [(stop - 25, stop) for stop in range(25, 101, 25)]

    data = [
        [frame.iloc[-1, start:stop].mean() for start, stop in windows]
        for frame in (az, imdb, yelp)
    ]

    abog = pd.DataFrame(data, columns=cols)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    abog = pd.concat([abog, abog.mean().to_frame().T], ignore_index=True)

    abog.index = index_col
    abog.index.name = "Datasets"
    return abog

In [164]:
def optimization_accuracy(az, imdb, yelp, base=None):
    """Min-max normalise each dataset's last-row value at generations 1, 25, 50, 75, 100.

    Every value ``v`` is mapped to ``(v - mins) / (maxs - mins)``, where
    ``mins`` and ``maxs`` are the smallest and largest entries of ``base``.
    The same bounds are applied to all three datasets.

    Args:
        az, imdb, yelp: DataFrames with at least 100 columns. Only the final
            row of each frame is read, at column positions 0, 24, 49, 74, 99.
        base: DataFrame supplying the normalisation bounds. When omitted, the
            notebook-level ``base_az`` is used, which is what this function
            always read implicitly before the parameter existed.

    Returns:
        DataFrame with columns ``gen_1, gen_25, gen_50, gen_75, gen_100`` and
        rows ``Amazon, IMDB, Yelp, Average`` (index named ``Datasets``).
    """
    cols = ["gen_"+str(i) for i in [1, 25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    positions = [0, 24, 49, 74, 99]

    if base is None:
        # Backward-compatible fallback to the global the original relied on;
        # raises NameError if the cell defining ``base_az`` has not been run.
        base = base_az

    mins = base.min().min()
    maxs = base.max().max()
    span = maxs - mins

    data = [
        ((frame.iloc[-1, positions] - mins) / span).tolist()
        for frame in (az, imdb, yelp)
    ]

    oa = pd.DataFrame(data, columns=cols)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    oa = pd.concat([oa, oa.mean().to_frame().T], ignore_index=True)

    oa.index = index_col
    oa.index.name = "Datasets"
    return oa

In [349]:
def evolutionary_leap(base_az, runs):
    """Count, per run, how many generations changed value relative to the previous one.

    A "leap" is recorded for a generation whenever its value differs from the
    value in the column immediately to its left. The first generation has no
    predecessor and never counts as a leap.

    Args:
        base_az: DataFrame whose last row and last column are excluded from
            the computation (callers in this notebook pass tables whose last
            row is ``Average`` and whose last column is ``exec_time``).
        runs: Number of run rows in ``base_az`` (i.e. rows minus the trailing
            summary row). Used to label the output rows.

    Returns:
        DataFrame with columns ``gen_25, gen_50, gen_75, gen_100`` holding the
        leap counts inside column positions 0-24, 25-49, 50-74 and 75 onward,
        one row per run (``run_1`` .. ``run_N``) plus an ``Average`` row with
        the column-wise mean. The index is named ``runs``.

    Raises:
        ValueError: If ``runs`` does not match the number of run rows.
    """
    # Drop the trailing summary row and the trailing non-score column.
    scores = base_az.iloc[:-1, :-1]
    if len(scores) != runs:
        raise ValueError(
            "expected %d run rows but the table has %d" % (runs, len(scores))
        )

    values = scores.to_numpy(dtype=float)
    run_labels = ["run_"+str(i) for i in range(1, runs+1)]

    # Column 0 stays 0 (no predecessor); every later column is 1 where the
    # value differs from the previous generation. Sized from the data rather
    # than a hard-coded 5 runs x 100 generations.
    leap = np.zeros(values.shape, dtype=int)
    leap[:, 1:] = np.diff(values, axis=1) != 0

    leap_df = pd.DataFrame(leap, columns=scores.columns, index=run_labels)

    data = {
        "gen_25": leap_df.iloc[:, :25].sum(axis=1),
        "gen_50": leap_df.iloc[:, 25:50].sum(axis=1),
        "gen_75": leap_df.iloc[:, 50:75].sum(axis=1),
        "gen_100": leap_df.iloc[:, 75:].sum(axis=1)
    }

    leap_count = pd.DataFrame(data)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    leap_count = pd.concat(
        [leap_count, leap_count.mean().to_frame().T], ignore_index=True
    )

    leap_count.index = run_labels + ['Average']
    leap_count.index.name = 'runs'

    return leap_count

In [350]:
# Exploratory check: leap counts for `ga_az` with the run count hard-coded to 5.
# NOTE: `ga_az` is assigned in a cell further down this notebook, so this cell
# raises NameError on a fresh top-to-bottom run.
leaps = evolutionary_leap(ga_az, 5)
leaps

Unnamed: 0_level_0,gen_25,gen_50,gen_75,gen_100
runs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
run_1,8.0,6.0,6.0,2.0
run_2,11.0,6.0,2.0,1.0
run_3,9.0,4.0,0.0,1.0
run_4,7.0,3.0,0.0,0.0
run_5,8.0,1.0,4.0,3.0
Average,8.6,4.0,2.4,1.4


In [348]:
# Last row of `leaps` (the "Average" row) divided by a hard-coded 5.
leaps.iloc[-1]/5

gen_25     1.72
gen_50     0.80
gen_75     0.48
gen_100    0.28
Name: Average, dtype: float64

In [344]:
def likelihood_of_evolution_leap(az, imdb, yelp, runs):
    """Tabulate the averaged leap counts of each dataset, divided by ``runs``.

    For every dataset the ``Average`` (last) row returned by
    ``evolutionary_leap`` is taken and divided by ``runs``.

    NOTE(review): that last row is already a mean over the runs, so the
    values here are divided by ``runs`` a second time and can exceed 1.
    Confirm this matches the intended definition of the measure.

    Args:
        az, imdb, yelp: Run tables accepted by ``evolutionary_leap``.
        runs: Number of runs in each table.

    Returns:
        DataFrame with columns ``gen_25, gen_50, gen_75, gen_100`` and rows
        ``Amazon, IMDB, Yelp, Average`` (index named ``Datasets``). The
        ``Average`` row is the column-wise mean of the three dataset rows.
    """
    cols = ["gen_"+str(i) for i in [25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        evolutionary_leap(frame, runs).iloc[-1]/runs
        for frame in (az, imdb, yelp)
    ]

    el = pd.DataFrame(data, columns=cols)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    el = pd.concat([el, el.mean().to_frame().T], ignore_index=True)

    el.index = index_col
    el.index.name = "Datasets"

    return el

In [None]:
def probability_of_convergence():
    """Placeholder: not implemented yet; takes no arguments and returns None."""
    return None

In [None]:
def average_no_of_function_evaluations():
    """Placeholder: not implemented yet; takes no arguments and returns None."""
    return None

In [None]:
def successful_performance():
    """Placeholder: not implemented yet; takes no arguments and returns None."""
    return None

In [None]:
def t_test():
    """Placeholder: not implemented yet; takes no arguments and returns None."""
    return None

In [332]:
def tabulate_runs(save_path, runs):
    """Load pickled run results and tabulate them, one row per run.

    The pickle is expected to hold a sequence of records; element ``[1]`` of
    each record is the per-generation score sequence and element ``[2]`` the
    execution time. Element ``[0]`` is not used here.

    Args:
        save_path: Path of the pickle file to read.
        runs: Number of records expected in the file; used to label the rows.

    Returns:
        DataFrame with columns ``gen_1`` .. ``gen_N`` (N = length of the score
        sequences) followed by ``exec_time``, rows ``run_1`` .. ``run_<runs>``
        plus a final ``Average`` row with the column-wise mean. The index is
        named ``runs``.

    Raises:
        ValueError: If the file does not contain exactly ``runs`` records.
    """
    # NOTE: pickle.load can execute arbitrary code from the file; only point
    # this at result files you produced yourself.
    with open(save_path, 'rb') as gf:
        records = list(pickle.load(gf))

    if len(records) != runs:
        raise ValueError(
            "expected %d runs in %r but found %d" % (runs, save_path, len(records))
        )

    scores = [run[1] for run in records]
    exec_time = [run[2] for run in records]

    df = pd.DataFrame(scores)
    # Label the generations from the data instead of assuming exactly 100.
    df.columns = ["gen_" + str(i) for i in range(1, df.shape[1] + 1)]
    df['exec_time'] = exec_time

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    df = pd.concat([df, df.mean().to_frame().T], ignore_index=True)

    df.index = ["run_"+str(i) for i in range(1, runs+1)] + ['Average']
    df.index.name = 'runs'

    return df

In [345]:
# Run count passed to tabulate_runs, which uses it to label the run rows.
runs = 5

# Load one GA and one KBGA result table. The GA file lives under a
# "corrected/" sub-directory that is appended here; kbga_path already ends in it.
ga_az = tabulate_runs(ga_path+"corrected/"+ga_files[0], runs)
kbga_az = tabulate_runs(kbga_path+kbga_files[1], runs)

# Place both tables side by side, dropping the last column (exec_time) of each.
# NOTE: the result carries every gen_* column label twice (GA first, then KBGA).
base_az = pd.concat([ga_az.iloc[:, :-1], kbga_az.iloc[:, :-1]], axis=1)
base_az

Unnamed: 0_level_0,gen_1,gen_2,gen_3,gen_4,gen_5,gen_6,gen_7,gen_8,gen_9,gen_10,...,gen_91,gen_92,gen_93,gen_94,gen_95,gen_96,gen_97,gen_98,gen_99,gen_100
runs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
run_1,0.624,0.624,0.66,0.66,0.66,0.668,0.676,0.68,0.68,0.684,...,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9
run_2,0.624,0.652,0.664,0.668,0.696,0.696,0.696,0.704,0.704,0.712,...,0.884,0.888,0.888,0.888,0.888,0.892,0.892,0.892,0.892,0.892
run_3,0.62,0.644,0.644,0.648,0.648,0.652,0.676,0.676,0.68,0.688,...,0.892,0.892,0.892,0.892,0.892,0.892,0.892,0.892,0.892,0.896
run_4,0.668,0.668,0.668,0.668,0.668,0.676,0.676,0.676,0.688,0.688,...,0.896,0.896,0.896,0.896,0.896,0.896,0.896,0.896,0.896,0.896
run_5,0.632,0.64,0.64,0.676,0.676,0.676,0.676,0.676,0.68,0.68,...,0.896,0.896,0.896,0.896,0.896,0.896,0.896,0.896,0.896,0.896
Average,0.6336,0.6456,0.6552,0.664,0.6696,0.6736,0.68,0.6824,0.6864,0.6904,...,0.8936,0.8944,0.8944,0.8944,0.8944,0.8952,0.8952,0.8952,0.8952,0.896


## Measures

In [153]:
# NOTE: the same frame is passed for all three dataset slots, so the "IMDB",
# "Yelp" and "Average" rows simply repeat the "Amazon" values.
abfv_ga = average_best_fitness_value(ga_az, ga_az, ga_az)
abfv_kbga = average_best_fitness_value(kbga_az, kbga_az, kbga_az)
# Side-by-side comparison: GA columns first, then KBGA (column labels repeat).
abfv = pd.concat([abfv_ga, abfv_kbga], axis=1)
abfv

Unnamed: 0_level_0,gen_1,gen_25,gen_50,gen_75,gen_100,gen_1,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amazon,0.6336,0.7248,0.7464,0.764,0.7696,0.6768,0.8368,0.8736,0.888,0.896
IMDB,0.6336,0.7248,0.7464,0.764,0.7696,0.6768,0.8368,0.8736,0.888,0.896
Yelp,0.6336,0.7248,0.7464,0.764,0.7696,0.6768,0.8368,0.8736,0.888,0.896
Average,0.6336,0.7248,0.7464,0.764,0.7696,0.6768,0.8368,0.8736,0.888,0.896


In [154]:
# NOTE: the same frame is passed for all three dataset slots, so the "IMDB",
# "Yelp" and "Average" rows simply repeat the "Amazon" values.
abog_ga = average_best_of_generation(ga_az, ga_az, ga_az)
abog_kbga = average_best_of_generation(kbga_az, kbga_az, kbga_az)
# Side-by-side comparison: GA columns first, then KBGA (column labels repeat).
abog = pd.concat([abog_ga, abog_kbga], axis=1)
abog

Unnamed: 0_level_0,1-25,26-50,51-75,76-100,1-25,26-50,51-75,76-100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amazon,0.696384,0.738208,0.756352,0.766944,0.781792,0.859904,0.881696,0.892896
IMDB,0.696384,0.738208,0.756352,0.766944,0.781792,0.859904,0.881696,0.892896
Yelp,0.696384,0.738208,0.756352,0.766944,0.781792,0.859904,0.881696,0.892896
Average,0.696384,0.738208,0.756352,0.766944,0.781792,0.859904,0.881696,0.892896


In [155]:
# NOTE: the same frame is passed for all three dataset slots, so the "IMDB",
# "Yelp" and "Average" rows simply repeat the "Amazon" values.
oa_ga = optimization_accuracy(ga_az, ga_az, ga_az)
oa_kbga = optimization_accuracy(kbga_az, kbga_az, kbga_az)
# Side-by-side comparison: GA columns first, then KBGA (column labels repeat).
oa = pd.concat([oa_ga, oa_kbga], axis=1)
oa

Unnamed: 0_level_0,gen_1,gen_25,gen_50,gen_75,gen_100,gen_1,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amazon,0.048571,0.374286,0.451429,0.514286,0.534286,0.202857,0.774286,0.905714,0.957143,0.985714
IMDB,0.048571,0.374286,0.451429,0.514286,0.534286,0.202857,0.774286,0.905714,0.957143,0.985714
Yelp,0.048571,0.374286,0.451429,0.514286,0.534286,0.202857,0.774286,0.905714,0.957143,0.985714
Average,0.048571,0.374286,0.451429,0.514286,0.534286,0.202857,0.774286,0.905714,0.957143,0.985714


In [353]:
# NOTE: the same frame is passed for all three dataset slots, so the "IMDB",
# "Yelp" and "Average" rows simply repeat the "Amazon" values.
el_ga = likelihood_of_evolution_leap(ga_az, ga_az, ga_az, runs)
el_kbga = likelihood_of_evolution_leap(kbga_az, kbga_az, kbga_az, runs)
# Side-by-side comparison: GA columns first, then KBGA (column labels repeat).
el = pd.concat([el_ga, el_kbga], axis=1)
el

Unnamed: 0_level_0,gen_25,gen_50,gen_75,gen_100,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amazon,1.72,0.8,0.48,0.28,3.24,1.56,0.68,0.4
IMDB,1.72,0.8,0.48,0.28,3.24,1.56,0.68,0.4
Yelp,1.72,0.8,0.48,0.28,3.24,1.56,0.68,0.4
Average,1.72,0.8,0.48,0.28,3.24,1.56,0.68,0.4


## Save as CSV

In [352]:
# Write the raw score table followed by every measure table into one file.
# Each table is emitted with its own header row, so the result is a stacked
# report rather than a single rectangular CSV.
with open('report.csv', 'wb') as rf:
    for table in (base_az, abfv, abog, oa, el):
        table.to_csv(rf, header=True)