In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle 
from tabulate import tabulate

In [26]:
ga_path = '../pickles/ga/'
kbga_path = '../pickles/kbga/30runs/'

In [24]:
ga_files = ['n_run_az_100fc_ts.pkl', 
            "n_run_az_ga_100fc_copt_avg.pkl"]

In [5]:
kbga_files = ['30_run_az_kbga_vp_ts.pkl',
              '30_run_imdb_kbga_vp_ts.pkl',
              'n_run_az_kbga_es50_100.pkl',
              'n_run_az_kbga_vp.pkl',
              'n_run_az_kbga_es10_100.pkl',
              'n_run_az_kbga_nokbps.pkl', 
              'n_run_az_kbga_cmp.pkl']

In [6]:
def average_best_fitness_value(az, imdb, yelp):
    cols = ["gen_"+str(i) for i in [1, 25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    data = [
        az.iloc[-1, [0, 24, 49, 74, 99]],
        imdb.iloc[-1, [0, 24, 49, 74, 99]],
        yelp.iloc[-1, [0, 24, 49, 74, 99]],
        ]
    
    abfv = pd.DataFrame(data, columns=cols)
    abfv = abfv.append(abfv.mean(), ignore_index=True)

    abfv.index = index_col
    abfv.index.name = "Datasets"
    
    return abfv

In [7]:
def average_best_of_generation(az, imdb, yelp):
    cols = ["1-25", "26-50", "51-75", "76-100"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    data = [
        [az.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
        [imdb.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
        [yelp.iloc[-1, i-25:i].mean() for i in range(25, 101, 25)],
    ]

    abog = pd.DataFrame(data, columns=cols)
    abog = abog.append(abog.mean(), ignore_index=True)

    abog.index = index_col
    abog.index.name = "Datasets"
    return abog

In [8]:
def optimization_accuracy(az, imdb, yelp):
    cols = ["gen_"+str(i) for i in [1, 25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]
    
    mins = min(base_az.min().to_list())
    maxs = max(base_az.max().to_list())
    
    data = [
        list(map(lambda i: (i-mins)/(maxs-mins), az.iloc[-1, [0, 24, 49, 74, 99]])),
        list(map(lambda i: (i-mins)/(maxs-mins), imdb.iloc[-1, [0, 24, 49, 74, 99]])),
        list(map(lambda i: (i-mins)/(maxs-mins), yelp.iloc[-1, [0, 24, 49, 74, 99]])),
    ]

    oa = pd.DataFrame(data, columns=cols)
    oa = oa.append(oa.mean(), ignore_index=True)

    oa.index = index_col
    oa.index.name = "Datasets"
    return oa

In [9]:
def evolutionary_leap(base, runs):
    leap = [[0 for i in range(runs)]]
    for i in range(1, 100):
        # print(ga_az.iloc[:, i] - ga_az.iloc[:, i-1])
        leap.append(list(map(lambda x: 1 if x != 0 else 0, base.iloc[:-1, i] - base.iloc[:-1, i-1])))
    leap = np.transpose(leap)

    indexes = ["run_"+str(i) for i in range(1, runs+1)]
    leap_df = pd.DataFrame(leap, columns=base.columns[:-1])
    leap_df.index = indexes

    data = {
        "gen_25": leap_df.iloc[:, :25].sum(axis=1),
        "gen_50": leap_df.iloc[:, 25:50].sum(axis=1),
        "gen_75": leap_df.iloc[:, 50:75].sum(axis=1),
        "gen_100": leap_df.iloc[:, 75:].sum(axis=1)
    }
    
    leap_count = pd.DataFrame(data)
    leap_count = leap_count.append(leap_count.mean(), ignore_index=True)
    
    indexes = ["run_"+str(i) for i in range(1, runs+1)] + ['Average']
    leap_count.index = indexes
    leap_count.index.name = 'runs'
    
    return leap_count


def likelihood_of_evolution_leap(az, imdb, yelp, runs):
    cols = ["gen_"+str(i) for i in [25, 50, 75, 100]]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    az_leaps = evolutionary_leap(az, runs)
    imdb_leaps = evolutionary_leap(imdb, runs)
    yelp_leaps = evolutionary_leap(yelp, runs)

    data = [
        az_leaps.iloc[-1]/runs,
        imdb_leaps.iloc[-1]/runs,
        yelp_leaps.iloc[-1]/runs,
    ]

    el = pd.DataFrame(data, columns=cols)
    el = el.append(el.mean(), ignore_index=True)

    el.index = index_col
    el.index.name = "Datasets"

    return el

In [10]:
def probability_of_convergence(az, imdb, yelp, success_thresh, runs):
    az_count = 0
    imdb_count = 0
    yelp_count = 0

    for i in range(runs):
        if az.iloc[i, -2] >= success_thresh:
            az_count += 1
        if imdb.iloc[i, -2] >= success_thresh:
            imdb_count += 1
        if yelp.iloc[i, -2] >= success_thresh:
            yelp_count += 1

    cols = ["P"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        az_count/runs,
        imdb_count/runs,
        yelp_count/runs
    ]

    pc = pd.DataFrame(data, columns=cols)
    pc = pc.append(pc.mean(), ignore_index=True)

    pc.index = index_col
    pc.index.name = "Datasets"

    return pc

In [11]:
def function_evaluations(base, success_thresh, runs):
    evolutions = 0

    mask = base.iloc[:, :-1] >= success_thresh
    for i in range(runs):
        try:
            # print(base[mask].iloc[i].dropna())
            evolutions += int(base[mask].iloc[i].dropna().index[0].split("_")[1]) 
        except:
            evolutions += 0

    return evolutions


def average_no_of_function_evaluations(az, imdb, yelp, success_thresh, runs):
    az_eval = function_evaluations(az, success_thresh, runs)
    imdb_eval = function_evaluations(imdb, success_thresh, runs)
    yelp_eval = function_evaluations(yelp, success_thresh, runs)

    cols = ["AFES"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        az_eval/runs,
        imdb_eval/runs,
        yelp_eval/runs
    ]

    afes = pd.DataFrame(data, columns=cols)
    afes = afes.append(afes.mean(), ignore_index=True)

    afes.index = index_col
    afes.index.name = "Datasets"

    return afes

In [12]:
def successful_performance(az, imdb, yelp, success_thresh, runs):
    afes = average_no_of_function_evaluations(az, imdb, yelp, success_thresh, runs)
    p = probability_of_convergence(az, imdb, yelp, success_thresh, runs)

    cols = ["SP"]
    index_col = ["Amazon", "IMDB", "Yelp", "Average"]

    data = [
        afes.iloc[0, 0]/p.iloc[0, 0],
        afes.iloc[1, 0]/p.iloc[1, 0],
        afes.iloc[2, 0]/p.iloc[2, 0]
    ]

    sp = pd.DataFrame(data, columns=cols)
    sp = sp.append(sp.mean(), ignore_index=True)

    sp.index = index_col
    sp.index.name = "Datasets"

    return sp

In [13]:
from scipy import stats

def one_tailed_t_test(abfv, abog, oa, el):
    h0 = False
    alpha = 0.05


    t_value, p_value = stats.ttest_1samp(abfv.iloc[:-1, :5], abfv.iloc[:-1, 5:])
    p_df = pd.DataFrame(p_value).astype()
    print(p_df)
        

In [14]:
one_tailed_t_test(abfv, abog, oa, el)

NameError: name 'abfv' is not defined

In [17]:
def tabulate_runs(save_path, runs):
    with open(save_path, 'rb') as gf:
        data = pickle.load(gf)
        chromos = []
        scores = []
        exec_time = []
        for run in data:
            chromos.append(run[0])
            scores.append(run[1])
            exec_time.append(run[2])

    df = pd.DataFrame()
    cols = ["gen_" + str(i) for i in range(1, 101)]
    df[cols] = pd.DataFrame(scores)

    df = pd.concat([df, pd.DataFrame({'exec_time': exec_time})], axis=1)
    df = df.append(df.mean(), ignore_index=True)

    indexes = ["run_"+str(i) for i in range(1, runs+1)] + ['Average']
    df.index = indexes
    df.index.name = 'runs'
    
    return df

In [29]:
runs = 30
success_thresh = 0.8

ga_az = tabulate_runs(ga_path+"corrected/"+ga_files[0], 5)
# ga_az = tabulate_runs(kbga_path+kbga_files[0], runs)
ga_imdb = ga_az
ga_yelp = ga_az

kbga_az = tabulate_runs(kbga_path+kbga_files[0], runs)
kbga_imdb = tabulate_runs(kbga_path+kbga_files[1], runs)
kbga_yelp = kbga_az


  df = df.append(df.mean(), ignore_index=True)


## Measures

In [37]:
abfv_ga = average_best_fitness_value(ga_az, ga_imdb, ga_yelp)
abfv_kbga = average_best_fitness_value(kbga_az, kbga_imdb, kbga_yelp)
abfv = pd.concat([abfv_ga, abfv_kbga], axis=1)
abfv

Unnamed: 0_level_0,gen_1,gen_25,gen_50,gen_75,gen_100,gen_1,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amazon,0.6336,0.7248,0.7464,0.764,0.7696,0.688667,0.834267,0.870667,0.885467,0.892267
IMDB,0.6336,0.7248,0.7464,0.764,0.7696,0.700535,0.827718,0.877094,0.903209,0.917736
Yelp,0.6336,0.7248,0.7464,0.764,0.7696,0.688667,0.834267,0.870667,0.885467,0.892267
Average,0.6336,0.7248,0.7464,0.764,0.7696,0.692623,0.832084,0.872809,0.891381,0.900757


In [36]:
abog_ga = average_best_of_generation(ga_az, ga_imdb, ga_yelp)
abog_kbga = average_best_of_generation(kbga_az, kbga_imdb, kbga_yelp)
abog = pd.concat([abog_ga, abog_kbga], axis=1)
abog

Unnamed: 0_level_0,1-25,26-50,51-75,76-100,1-25,26-50,51-75,76-100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amazon,0.696384,0.738208,0.756352,0.766944,0.78184,0.856693,0.879093,0.889339
IMDB,0.696384,0.738208,0.756352,0.766944,0.776873,0.855918,0.891547,0.911127
Yelp,0.696384,0.738208,0.756352,0.766944,0.78184,0.856693,0.879093,0.889339
Average,0.696384,0.738208,0.756352,0.766944,0.780184,0.856435,0.883245,0.896601


In [35]:
oa_ga = optimization_accuracy(ga_az, ga_imdb, ga_yelp)
oa_kbga = optimization_accuracy(kbga_az, kbga_imdb, kbga_yelp)
oa = pd.concat([oa_ga, oa_kbga], axis=1)
oa

Unnamed: 0_level_0,gen_1,gen_25,gen_50,gen_75,gen_100,gen_1,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amazon,0.047222,0.363889,0.438889,0.5,0.519444,0.238426,0.743981,0.87037,0.921759,0.94537
IMDB,0.047222,0.363889,0.438889,0.5,0.519444,0.279635,0.721244,0.892689,0.983363,1.033806
Yelp,0.047222,0.363889,0.438889,0.5,0.519444,0.238426,0.743981,0.87037,0.921759,0.94537
Average,0.047222,0.363889,0.438889,0.5,0.519444,0.252162,0.736402,0.87781,0.942294,0.974849


In [34]:
el_ga = likelihood_of_evolution_leap(ga_az, ga_imdb, ga_yelp, 5)
el_kbga = likelihood_of_evolution_leap(kbga_az, kbga_imdb, kbga_yelp, runs)
el = pd.concat([el_ga, el_kbga], axis=1)
el

Unnamed: 0_level_0,gen_25,gen_50,gen_75,gen_100,gen_25,gen_50,gen_75,gen_100
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amazon,1.72,0.8,0.48,0.28,0.508889,0.25,0.118889,0.056667
IMDB,1.72,0.8,0.48,0.28,0.546667,0.414444,0.278889,0.171111
Yelp,1.72,0.8,0.48,0.28,0.508889,0.25,0.118889,0.056667
Average,1.72,0.8,0.48,0.28,0.521481,0.304815,0.172222,0.094815


In [31]:
pc_ga = probability_of_convergence(ga_az, ga_imdb, ga_yelp, success_thresh, 5)
pc_kbga = probability_of_convergence(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
pc = pd.concat([pc_ga, pc_kbga], axis=1)
pc

Unnamed: 0_level_0,P,P
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,0.2,1.0
IMDB,0.2,1.0
Yelp,0.2,1.0
Average,0.2,1.0


In [32]:
afes_ga = average_no_of_function_evaluations(ga_az, ga_imdb, ga_yelp, success_thresh, 5)
afes_kbga = average_no_of_function_evaluations(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
afes = pd.concat([afes_ga, afes_kbga], axis=1)
afes

Unnamed: 0_level_0,AFES,AFES
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,15.4,14.7
IMDB,15.4,17.466667
Yelp,15.4,14.7
Average,15.4,15.622222


In [33]:
sp_ga = successful_performance(ga_az, ga_imdb, ga_yelp, success_thresh, 5)
sp_kbga = successful_performance(kbga_az, kbga_imdb, kbga_yelp, success_thresh, runs)
sp = pd.concat([sp_ga, sp_kbga], axis=1)
sp

Unnamed: 0_level_0,SP,SP
Datasets,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon,77.0,14.7
IMDB,77.0,17.466667
Yelp,77.0,14.7
Average,77.0,15.622222


## Save as CSV

In [None]:
with open('report.csv', 'ab') as rf:
    for i in [abfv, abog, oa, el, pc, afes, sp]:
        i.to_csv(rf, header=True)