In [1]:
import io
import os

### Find Best Params

First, find the parameter sets with the lowest mean MSE.

In [8]:
def file_read_from_tail(fname, lines):
    """Return the last ``lines`` lines of the text file ``fname``.

    The file is scanned backwards in fixed-size binary chunks until enough
    newline bytes have been seen, so only the tail of a large file is read.
    The collected bytes are then decoded with the same defaults as
    ``open(fname)`` (default encoding, universal newlines).

    Args:
        fname: Path of the file to read.
        lines: Maximum number of trailing lines to return.

    Returns:
        A list of at most ``lines`` strings in file order, each keeping its
        trailing newline when it has one. A file with fewer lines yields all
        of them; an empty file or a non-positive ``lines`` yields ``[]``.

    Note:
        Line boundaries are located by searching for the byte ``b"\\n"``,
        which assumes an ASCII-compatible encoding such as UTF-8.
    """
    if lines <= 0:
        return []

    bufsize = 8192
    chunks = []
    newline_count = 0
    with open(fname, "rb") as f:
        pos = f.seek(0, os.SEEK_END)
        # Collect one newline more than requested: that extra newline marks
        # the start of the first returned line, so it is never cut short at
        # a chunk boundary.
        while pos > 0 and newline_count <= lines:
            step = min(bufsize, pos)
            pos -= step
            f.seek(pos)
            chunk = f.read(step)
            newline_count += chunk.count(b"\n")
            chunks.append(chunk)

    # Chunks were gathered back-to-front; restore file order.
    tail = b"".join(reversed(chunks))
    if pos > 0:
        # The scan stopped mid-file, so the buffer starts inside a line.
        # Drop that partial line so decoding begins on a line boundary.
        tail = tail[tail.index(b"\n") + 1:]

    with io.TextIOWrapper(io.BytesIO(tail)) as reader:
        return reader.readlines()[-lines:]


In [24]:
def parse_mse(line):
    """Parse the numeric value from one result line.

    Surrounding whitespace is stripped, the first four characters are
    skipped (presumably a label such as ``mse:`` -- confirm against the
    experiment log format) and the remainder is converted to ``float``.

    Raises:
        ValueError: If the remainder is not a valid float literal.
    """
    trimmed = line.strip()
    value_text = trimmed[4:]
    return float(value_text)

def get_key(f):
    """Derive the parameter key from a result file name.

    The last six characters of ``f`` are removed; if what remains ends in
    an underscore, that underscore is removed as well (seven characters in
    total). Presumably names look like ``<params>_<run>.<ext>`` with a one-
    or two-digit run number -- confirm against the experiment file naming.

    Raises:
        IndexError: If ``f`` has six characters or fewer.
    """
    stem = f[:-6]
    # A trailing underscore means the suffix was one character longer.
    return stem[:-1] if stem[-1] == "_" else stem

In [27]:
from statistics import mean, stdev

def aggregateParams(experiment_dir, top_n=5):
    """Aggregate per-run MSE values by parameter set and report the best sets.

    Each regular file in ``experiment_dir`` is mapped to a parameter key with
    ``get_key``. The first of its last three lines is parsed with
    ``parse_mse``. All values sharing a key are summarised by their mean and
    sample standard deviation, and the keys are ranked by ascending mean.

    Args:
        experiment_dir: Directory holding the result files.
        top_n: How many of the best-ranked parameter sets to print and
            return. Defaults to 5; fewer are reported when fewer exist.

    Returns:
        A 4-tuple ``(results, params_to_mse, mse_to_params, all_mse)``:

        * ``results`` -- the best-ranked parameter keys, best first.
        * ``params_to_mse`` -- maps each key to ``(mean_mse, std_mse)``;
          ``std_mse`` is NaN when the key has a single run.
        * ``mse_to_params`` -- maps each mean MSE to a key. When several
          keys share a mean, only one of them is kept.
        * ``all_mse`` -- every mean MSE in ascending order.
    """
    params_to_mse = {}

    # aggregate mse (sorted for a reproducible order across platforms)
    for f in sorted(os.listdir(experiment_dir)):
        path = os.path.join(experiment_dir, f)
        if not os.path.isfile(path):
            # Subdirectories cannot be opened as result files.
            continue
        d = file_read_from_tail(path, 3)
        if not d:
            # Nothing readable (None or no lines): skip this file.
            continue
        params_to_mse.setdefault(get_key(f), []).append(parse_mse(d[0]))

    # summarise each parameter set
    mse_to_params = {}
    for params, mses in params_to_mse.items():
        mean_mse = mean(mses)
        # The sample standard deviation needs at least two runs.
        std_mse = stdev(mses) if len(mses) > 1 else float("nan")
        params_to_mse[params] = (mean_mse, std_mse)
        mse_to_params[mean_mse] = params

    # Rank the keys themselves rather than going through mse_to_params, so
    # parameter sets with identical means are all kept in the ranking.
    ranked = sorted(params_to_mse, key=lambda params: params_to_mse[params][0])
    all_mse = [params_to_mse[params][0] for params in ranked]

    # report the best parameter sets
    results = ranked[:max(top_n, 0)]
    for i, params in enumerate(results):
        mean_mse, std_mse = params_to_mse[params]
        print("Result {}: {} with mean mse {}, std mse {}".format(i, params, mean_mse, std_mse))

    return results, params_to_mse, mse_to_params, all_mse

In [28]:
# Aggregate the result files under ../experiments/ (this also prints the
# top-ranked parameter sets). all_params_to_mse is read later by
# calc_means_and_stds.
all_results, all_params_to_mse, all_mse_to_params, all_mse = aggregateParams("../experiments/")

Result 0: maxtime20_pop512_mr0.001_tour8_maxHeight2_cr1_bpeverygen10 with mean mse 4921.5548, std mse 920.4438200685581
Result 1: maxtime20_pop512_mr0.001_tour8_maxHeight2_cr1 with mean mse 4988.2659, std mse 1133.1624242463556
Result 2: maxtime20_pop512_mr0.001_tour8_maxHeight2_cr1_bpeverygen20 with mean mse 5192.3958, std mse 1370.9129199891906
Result 3: maxtime20_pop512_mr0.001_tour8_maxHeight2_cr1_bpeverygen50 with mean mse 5466.2346, std mse 872.6040260407286
Result 4: maxtime20_pop512_mr0.001_tour8_maxHeight2_cr1_bpeverygen100 with mean mse 5520.9209, std mse 1135.7488312561256


### Analyse Simple GP Behaviour

Holding all other variables constant, let's see what effect each parameter has.

In [None]:
def calc_means_and_stds(template_str, options, params_to_mse=None):
    """Look up the aggregated statistics for each value of one parameter.

    Args:
        template_str: Parameter-key template containing one ``{}``
            placeholder for the varied value.
        options: Values to substitute into the template, in plot order.
        params_to_mse: Mapping from parameter key to its statistics. When
            omitted, the notebook-level ``all_params_to_mse`` is used.

    Returns:
        A list of ``(option, stats)`` pairs in the order of ``options``,
        where ``stats`` is whatever the mapping stores for that key.

    Raises:
        KeyError: If a formatted key is missing from the mapping.
    """
    if params_to_mse is None:
        # Fall back to the aggregate held in the notebook's global state.
        params_to_mse = all_params_to_mse
    return [(o, params_to_mse[template_str.format(o)]) for o in options]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def errorbar_plot(values):
    """Draw and show an error-bar plot for ``(x, (y, error))`` entries.

    Args:
        values: Iterable of entries where ``entry[0]`` is the x value,
            ``entry[1][0]`` the y value and ``entry[1][1]`` the size of
            the error bar, e.g. the output of ``calc_means_and_stds``.
    """
    entries = list(values)
    xs = np.array([entry[0] for entry in entries])
    ys = np.array([entry[1][0] for entry in entries])
    errors = np.array([entry[1][1] for entry in entries])

    plt.errorbar(xs, ys, errors, linestyle='-', marker='^')
    plt.show()

In [None]:
#### Variation with population size

# Key template: only the population size is substituted; every other
# setting in the key is held fixed.
template_pop_f = "maxtime20_pop{}_mr0.01_tour8_maxHeight2_cr1"
# Population sizes to compare; each formatted key must exist in
# all_params_to_mse.
populations = [64, 128, 256, 512, 1024, 2048, 4096, 8192]

# Plot mean MSE against population size, with the standard deviation as error bars.
errorbar_plot(calc_means_and_stds(template_pop_f, populations))

In [None]:
#### Variation with mutation rate

# Key template: only the mutation rate is substituted; every other
# setting in the key is held fixed.
template_mr_f = "maxtime20_pop512_mr{}_tour8_maxHeight2_cr1"
# Mutation rates to compare; each formatted key must exist in
# all_params_to_mse (0 is formatted as "mr0").
mutation_rates = [0, 0.001, 0.01, 0.1]

# Plot mean MSE against mutation rate, with the standard deviation as error bars.
errorbar_plot(calc_means_and_stds(template_mr_f, mutation_rates))

In [None]:
#### Variation with tour size

# Key template: only the tour size is substituted; every other setting
# in the key is held fixed.
template_tour_f = "maxtime20_pop512_mr0.01_tour{}_maxHeight2_cr1"
# Tour sizes to compare; each formatted key must exist in all_params_to_mse.
tour_size = [2, 4, 8]

# Plot mean MSE against tour size, with the standard deviation as error bars.
errorbar_plot(calc_means_and_stds(template_tour_f, tour_size))

In [None]:
#### Variation with max height

# Key template: only the max height is substituted; every other setting
# in the key is held fixed.
template_max_f = "maxtime20_pop512_mr0.01_tour8_maxHeight{}_cr1"
# Max heights to compare; each formatted key must exist in all_params_to_mse.
maxHeights = [2, 4, 8]

# Plot mean MSE against max height, with the standard deviation as error bars.
errorbar_plot(calc_means_and_stds(template_max_f, maxHeights))

In [None]:
#### Variation with crossover rate

# Key template: only the crossover rate is substituted; every other
# setting in the key is held fixed.
template_cr_f = "maxtime20_pop512_mr0.01_tour8_maxHeight2_cr{}"
# Crossover rates to compare; each formatted key must exist in
# all_params_to_mse (the integer 1 is formatted as "cr1").
crossoverRates = [0.25, 0.5, 0.75, 1] 

# Plot mean MSE against crossover rate, with the standard deviation as error bars.
errorbar_plot(calc_means_and_stds(template_cr_f, crossoverRates))

In [None]:
#### Variation with max time

# Key template: only the max time is substituted; every other setting
# in the key is held fixed.
template_mt_f = "maxtime{}_pop512_mr0.01_tour8_maxHeight2_cr1"
# Max times to compare; each formatted key must exist in all_params_to_mse.
maxTime = [5, 10, 15, 20]

# Plot mean MSE against max time, with the standard deviation as error bars.
errorbar_plot(calc_means_and_stds(template_mt_f, maxTime))