In [None]:
import os

import mlrose_hiive as mlrose
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler

#### Queens Tuning

In [None]:
# Build the 24-queens problem (generator seed 420) that the RHC and SA tuning cells below reuse.
problem = mlrose.QueensGenerator().generate(seed=420, size=24)

In [None]:
# RANDOM HILL CLIMBING TUNING (max attempts, restarts)
# Sweeps max_attempts over six values for 3 runner seeds (42, 84, 126), each with
# `restarts` restarts, and collects one row per stats record logged at Iteration == max_iter.
max_iter = 1000
restarts = 100
hc_tuning_results = pd.DataFrame(columns=['iter', 'max_attempts', 'restart', 'fitness', 'time'])
for i in range(3):
    for max_attempts in [1, 5, 10, 25, 50, 75]:
        problem.reset()
        # Second positional argument is an empty string — presumably the experiment name; confirm against mlrose_hiive.
        hc_runner = mlrose.RHCRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            restart_list=[restarts],
            max_attempts=max_attempts,
            generate_curves=True
        )
        stats, _ = hc_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=hc_tuning_results.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        results['restart'] = end_stats['current_restart'].values
        # Running minimum over the kept rows (in row order), i.e. best-so-far when lower fitness is better.
        results['fitness'] = end_stats['Fitness'].cummin().values
        results['time'] = end_stats['Time'].values
        hc_tuning_results = pd.concat([hc_tuning_results, results], axis=0)
hc_tuning_results.reset_index(drop=True, inplace=True)

In [None]:
# Mean 'time' per (max_attempts, restart): x-axis is restart, one line per max_attempts value.
hc_tuning_results.groupby(['max_attempts', 'restart'])['time'].mean().unstack().T.plot();

In [None]:
# Mean 'fitness' per (max_attempts, restart): x-axis is restart, one line per max_attempts value.
hc_tuning_results.groupby(['max_attempts', 'restart'])['fitness'].mean().unstack().T.plot();

In [None]:
# Persist the RHC tuning table. NOTE: to_csv does not create ./results/; the directory must already exist.
hc_tuning_results.to_csv('./results/hc_tuning_queens.csv')

In [None]:
# SIMULATED ANNEALING TUNING (max attempts)
# Sweeps max_attempts over six values for 5 runner seeds (42 ... 210) with temperature_list=[1],
# keeping the stats rows logged at Iteration == max_iter.
max_iter = 10000
sa_tuning_results_1 = pd.DataFrame(columns=['iter', 'max_attempts', 'init_temp', 'fitness', 'time'])
for i in range(5):
    for max_attempts in [10, 25, 50, 100, 200, 500]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[1],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=sa_tuning_results_1.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Initial temperature is read back from the runner's stats rather than from the loop.
        results['init_temp'] = end_stats['schedule_init_temp'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        sa_tuning_results_1 = pd.concat([sa_tuning_results_1, results], axis=0)
sa_tuning_results_1.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per max_attempts. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
sa_tuning_results_1.groupby('max_attempts')['time'].mean().plot();
sa_tuning_results_1.groupby('max_attempts')['fitness'].mean().plot();

In [None]:
# Persist the SA max-attempts sweep (./results/ must already exist).
sa_tuning_results_1.to_csv('./results/sa_tuning_queens_1.csv')

In [None]:
# SIMULATED ANNEALING TUNING (init temp)
# Sweeps the value passed in temperature_list over nine settings for 5 runner seeds,
# with max_attempts fixed at 200; keeps the stats rows logged at Iteration == max_iter.
max_iter = 10000
max_attempts = 200
sa_tuning_results_2 = pd.DataFrame(columns=['iter', 'max_attempts', 'init_temp', 'fitness', 'time'])
for i in range(5):
    for temp in [.1, .5, 1, 2, 5, 10, 50, 100, 1000]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[temp],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=sa_tuning_results_2.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Recorded from the runner's stats column, not from `temp` directly.
        results['init_temp'] = end_stats['schedule_init_temp'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        sa_tuning_results_2 = pd.concat([sa_tuning_results_2, results], axis=0)
sa_tuning_results_2.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per initial temperature. Both calls omit `ax`, so the two
# unlabeled series are expected to be drawn on the same axes and share one y-scale.
sa_tuning_results_2.groupby('init_temp')['time'].mean().plot();
sa_tuning_results_2.groupby('init_temp')['fitness'].mean().plot();

In [None]:
# Persist the SA initial-temperature sweep (./results/ must already exist).
sa_tuning_results_2.to_csv('./results/sa_tuning_queens_2.csv')

In [None]:
# SIMULATED ANNEALING TUNING (decay)
# Sweeps the GeomDecay `decay` argument over six values for 5 runner seeds, with the
# schedule's first argument fixed at temp=5 and max_attempts=200.
max_iter = 10000
max_attempts = 200
temp = 5
sa_tuning_results_3 = pd.DataFrame(columns=['iter', 'max_attempts', 'init_temp', 'decay', 'fitness', 'time'])
for i in range(5):
    for decay in [.95, .99, .995, .999, .9995, .9999]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[mlrose.GeomDecay(temp, decay=decay)],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=sa_tuning_results_3.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        results['init_temp'] = end_stats['schedule_init_temp'].values
        # Decay is taken from the loop variable; the other schedule field comes from stats.
        results['decay'] = decay
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        sa_tuning_results_3 = pd.concat([sa_tuning_results_3, results], axis=0)
sa_tuning_results_3.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per decay value. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
sa_tuning_results_3.groupby('decay')['time'].mean().plot();
sa_tuning_results_3.groupby('decay')['fitness'].mean().plot();

In [None]:
# Display the mean time per decay value as a table.
sa_tuning_results_3.groupby('decay')['time'].mean()

In [None]:
# Display the mean fitness per decay value as a table.
sa_tuning_results_3.groupby('decay')['fitness'].mean()

In [None]:
# Persist the SA decay sweep (./results/ must already exist).
sa_tuning_results_3.to_csv('./results/sa_tuning_queens_3.csv')

In [None]:
# Rebind `problem` to a 24-queens QueensOpt with one-point crossover for the GA tuning cells.
# The crossover operator is constructed around its own, separate QueensOpt(length=24) instance.
np.random.seed(420)
problem = mlrose.QueensOpt(length=24, crossover=mlrose.OnePointCrossOver(mlrose.QueensOpt(length=24)))

In [None]:
# GENETIC ALGORITHM TUNING (mutation rates)
# Sweeps the mutation rate over five values for 3 runner seeds (42, 84, 126) with
# population size 200 and max_attempts 200; keeps the stats rows logged at Iteration == max_iter.
max_iter = 1000
max_attempts = 200
# NOTE: `mutation_rates` is not read in this cell — the runner receives [mutation_rate] from the loop.
mutation_rates = [.1]
population_sizes = [200]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
ga_tuning_results_3 = pd.DataFrame(columns=cols)
for i in range(3):
    for mutation_rate in [.01, .1, .2, .5, 1]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            mutation_rates=[mutation_rate],
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=ga_tuning_results_3.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Population size and mutation rate are read back from the runner's stats.
        results['pop_size'] = end_stats['Population Size'].values
        results['mutation_rate'] = end_stats['Mutation Rate'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        ga_tuning_results_3 = pd.concat([ga_tuning_results_3, results], axis=0)
ga_tuning_results_3.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per mutation rate. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
ga_tuning_results_3.groupby('mutation_rate')['time'].mean().plot();
ga_tuning_results_3.groupby('mutation_rate')['fitness'].mean().plot();

In [None]:
# Persist the GA mutation-rate sweep (./results/ must already exist).
ga_tuning_results_3.to_csv('./results/ga_tuning_queens_3.csv')

In [None]:
# GENETIC ALGORITHM TUNING (max attempts)
# Sweeps max_attempts over six values for 3 runner seeds with population size 200 and
# mutation rate .5; keeps the stats rows logged at Iteration == max_iter.
max_iter = 1000
population_sizes = [200]
mutation_rates = [.5]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
ga_tuning_results_1 = pd.DataFrame(columns=cols)
for i in range(3):
    for max_attempts in [10, 25, 50, 100, 200, 500]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=ga_tuning_results_1.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Population size and mutation rate are read back from the runner's stats.
        results['pop_size'] = end_stats['Population Size'].values
        results['mutation_rate'] = end_stats['Mutation Rate'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        ga_tuning_results_1 = pd.concat([ga_tuning_results_1, results], axis=0)
ga_tuning_results_1.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per max_attempts. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
ga_tuning_results_1.groupby('max_attempts')['time'].mean().plot();
ga_tuning_results_1.groupby('max_attempts')['fitness'].mean().plot();

In [None]:
# Persist the GA max-attempts sweep (./results/ must already exist).
ga_tuning_results_1.to_csv('./results/ga_tuning_queens_1.csv')

In [None]:
# GENETIC ALGORITHM TUNING (pop size)
# Sweeps the population size over four values for 3 runner seeds with max_attempts 200 and
# mutation rate .5; keeps the stats rows logged at Iteration == max_iter.
max_iter = 1000
max_attempts = 200
mutation_rates = [.5]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
ga_tuning_results_2 = pd.DataFrame(columns=cols)
for i in range(3):
    for population_size in [50, 100, 200, 500]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=[population_size],
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=ga_tuning_results_2.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Population size and mutation rate are read back from the runner's stats.
        results['pop_size'] = end_stats['Population Size'].values
        results['mutation_rate'] = end_stats['Mutation Rate'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        ga_tuning_results_2 = pd.concat([ga_tuning_results_2, results], axis=0)
ga_tuning_results_2.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per population size. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
ga_tuning_results_2.groupby('pop_size')['time'].mean().plot();
ga_tuning_results_2.groupby('pop_size')['fitness'].mean().plot();

In [None]:
# Persist the GA population-size sweep (./results/ must already exist).
ga_tuning_results_2.to_csv('./results/ga_tuning_queens_2.csv')

In [None]:
# GENETIC ALGORITHM TUNING (crossover)
# Compares one-point and uniform crossover for 3 runner seeds (42, 84, 126) with
# population size 200, mutation rate .5 and max_attempts 200, keeping the stats rows
# logged at Iteration == max_iter.
max_iter = 1000
max_attempts = 200
mutation_rates = [.5]
population_sizes = [200]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'crossover', 'fitness', 'time']
ga_tuning_results_4 = pd.DataFrame(columns=cols)
# Each crossover operator is constructed around its own QueensOpt(length=24) instance.
crossovers = {
    'one_point': mlrose.OnePointCrossOver(mlrose.QueensOpt(length=24)),
    'uniform': mlrose.UniformCrossOver(mlrose.QueensOpt(length=24))
}
for i in range(3):
    for c_name, crossover in crossovers.items():
        # Re-seed NumPy and rebuild the problem so both crossovers start from the same setup.
        np.random.seed(420)
        problem = mlrose.QueensOpt(length=24, crossover=crossover)
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            # Use this cell's own setting. The previous `[mutation_rate]` silently picked up
            # a loop variable leaked from the mutation-rate tuning cell (or raised NameError
            # on a fresh kernel) and left `mutation_rates` above unused.
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=ga_tuning_results_4.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        results['crossover'] = c_name
        # Population size and mutation rate are read back from the runner's stats.
        results['pop_size'] = end_stats['Population Size'].values
        results['mutation_rate'] = end_stats['Mutation Rate'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        ga_tuning_results_4 = pd.concat([ga_tuning_results_4, results], axis=0)
ga_tuning_results_4.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per crossover name. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
ga_tuning_results_4.groupby('crossover')['time'].mean().plot();
ga_tuning_results_4.groupby('crossover')['fitness'].mean().plot();

In [None]:
# Persist the GA crossover comparison (./results/ must already exist).
ga_tuning_results_4.to_csv('./results/ga_tuning_queens_4.csv')

In [None]:
# Rebind `problem` to a generator-built 24-queens instance (seed 420) for the MIMIC tuning cells.
problem = mlrose.QueensGenerator().generate(seed=420, size=24)

In [None]:
# MIMIC TUNING (max attempts)
# Sweeps max_attempts over five values for 3 runner seeds (42, 84, 126) with population
# size 200 and keep percent .2; keeps the stats rows logged at Iteration == max_iter.
max_iter = 1000
population_sizes = [200]
keep_pcts = [.2]
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
mimic_tuning_results_1 = pd.DataFrame(columns=cols)
for i in range(3):
    for max_attempts in [1, 2, 5, 10, 25]:
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=mimic_tuning_results_1.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Population size and keep percent are read back from the runner's stats.
        results['pop_size'] = end_stats['Population Size'].values
        results['keep_pct'] = end_stats['Keep Percent'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        mimic_tuning_results_1 = pd.concat([mimic_tuning_results_1, results], axis=0)
mimic_tuning_results_1.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per max_attempts. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
mimic_tuning_results_1.groupby('max_attempts')['time'].mean().plot();
mimic_tuning_results_1.groupby('max_attempts')['fitness'].mean().plot();

In [None]:
# Persist the MIMIC max-attempts sweep (./results/ must already exist).
mimic_tuning_results_1.to_csv('./results/mimic_tuning_queens_1.csv')

In [None]:
# MIMIC TUNING (pop size)
# Sweeps the population size over six values for 3 runner seeds with keep percent .2 and
# max_attempts 2; keeps the stats rows logged at Iteration == max_iter.
max_iter = 1000
keep_pcts = [.2]
max_attempts = 2
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
mimic_tuning_results_2 = pd.DataFrame(columns=cols)
for i in range(3):
    for pop_size in [50, 100, 200, 500, 1000, 2000]:
        # The runner takes a list, so wrap the single swept value.
        population_sizes = [pop_size]
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=mimic_tuning_results_2.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Population size and keep percent are read back from the runner's stats.
        results['pop_size'] = end_stats['Population Size'].values
        results['keep_pct'] = end_stats['Keep Percent'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        mimic_tuning_results_2 = pd.concat([mimic_tuning_results_2, results], axis=0)
mimic_tuning_results_2.reset_index(drop=True, inplace=True)

In [None]:
# try to do with matplotlib to get time on right and performance on left
# Mean time and mean fitness per population size. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
mimic_tuning_results_2.groupby('pop_size')['time'].mean().plot();
mimic_tuning_results_2.groupby('pop_size')['fitness'].mean().plot();

In [None]:
# Persist the MIMIC population-size sweep (./results/ must already exist).
mimic_tuning_results_2.to_csv('./results/mimic_tuning_queens_2.csv')

In [None]:
# MIMIC TUNING (keep pct)
# Sweeps the keep percent over five values for 3 runner seeds with population size 1000
# and max_attempts 2; keeps the stats rows logged at Iteration == max_iter.
max_iter = 1000
population_sizes = [1000]
max_attempts = 2
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
mimic_tuning_results_3 = pd.DataFrame(columns=cols)
for i in range(3):
    for keep_pct in [.05, .1, .2, .3, .5]:
        # The runner takes a list, so wrap the single swept value.
        keep_pcts = [keep_pct]
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=mimic_tuning_results_3.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Population size and keep percent are read back from the runner's stats.
        results['pop_size'] = end_stats['Population Size'].values
        results['keep_pct'] = end_stats['Keep Percent'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        mimic_tuning_results_3 = pd.concat([mimic_tuning_results_3, results], axis=0)
mimic_tuning_results_3.reset_index(drop=True, inplace=True)

In [None]:
# Mean time and mean fitness per keep percent. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
mimic_tuning_results_3.groupby('keep_pct')['time'].mean().plot();
mimic_tuning_results_3.groupby('keep_pct')['fitness'].mean().plot();

In [None]:
# Persist the MIMIC keep-percent sweep (./results/ must already exist).
mimic_tuning_results_3.to_csv('./results/mimic_tuning_queens_3.csv')

#### Queens Final Performance

In [None]:
# Board sizes compared in the final-performance cells, and the seed handed to
# QueensGenerator whenever those cells build a problem.
problem_sizes = [16, 24, 32]
prob_seed = 4200

In [None]:
# RANDOM HILL CLIMBING
# For each problem size and 3 runner seeds (420, 840, 1260), runs RHC with 10 restarts and
# logs every `step` iterations; the rows sharing an Iteration value are collapsed into one
# output row per Iteration.
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)  # step, 2*step, ..., max_iter
max_attempts = 75
restarts = 10
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'restarts', 'fitness', 'time', 'f_evals']
hc_final_results = pd.DataFrame(columns=cols)
for prob_size in problem_sizes:
    for i in range(3):
        # A fresh problem is generated for every run, always with the same generator seed.
        problem = mlrose.QueensGenerator().generate(seed=prob_seed, size=prob_size)
        hc_runner = mlrose.RHCRunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            restart_list=[restarts],
            max_attempts=max_attempts,
            generate_curves=True
        )
        stats, _ = hc_runner.run()
        # One output row per distinct Iteration value in stats.
        n_rows = stats.drop_duplicates('Iteration').shape[0]
        results = pd.DataFrame(index=range(n_rows), columns=hc_final_results.columns)
        results['prob_size'] = prob_size
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        results['restarts'] = restarts
        # NOTE(review): 'iteration' follows first-appearance order, while the groupby results
        # below are sorted by Iteration; they line up only if stats first lists iterations in
        # ascending order — confirm against the runner's output.
        results['iteration'] = stats.drop_duplicates('Iteration')['Iteration'].values
        # Lowest Fitness among all rows that share an Iteration value.
        results['fitness'] = stats.groupby('Iteration')['Fitness'].min().values
        # Row-to-row increments of Time (first row filled with the minimum Time), averaged
        # per Iteration and then accumulated.
        stats['time_per_iter'] = stats['Time'].diff().fillna(stats['Time'].min())
        results['time'] = stats.groupby('Iteration')['time_per_iter'].mean().cumsum().values
        # Same increment / mean / cumulative-sum treatment for the FEvals column.
        stats['evals_per_iter'] = stats['FEvals'].diff().fillna(stats['FEvals'].min())
        results['f_evals'] = stats.groupby('Iteration')['evals_per_iter'].mean().cumsum().values
        hc_final_results = pd.concat([hc_final_results, results], axis=0)
hc_final_results.reset_index(drop=True, inplace=True)

In [None]:
# One labeled line per problem size: rows with a repeated 'time' value are dropped, then
# fitness is averaged per logged iteration across the remaining rows.
for prob_size in problem_sizes:
    hc_final_results.query(
        'prob_size==@prob_size'
    ).drop_duplicates(['time']).groupby('iteration')['fitness'].mean().plot(
        label=prob_size, legend=True
    );

In [None]:
# Persist the final RHC results (./results/ must already exist).
hc_final_results.to_csv('./results/hc_final_queens.csv')

In [None]:
# SIMULATED ANNEALING
# For each problem size and 3 runner seeds (420, 840, 1260), runs SA with
# GeomDecay(init_temp, decay=decay) and copies every stats row into the results table.
max_iter = 10000
step = 10
iteration_list = np.arange(step, max_iter+1, step)  # step, 2*step, ..., max_iter
max_attempts = 200
decay = .995
init_temp = 5
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'init_temp', 'decay', 'fitness', 'time', 'f_evals']
sa_final_results = pd.DataFrame(columns=cols)
for prob_size in problem_sizes:
    for i in range(3):
        # A fresh problem is generated for every run, always with the same generator seed.
        problem = mlrose.QueensGenerator().generate(seed=prob_seed, size=prob_size)
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            temperature_list=[mlrose.GeomDecay(init_temp, decay=decay)],
        )
        stats, _ = sa_runner.run()
        n_rows = stats.shape[0]
        results = pd.DataFrame(index=range(n_rows), columns=sa_final_results.columns)
        results['prob_size'] = prob_size
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Schedule settings are recorded from the cell's constants, not read back from stats.
        results['init_temp'] = init_temp
        results['decay'] = decay
        results['iteration'] = stats['Iteration'].values
        results['fitness'] = stats['Fitness'].values
        results['time'] = stats['Time'].values
        results['f_evals'] = stats['FEvals'].values
        sa_final_results = pd.concat([sa_final_results, results], axis=0)
sa_final_results.reset_index(drop=True, inplace=True)

In [None]:
# One labeled line per problem size: rows with a repeated 'time' value are dropped, then
# fitness is averaged per logged iteration across the remaining rows.
for prob_size in problem_sizes:
    sa_final_results.query(
        'prob_size==@prob_size'
    ).drop_duplicates(['time']).groupby('iteration')['fitness'].mean().plot(
        label=prob_size, legend=True
    );

In [None]:
# Persist the final SA results (./results/ must already exist).
sa_final_results.to_csv('./results/sa_final_queens.csv')

In [None]:
# GENETIC ALGORITHM
# For each problem size and 3 runner seeds (420, 840, 1260), runs GA with population size
# 200 and mutation rate 1 and copies every stats row into the results table.
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)  # step, 2*step, ..., max_iter
max_attempts = 200
pop_size = 200
mutation_rate = 1
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time', 'f_evals']
ga_final_results = pd.DataFrame(columns=cols)
for prob_size in problem_sizes:
    for i in range(3):
        # The problem comes from QueensGenerator here, not the QueensOpt-with-crossover
        # construction used in the GA tuning cells.
        problem = mlrose.QueensGenerator().generate(seed=prob_seed, size=prob_size)
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            population_sizes=[pop_size],
            mutation_rates=[mutation_rate]
        )
        stats, _ = ga_runner.run()
        n_rows = stats.shape[0]
        results = pd.DataFrame(index=range(n_rows), columns=ga_final_results.columns)
        results['prob_size'] = prob_size
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Hyper-parameters are recorded from the cell's constants, not read back from stats.
        results['pop_size'] = pop_size
        results['mutation_rate'] = mutation_rate
        results['iteration'] = stats['Iteration'].values
        results['fitness'] = stats['Fitness'].values
        results['time'] = stats['Time'].values
        results['f_evals'] = stats['FEvals'].values
        ga_final_results = pd.concat([ga_final_results, results], axis=0)
ga_final_results.reset_index(drop=True, inplace=True)

In [None]:
# One labeled line per problem size: rows with a repeated 'time' value are dropped, then
# fitness is averaged per logged iteration across the remaining rows.
for prob_size in problem_sizes:
    ga_final_results.query(
        'prob_size==@prob_size'
    ).drop_duplicates(['time']).groupby('iteration')['fitness'].mean().plot(
        label=prob_size, legend=True
    );

In [None]:
# Persist the final GA results (./results/ must already exist).
ga_final_results.to_csv('./results/ga_final_queens.csv')

In [None]:
# MIMIC
# For each problem size and 3 runner seeds (420, 840, 1260), runs MIMIC with population
# size 1000, keep percent .1 and max_attempts 2, copying every stats row into the results table.
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)  # step, 2*step, ..., max_iter
max_attempts = 2
pop_size = 1000
keep_pct = .1
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time', 'f_evals']
mimic_final_results = pd.DataFrame(columns=cols)
for prob_size in problem_sizes:
    for i in range(3):
        # A fresh problem is generated for every run, always with the same generator seed.
        problem = mlrose.QueensGenerator().generate(seed=prob_seed, size=prob_size)
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            population_sizes=[pop_size],
            keep_percent_list=[keep_pct],
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        n_rows = stats.shape[0]
        results = pd.DataFrame(index=range(n_rows), columns=mimic_final_results.columns)
        results['prob_size'] = prob_size
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Hyper-parameters are recorded from the cell's constants, not read back from stats.
        results['pop_size'] = pop_size
        results['keep_pct'] = keep_pct
        results['iteration'] = stats['Iteration'].values
        results['fitness'] = stats['Fitness'].values
        results['time'] = stats['Time'].values
        results['f_evals'] = stats['FEvals'].values
        mimic_final_results = pd.concat([mimic_final_results, results], axis=0)
mimic_final_results.reset_index(drop=True, inplace=True)

In [None]:
# One labeled line per problem size: rows with a repeated 'time' value are dropped, then
# fitness is averaged per logged iteration across the remaining rows.
for prob_size in problem_sizes:
    mimic_final_results.query(
        'prob_size==@prob_size'
    ).drop_duplicates(['time']).groupby('iteration')['fitness'].mean().plot(
        label=prob_size, legend=True
    );

In [None]:
# Persist the final MIMIC results (./results/ must already exist).
mimic_final_results.to_csv('./results/mimic_final_queens.csv')

#### Knapsack Tuning

In [None]:
# Rebind `problem` to a generated Knapsack instance (generator seed 420) with 110 item
# types; the Knapsack tuning cells below reuse it via problem.reset().
problem = mlrose.KnapsackGenerator().generate(
    seed=420, max_item_count=2, max_weight_pct=.35, number_of_items_types=110
)

In [None]:
# HILL CLIMBING TUNING (restarts and max attempts)
# Sweeps max_attempts over five values for 5 runner seeds (42 ... 210), each with
# `restarts` restarts, and collects one row per stats record logged at Iteration == max_iter.
max_iter = 1000
restarts = 100
hc_tuning_results = pd.DataFrame(columns=['iter', 'max_attempts', 'restart', 'fitness', 'time'])
for max_attempts in [1, 5, 10, 25, 50]:
    for i in range(5):
        problem.reset()
        hc_runner = mlrose.RHCRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            restart_list=[restarts],
            max_attempts=max_attempts,
            generate_curves=True
        )
        stats, _ = hc_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=hc_tuning_results.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        results['restart'] = end_stats['current_restart'].values
        # Running maximum over the kept rows (in row order), i.e. best-so-far when higher fitness is better.
        results['fitness'] = end_stats['Fitness'].cummax().values
        results['time'] = end_stats['Time'].values
        hc_tuning_results = pd.concat([hc_tuning_results, results], axis=0)
# Every other accumulation loop in this notebook renumbers the rows after concatenation;
# without it the frame (and the saved CSV's index column) repeats each run's 0..n-1 labels.
hc_tuning_results.reset_index(drop=True, inplace=True)

In [None]:
# Mean 'time' per (max_attempts, restart): x-axis is restart, one line per max_attempts value.
hc_tuning_results.groupby(['max_attempts', 'restart'])['time'].mean().unstack().T.plot();

In [None]:
# Mean 'fitness' per (max_attempts, restart): x-axis is restart, one line per max_attempts value.
hc_tuning_results.groupby(['max_attempts', 'restart'])['fitness'].mean().unstack().T.plot();

In [None]:
# Persist the Knapsack RHC tuning table (./results/ must already exist).
hc_tuning_results.to_csv('./results/hc_tuning_ks.csv')

In [None]:
# SIMULATED ANNEALING TUNING (max attempts)
# Sweeps max_attempts over six values for 5 runner seeds (42 ... 210) with temperature_list=[1],
# keeping the stats rows logged at Iteration == max_iter.
max_iter = 100000
sa_tuning_results_1 = pd.DataFrame(columns=['iter', 'max_attempts', 'init_temp', 'fitness', 'time'])
for i in range(5):
    for max_attempts in [10, 25, 50, 100, 200, 500]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[1],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=sa_tuning_results_1.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Initial temperature is read back from the runner's stats rather than from the loop.
        results['init_temp'] = end_stats['schedule_init_temp'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        sa_tuning_results_1 = pd.concat([sa_tuning_results_1, results], axis=0)
sa_tuning_results_1.reset_index(drop=True, inplace=True)

In [None]:
# Mean time and mean fitness per max_attempts. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
sa_tuning_results_1.groupby('max_attempts')['time'].mean().plot();
sa_tuning_results_1.groupby('max_attempts')['fitness'].mean().plot();

In [None]:
# Persist the Knapsack SA max-attempts sweep (./results/ must already exist).
sa_tuning_results_1.to_csv('./results/sa_tuning_ks_1.csv')

In [None]:
# SIMULATED ANNEALING TUNING (init temp)
# Sweeps the value passed in temperature_list over nine settings for 5 runner seeds,
# with max_attempts fixed at 500; keeps the stats rows logged at Iteration == max_iter.
max_iter = 100000
max_attempts = 500
sa_tuning_results_2 = pd.DataFrame(columns=['iter', 'max_attempts', 'init_temp', 'fitness', 'time'])
for i in range(5):
    for temp in [.1, .5, 1, 2, 5, 10, 50, 100, 1000]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[temp],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=sa_tuning_results_2.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Recorded from the runner's stats column, not from `temp` directly.
        results['init_temp'] = end_stats['schedule_init_temp'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        sa_tuning_results_2 = pd.concat([sa_tuning_results_2, results], axis=0)
sa_tuning_results_2.reset_index(drop=True, inplace=True)

In [None]:
# Mean time and mean fitness per initial temperature. Both calls omit `ax`, so the two
# unlabeled series are expected to be drawn on the same axes and share one y-scale.
sa_tuning_results_2.groupby('init_temp')['time'].mean().plot();
sa_tuning_results_2.groupby('init_temp')['fitness'].mean().plot();

In [None]:
# Persist the Knapsack SA initial-temperature sweep (./results/ must already exist).
sa_tuning_results_2.to_csv('./results/sa_tuning_ks_2.csv')

In [None]:
# SIMULATED ANNEALING TUNING (decay)
# Sweeps the GeomDecay `decay` argument over six values for 5 runner seeds, with the
# schedule's first argument fixed at temp=50 and max_attempts=500.
max_iter = 100000
max_attempts = 500
temp = 50
sa_tuning_results_3 = pd.DataFrame(columns=['iter', 'max_attempts', 'init_temp', 'decay', 'fitness', 'time'])
for i in range(5):
    for decay in [.95, .99, .995, .999, .9995, .9999]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[mlrose.GeomDecay(temp, decay=decay)],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=sa_tuning_results_3.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        results['init_temp'] = end_stats['schedule_init_temp'].values
        # Decay is taken from the loop variable; the other schedule field comes from stats.
        results['decay'] = decay
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        sa_tuning_results_3 = pd.concat([sa_tuning_results_3, results], axis=0)
sa_tuning_results_3.reset_index(drop=True, inplace=True)

In [None]:
# Mean time and mean fitness per decay value. Both calls omit `ax`, so the two unlabeled
# series are expected to be drawn on the same axes and share one y-scale.
sa_tuning_results_3.groupby('decay')['time'].mean().plot();
sa_tuning_results_3.groupby('decay')['fitness'].mean().plot();

In [None]:
# Display the mean time per decay value as a table.
sa_tuning_results_3.groupby('decay')['time'].mean()

In [None]:
# Display the mean fitness per decay value as a table.
sa_tuning_results_3.groupby('decay')['fitness'].mean()

In [None]:
# Persist the Knapsack SA decay sweep (./results/ must already exist).
sa_tuning_results_3.to_csv('./results/sa_tuning_ks_3.csv')

In [None]:
# GENETIC ALGORITHM TUNING (mutation rates)
# Sweeps the mutation rate over five values for 3 runner seeds (42, 84, 126) with
# population size 200 and max_attempts 200; keeps the stats rows logged at Iteration == max_iter.
# `problem` is whatever the kernel currently holds — the Knapsack instance under
# top-to-bottom execution.
max_iter = 1000
max_attempts = 200
population_sizes = [200]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
ga_tuning_results_1 = pd.DataFrame(columns=cols)
for i in range(3):
    for mutation_rate in [.01, .1, .2, .5, 1]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            mutation_rates=[mutation_rate],
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        # Keep only the rows whose Iteration equals max_iter.
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame(index=range(end_stats.shape[0]), columns=ga_tuning_results_1.columns)
        results['iter'] = i + 1
        results['max_attempts'] = max_attempts
        # Population size and mutation rate are read back from the runner's stats.
        results['pop_size'] = end_stats['Population Size'].values
        results['mutation_rate'] = end_stats['Mutation Rate'].values
        results['fitness'] = end_stats['Fitness'].values
        results['time'] = end_stats['Time'].values
        ga_tuning_results_1 = pd.concat([ga_tuning_results_1, results], axis=0)
ga_tuning_results_1.reset_index(drop=True, inplace=True)

In [None]:
# Mean time and mean fitness per mutation rate. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_mutation_rate = ga_tuning_results_1.groupby('mutation_rate')
ax = by_mutation_rate['time'].mean().plot(label='time', legend=True)
by_mutation_rate['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Save the GA mutation-rate tuning results to CSV (the DataFrame index is written too).
ga_tuning_results_1.to_csv('./results/ga_tuning_ks_1.csv')

In [None]:
# GENETIC ALGORITHM TUNING (max attempts)
# Runs the GA once per (seed, max_attempts) pair with population size and
# mutation rate held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
population_sizes = [200]
mutation_rates = [.1]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for max_attempts in [10, 25, 50, 100, 200, 500]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'mutation_rate': end_stats['Mutation Rate'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
ga_tuning_results_2 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per max_attempts value. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_max_attempts = ga_tuning_results_2.groupby('max_attempts')
ax = by_max_attempts['time'].mean().plot(label='time', legend=True)
by_max_attempts['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Save the GA max-attempts tuning results to CSV (the DataFrame index is written too).
ga_tuning_results_2.to_csv('./results/ga_tuning_ks_2.csv')

In [None]:
# GENETIC ALGORITHM TUNING (pop size)
# Runs the GA once per (seed, population size) pair with max_attempts and
# mutation rate held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
max_attempts = 25
mutation_rates = [.1]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for population_size in [50, 100, 200, 500]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=[population_size],
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'mutation_rate': end_stats['Mutation Rate'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
ga_tuning_results_3 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per population size. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_pop_size = ga_tuning_results_3.groupby('pop_size')
ax = by_pop_size['time'].mean().plot(label='time', legend=True)
by_pop_size['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean final `fitness` for each population size, averaged over all recorded runs.
ga_tuning_results_3.groupby('pop_size')['fitness'].mean()

In [None]:
# Mean `time` for each population size, averaged over all recorded runs.
ga_tuning_results_3.groupby('pop_size')['time'].mean()

In [None]:
# Save the GA population-size tuning results to CSV (the DataFrame index is written too).
ga_tuning_results_3.to_csv('./results/ga_tuning_ks_3.csv')

In [None]:
# MIMIC TUNING (max attempts)
# Runs MIMIC once per (seed, max_attempts) pair with population size and keep
# percentage held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
population_sizes = [200]
keep_pcts = [.2]
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for max_attempts in [1, 2, 5, 10, 25]:
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
mimic_tuning_results_1 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per max_attempts value. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_max_attempts = mimic_tuning_results_1.groupby('max_attempts')
ax = by_max_attempts['time'].mean().plot(label='time', legend=True)
by_max_attempts['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean final `fitness` for each max_attempts value, averaged over all recorded runs.
mimic_tuning_results_1.groupby('max_attempts')['fitness'].mean()

In [None]:
# Save the MIMIC max-attempts tuning results to CSV (the DataFrame index is written too).
mimic_tuning_results_1.to_csv('./results/mimic_tuning_ks_1.csv')

In [None]:
# MIMIC TUNING (pop size)
# Runs MIMIC once per (seed, population size) pair with keep percentage and
# max_attempts held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
keep_pcts = [.2]
max_attempts = 5
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for pop_size in [50, 100, 200, 500, 1000, 2000]:
        population_sizes = [pop_size]
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
mimic_tuning_results_2 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per population size. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_pop_size = mimic_tuning_results_2.groupby('pop_size')
ax = by_pop_size['time'].mean().plot(label='time', legend=True)
by_pop_size['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean `time` for each population size, averaged over all recorded runs.
mimic_tuning_results_2.groupby('pop_size')['time'].mean()

In [None]:
# Save the MIMIC population-size tuning results to CSV (the DataFrame index is written too).
mimic_tuning_results_2.to_csv('./results/mimic_tuning_ks_2.csv')

In [None]:
# MIMIC TUNING (keep pct)
# Runs MIMIC once per (seed, keep percentage) pair with population size and
# max_attempts held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
population_sizes = [1000]
max_attempts = 5
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for keep_pct in [.05, .1, .2, .3, .5]:
        keep_pcts = [keep_pct]
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
mimic_tuning_results_3 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per keep percentage. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_keep_pct = mimic_tuning_results_3.groupby('keep_pct')
ax = by_keep_pct['time'].mean().plot(label='time', legend=True)
by_keep_pct['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean final `fitness` for each keep percentage, averaged over all recorded runs.
mimic_tuning_results_3.groupby('keep_pct')['fitness'].mean()

In [None]:
# Mean `time` for each keep percentage, averaged over all recorded runs.
mimic_tuning_results_3.groupby('keep_pct')['time'].mean()

In [None]:
# Save the MIMIC keep-percentage tuning results to CSV (the DataFrame index is written too).
mimic_tuning_results_3.to_csv('./results/mimic_tuning_ks_3.csv')

#### Knapsack Final Performance

In [None]:
# SKIP

#### 4 Peaks Tuning

In [None]:
# Build the Four Peaks problem instance (`problem`) used by the tuning cells
# that follow.
prob_size = 110
fitness_fn = mlrose.FourPeaks(t_pct=.1)
# Helper problem instance whose only use in this cell is to construct the
# crossover and mutation operators.
basic_opt = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn)
crossover = mlrose.OnePointCrossOver(basic_opt)
mutator = mlrose.ChangeOneMutator(basic_opt)
# Seed NumPy's global RNG immediately before creating the actual problem
# (presumably to make its initial state reproducible - TODO confirm).
np.random.seed(420)
problem = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn, crossover=crossover, mutator=mutator)

In [None]:
# HILL CLIMBING TUNING (restarts and max attempts)
# Runs random-restart hill climbing once per (max_attempts, seed) pair and keeps
# the stats rows logged at iteration `max_iter`. `fitness` is the running
# maximum over those rows, in the order the runner reported them.
max_iter = 1000
restarts = 100
run_frames = []  # one frame per run; concatenated once after the loops
for max_attempts in [1, 5, 10, 25, 50, 100]:
    for i in range(5):
        problem.reset()
        hc_runner = mlrose.RHCRunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            restart_list=[restarts],
            max_attempts=max_attempts,
            generate_curves=True
        )
        stats, _ = hc_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'restart': end_stats['current_restart'].values,
            'fitness': end_stats['Fitness'].cummax().values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)))
        run_frames.append(results)
# ignore_index gives the combined frame a unique 0..n-1 index; previously the
# per-run indices were kept, so the saved CSV had repeating index labels.
hc_tuning_results = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean `time` against restart number, one line per max_attempts value.
hc_tuning_results.groupby(['max_attempts', 'restart'])['time'].mean().unstack().T.plot();

In [None]:
# Mean `fitness` against restart number, one line per max_attempts value.
hc_tuning_results.groupby(['max_attempts', 'restart'])['fitness'].mean().unstack().T.plot();

In [None]:
# Save the hill-climbing tuning results to CSV (the DataFrame index is written too).
hc_tuning_results.to_csv('./results/hc_tuning_4pks.csv')

In [None]:
# SIMULATED ANNEALING TUNING (max attempts)
# Runs SA once per (seed, max_attempts) pair with the temperature fixed at 1,
# keeping the stats rows logged at iteration `max_iter`.
max_iter = 10000
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(5):
    for max_attempts in [10, 25, 50, 100, 200, 500]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            temperature_list=[1],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'init_temp': end_stats['schedule_init_temp'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)))
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
sa_tuning_results_1 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per max_attempts value. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_max_attempts = sa_tuning_results_1.groupby('max_attempts')
ax = by_max_attempts['time'].mean().plot(label='time', legend=True)
by_max_attempts['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Save the SA max-attempts tuning results to CSV (the DataFrame index is written too).
sa_tuning_results_1.to_csv('./results/sa_tuning_4pks_1.csv')

In [None]:
# SIMULATED ANNEALING TUNING (init temp)
# Runs SA once per (seed, temperature) pair with max_attempts held fixed,
# keeping the stats rows logged at iteration `max_iter`.
max_iter = 10000
max_attempts = 100
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(5):
    for temp in [.1, .5, 1, 2, 5, 10, 50, 100, 1000]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            temperature_list=[temp],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'init_temp': end_stats['schedule_init_temp'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)))
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
sa_tuning_results_2 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per initial temperature. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_init_temp = sa_tuning_results_2.groupby('init_temp')
ax = by_init_temp['time'].mean().plot(label='time', legend=True)
by_init_temp['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean final `fitness` for each initial temperature, averaged over all recorded runs.
sa_tuning_results_2.groupby('init_temp')['fitness'].mean()

In [None]:
# Mean `time` for each initial temperature, averaged over all recorded runs.
sa_tuning_results_2.groupby('init_temp')['time'].mean()

In [None]:
# Save the SA initial-temperature tuning results to CSV (the DataFrame index is written too).
sa_tuning_results_2.to_csv('./results/sa_tuning_4pks_2.csv')

In [None]:
# SIMULATED ANNEALING TUNING (decay)
# Runs SA once per (seed, decay) pair using a geometric decay schedule that
# starts at `temp`, keeping the stats rows logged at iteration `max_iter`.
max_iter = 10000
max_attempts = 100
temp = .5
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(5):
    for decay in [.95, .99, .995, .999, .9995, .9999]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            temperature_list=[mlrose.GeomDecay(temp, decay=decay)],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'init_temp': end_stats['schedule_init_temp'].values,
            'decay': decay,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)))
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
sa_tuning_results_3 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per decay value. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_decay = sa_tuning_results_3.groupby('decay')
ax = by_decay['time'].mean().plot(label='time', legend=True)
by_decay['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean `time` for each decay value, averaged over all recorded runs.
sa_tuning_results_3.groupby('decay')['time'].mean()

In [None]:
# Mean final `fitness` for each decay value, averaged over all recorded runs.
sa_tuning_results_3.groupby('decay')['fitness'].mean()

In [None]:
# Save the SA decay tuning results to CSV (the DataFrame index is written too).
sa_tuning_results_3.to_csv('./results/sa_tuning_4pks_3.csv')

In [None]:
# GENETIC ALGORITHM TUNING (mutation rates)
# Runs the GA once per (seed, mutation rate) pair with population size and
# max_attempts held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
max_attempts = 200
population_sizes = [200]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(5):
    for mutation_rate in [.01, .1, .2, .5, 1]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            mutation_rates=[mutation_rate],
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'mutation_rate': end_stats['Mutation Rate'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
ga_tuning_results_1 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per mutation rate. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_mutation_rate = ga_tuning_results_1.groupby('mutation_rate')
ax = by_mutation_rate['time'].mean().plot(label='time', legend=True)
by_mutation_rate['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean final `fitness` for each mutation rate, averaged over all recorded runs.
ga_tuning_results_1.groupby('mutation_rate')['fitness'].mean()

In [None]:
# Mean `time` for each mutation rate, averaged over all recorded runs.
ga_tuning_results_1.groupby('mutation_rate')['time'].mean()

In [None]:
# Save the GA mutation-rate tuning results to CSV (the DataFrame index is written too).
ga_tuning_results_1.to_csv('./results/ga_tuning_4pks_1.csv')

In [None]:
# GENETIC ALGORITHM TUNING (max attempts)
# Runs the GA once per (seed, max_attempts) pair with population size and
# mutation rate held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
population_sizes = [200]
mutation_rates = [.5]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for max_attempts in [1, 5, 10, 25, 50, 100]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'mutation_rate': end_stats['Mutation Rate'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
ga_tuning_results_2 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per max_attempts value. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_max_attempts = ga_tuning_results_2.groupby('max_attempts')
ax = by_max_attempts['time'].mean().plot(label='time', legend=True)
by_max_attempts['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Save the GA max-attempts tuning results to CSV (the DataFrame index is written too).
ga_tuning_results_2.to_csv('./results/ga_tuning_4pks_2.csv')

In [None]:
# Mean final `fitness` for each max_attempts value, averaged over all recorded runs.
ga_tuning_results_2.groupby('max_attempts')['fitness'].mean()

In [None]:
# GENETIC ALGORITHM TUNING (pop size)
# Runs the GA once per (seed, population size) pair with max_attempts and
# mutation rate held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
max_attempts = 25
mutation_rates = [.5]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for population_size in [25, 50, 100, 200, 500, 1000]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=[population_size],
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'mutation_rate': end_stats['Mutation Rate'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
ga_tuning_results_3 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per population size. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_pop_size = ga_tuning_results_3.groupby('pop_size')
ax = by_pop_size['time'].mean().plot(label='time', legend=True)
by_pop_size['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean final `fitness` for each population size, averaged over all recorded runs.
ga_tuning_results_3.groupby('pop_size')['fitness'].mean()

In [None]:
# Mean `time` for each population size, averaged over all recorded runs.
ga_tuning_results_3.groupby('pop_size')['time'].mean()

In [None]:
# Save the GA population-size tuning results to CSV (the DataFrame index is written too).
ga_tuning_results_3.to_csv('./results/ga_tuning_4pks_3.csv')

In [None]:
# MIMIC TUNING (max attempts)
# Runs MIMIC once per (seed, max_attempts) pair with population size and keep
# percentage held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
population_sizes = [200]
keep_pcts = [.2]
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for max_attempts in [1, 2, 5, 10, 25, 50, 100]:
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
mimic_tuning_results_1 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per max_attempts value. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_max_attempts = mimic_tuning_results_1.groupby('max_attempts')
ax = by_max_attempts['time'].mean().plot(label='time', legend=True)
by_max_attempts['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean final `fitness` for each max_attempts value, averaged over all recorded runs.
mimic_tuning_results_1.groupby('max_attempts')['fitness'].mean()

In [None]:
# Save the MIMIC max-attempts tuning results to CSV (the DataFrame index is written too).
mimic_tuning_results_1.to_csv('./results/mimic_tuning_4pks_1.csv')

In [None]:
# MIMIC TUNING (pop size)
# Runs MIMIC once per (seed, population size) pair with keep percentage and
# max_attempts held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
keep_pcts = [.2]
max_attempts = 25
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for pop_size in [50, 100, 200, 500, 1000, 2000]:
        population_sizes = [pop_size]
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
mimic_tuning_results_2 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per population size. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_pop_size = mimic_tuning_results_2.groupby('pop_size')
ax = by_pop_size['time'].mean().plot(label='time', legend=True)
by_pop_size['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean `time` for each population size, averaged over all recorded runs.
mimic_tuning_results_2.groupby('pop_size')['time'].mean()

In [None]:
# Mean final `fitness` for each population size, averaged over all recorded runs.
mimic_tuning_results_2.groupby('pop_size')['fitness'].mean()

In [None]:
# Save the MIMIC population-size tuning results to CSV (the DataFrame index is written too).
mimic_tuning_results_2.to_csv('./results/mimic_tuning_4pks_2.csv')

In [None]:
# MIMIC TUNING (keep pct)
# Runs MIMIC once per (seed, keep percentage) pair with population size and
# max_attempts held fixed, keeping the stats rows logged at iteration `max_iter`.
max_iter = 1000
population_sizes = [1000]
max_attempts = 25
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
run_frames = []  # one frame per run; concatenated once after the loops
for i in range(3):
    for keep_pct in [.05, .1, .2, .3, .5]:
        keep_pcts = [keep_pct]
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),  # a different seed for every repetition
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        results = pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, index=range(len(end_stats)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run and
# avoids concatenating onto an empty, dtype-less frame.
mimic_tuning_results_3 = pd.concat(run_frames, ignore_index=True)

In [None]:
# Mean time and mean fitness per keep percentage. Fitness is drawn on a
# secondary (right-hand) y-axis because the two metrics have different units.
by_keep_pct = mimic_tuning_results_3.groupby('keep_pct')
ax = by_keep_pct['time'].mean().plot(label='time', legend=True)
by_keep_pct['fitness'].mean().plot(ax=ax, secondary_y=True, label='fitness', legend=True);

In [None]:
# Mean `time` for each keep percentage, averaged over all recorded runs.
mimic_tuning_results_3.groupby('keep_pct')['time'].mean()

In [None]:
# Mean final `fitness` for each keep percentage, averaged over all recorded runs.
mimic_tuning_results_3.groupby('keep_pct')['fitness'].mean()

In [None]:
# Save the MIMIC keep-percentage tuning results to CSV (the DataFrame index is written too).
mimic_tuning_results_3.to_csv('./results/mimic_tuning_4pks_3.csv')

#### 4 Peaks Final Performance

In [None]:
# Problem lengths compared in the final 4 Peaks runs, and the value passed to
# np.random.seed before each problem instance is built.
problem_sizes = [64, 110, 160]
prob_seed = 4200

In [None]:
# RANDOM HILL CLIMBING
# For every problem size and seed, runs random-restart hill climbing and
# collapses the per-restart stats into one row per logged iteration:
#   fitness - maximum fitness logged at that iteration across restarts
#   time    - cumulative sum of the mean per-row time increment
#   f_evals - cumulative sum of the mean per-row function-evaluation increment
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)
max_attempts = 100
restarts = 75
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'restarts', 'fitness', 'time', 'f_evals']
run_frames = []  # one frame per run; concatenated once after the loops
for prob_size in problem_sizes:
    for i in range(3):
        # Rebuild the problem from scratch for every run, reseeding NumPy first.
        fitness_fn = mlrose.FourPeaks(t_pct=.1)
        basic_opt = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn)
        crossover = mlrose.OnePointCrossOver(basic_opt)
        mutator = mlrose.ChangeOneMutator(basic_opt)
        np.random.seed(prob_seed)
        problem = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn, crossover=crossover, mutator=mutator)
        hc_runner = mlrose.RHCRunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            restart_list=[restarts],
            max_attempts=max_attempts,
            generate_curves=True
        )
        stats, _ = hc_runner.run()
        # Row-to-row increments; the first row has no predecessor and is filled
        # with the column minimum.
        stats['time_per_iter'] = stats['Time'].diff().fillna(stats['Time'].min())
        stats['evals_per_iter'] = stats['FEvals'].diff().fillna(stats['FEvals'].min())
        by_iteration = stats.groupby('Iteration')
        best_fitness = by_iteration['Fitness'].max()
        results = pd.DataFrame({
            'prob_size': prob_size,
            'iter': i + 1,
            # Take the iteration labels from the groupby index so they are
            # guaranteed to line up with the aggregated columns. The previous
            # drop_duplicates() labels were in first-appearance order, which
            # differs from groupby's sorted order whenever iterations are not
            # logged in ascending order across restarts.
            'iteration': best_fitness.index.values,
            'max_attempts': max_attempts,
            'restarts': restarts,
            'fitness': best_fitness.values,
            'time': by_iteration['time_per_iter'].mean().cumsum().values,
            'f_evals': by_iteration['evals_per_iter'].mean().cumsum().values,
        }, index=range(len(best_fitness)), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run.
hc_final_results = pd.concat(run_frames, ignore_index=True)

In [None]:
# One fitness-vs-iteration curve per problem size, averaged over the runs.
for prob_size in problem_sizes:
    size_rows = hc_final_results.query('prob_size==@prob_size')
    # Rows that repeat an already-seen `time` value are dropped before averaging.
    distinct_time_rows = size_rows.drop_duplicates(['time'])
    mean_fitness = distinct_time_rows.groupby('iteration')['fitness'].mean()
    mean_fitness.plot(label=prob_size, legend=True);

In [None]:
# Save the final hill-climbing results to CSV (the DataFrame index is written too).
hc_final_results.to_csv('./results/hc_final_4pks.csv')

In [None]:
# SIMULATED ANNEALING
# For every problem size and seed, runs SA with a geometric decay schedule and
# records every stats row the runner reports (iteration, fitness, time, FEvals).
max_iter = 40000
step = 10
iteration_list = np.arange(step, max_iter+1, step)
max_attempts = 100
decay = .995
init_temp = .5
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'init_temp', 'decay', 'fitness', 'time', 'f_evals']
run_frames = []  # one frame per run; concatenated once after the loops
for prob_size in problem_sizes:
    for i in range(3):
        # Rebuild the problem from scratch for every run, reseeding NumPy first.
        fitness_fn = mlrose.FourPeaks(t_pct=.1)
        basic_opt = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn)
        crossover = mlrose.OnePointCrossOver(basic_opt)
        mutator = mlrose.ChangeOneMutator(basic_opt)
        np.random.seed(prob_seed)
        problem = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn, crossover=crossover, mutator=mutator)
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            temperature_list=[mlrose.GeomDecay(init_temp, decay=decay)],
        )
        stats, _ = sa_runner.run()
        n_rows = stats.shape[0]
        results = pd.DataFrame({
            'prob_size': prob_size,
            'iter': i + 1,
            'iteration': stats['Iteration'].values,
            'max_attempts': max_attempts,
            'init_temp': init_temp,
            'decay': decay,
            'fitness': stats['Fitness'].values,
            'time': stats['Time'].values,
            'f_evals': stats['FEvals'].values,
        }, index=range(n_rows), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run.
sa_final_results = pd.concat(run_frames, ignore_index=True)

In [None]:
# One fitness-vs-iteration curve per problem size, averaged over the runs.
for prob_size in problem_sizes:
    size_rows = sa_final_results.query('prob_size==@prob_size')
    # Rows that repeat an already-seen `time` value are dropped before averaging.
    distinct_time_rows = size_rows.drop_duplicates(['time'])
    mean_fitness = distinct_time_rows.groupby('iteration')['fitness'].mean()
    mean_fitness.plot(label=prob_size, legend=True);

In [None]:
# Save the final simulated-annealing results to CSV (the DataFrame index is written too).
sa_final_results.to_csv('./results/sa_final_4pks.csv')

In [None]:
# GENETIC ALGORITHM
# For every problem size and seed, runs the GA with fixed population size and
# mutation rate and records every stats row the runner reports.
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)
max_attempts = 25
pop_size = 500
mutation_rate = .5
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time', 'f_evals']
run_frames = []  # one frame per run; concatenated once after the loops
for prob_size in problem_sizes:
    for i in range(3):
        # Rebuild the problem from scratch for every run, reseeding NumPy first.
        fitness_fn = mlrose.FourPeaks(t_pct=.1)
        basic_opt = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn)
        crossover = mlrose.OnePointCrossOver(basic_opt)
        mutator = mlrose.ChangeOneMutator(basic_opt)
        np.random.seed(prob_seed)
        problem = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn, crossover=crossover, mutator=mutator)
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            population_sizes=[pop_size],
            mutation_rates=[mutation_rate]
        )
        stats, _ = ga_runner.run()
        n_rows = stats.shape[0]
        results = pd.DataFrame({
            'prob_size': prob_size,
            'iter': i + 1,
            'iteration': stats['Iteration'].values,
            'max_attempts': max_attempts,
            'pop_size': pop_size,
            'mutation_rate': mutation_rate,
            'fitness': stats['Fitness'].values,
            'time': stats['Time'].values,
            'f_evals': stats['FEvals'].values,
        }, index=range(n_rows), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run.
ga_final_results = pd.concat(run_frames, ignore_index=True)

In [None]:
# One fitness-vs-iteration curve per problem size, averaged over the runs.
for prob_size in problem_sizes:
    size_rows = ga_final_results.query('prob_size==@prob_size')
    # Rows that repeat an already-seen `time` value are dropped before averaging.
    distinct_time_rows = size_rows.drop_duplicates(['time'])
    mean_fitness = distinct_time_rows.groupby('iteration')['fitness'].mean()
    mean_fitness.plot(label=prob_size, legend=True);

In [None]:
# Save the final genetic-algorithm results to CSV (the DataFrame index is written too).
ga_final_results.to_csv('./results/ga_final_4pks.csv')

In [None]:
# MIMIC
# For every problem size and seed, runs MIMIC with fixed population size and
# keep percentage and records every stats row the runner reports.
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)
max_attempts = 25
pop_size = 1000
keep_pct = .2
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time', 'f_evals']
run_frames = []  # one frame per run; concatenated once after the loops
for prob_size in problem_sizes:
    for i in range(3):
        # Rebuild the problem from scratch for every run, reseeding NumPy first.
        fitness_fn = mlrose.FourPeaks(t_pct=.1)
        basic_opt = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn)
        crossover = mlrose.OnePointCrossOver(basic_opt)
        mutator = mlrose.ChangeOneMutator(basic_opt)
        np.random.seed(prob_seed)
        problem = mlrose.DiscreteOpt(prob_size, fitness_fn=fitness_fn, crossover=crossover, mutator=mutator)
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            population_sizes=[pop_size],
            keep_percent_list=[keep_pct],
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        n_rows = stats.shape[0]
        results = pd.DataFrame({
            'prob_size': prob_size,
            'iter': i + 1,
            'iteration': stats['Iteration'].values,
            'max_attempts': max_attempts,
            'pop_size': pop_size,
            'keep_pct': keep_pct,
            'fitness': stats['Fitness'].values,
            'time': stats['Time'].values,
            'f_evals': stats['FEvals'].values,
        }, index=range(n_rows), columns=cols)
        run_frames.append(results)
# A single concat avoids re-copying the accumulated frame on every run.
mimic_final_results = pd.concat(run_frames, ignore_index=True)

In [None]:
# One mean-fitness-vs-iteration curve per problem size, labelled by size.
for prob_size in problem_sizes:
    size_rows = mimic_final_results.query('prob_size==@prob_size')
    # Rows sharing an identical time stamp are collapsed before averaging.
    distinct_rows = size_rows.drop_duplicates(['time'])
    mean_fitness = distinct_rows.groupby('iteration')['fitness'].mean()
    mean_fitness.plot(label=prob_size, legend=True);

In [None]:
# Persist mimic_final_results (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
mimic_final_results.to_csv('./results/mimic_final_4pks.csv')

#### Flip Flop Tuning

In [None]:
# Flip Flop problem shared by all tuning cells below: generator seed 420, size 200.
problem = mlrose.FlipFlopGenerator().generate(420, size=200)

In [None]:
# HILL CLIMBING TUNING (restarts and max attempts)
# For each max_attempts value, five seeded RHC runs with `restarts` restarts;
# one row is kept per restart, taken at the final iteration checkpoint.
max_iter = 1000
restarts = 100
hc_tuning_cols = ['iter', 'max_attempts', 'restart', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
hc_tuning_frames = []
for max_attempts in [1, 5, 10, 25, 50, 100]:
    for i in range(5):
        problem.reset()
        hc_runner = mlrose.RHCRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            restart_list=[restarts],
            max_attempts=max_attempts,
            generate_curves=True
        )
        stats, _ = hc_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        hc_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'restart': end_stats['current_restart'].values,
            # Running maximum over the rows: best fitness seen so far.
            'fitness': end_stats['Fitness'].cummax().values,
            'time': end_stats['Time'].values,
        }, columns=hc_tuning_cols))
# ignore_index=True gives the frame a unique 0..n-1 index, like the other
# tuning cells; previously each run's 0..k index was repeated here and
# therefore also in the exported CSV.
hc_tuning_results = pd.concat(hc_tuning_frames, ignore_index=True)

In [None]:
# Mean time by restart number (x-axis), one line per max_attempts value.
hc_tuning_results.groupby(['max_attempts', 'restart'])['time'].mean().unstack().T.plot();

In [None]:
# Mean fitness by restart number (x-axis), one line per max_attempts value.
hc_tuning_results.groupby(['max_attempts', 'restart'])['fitness'].mean().unstack().T.plot();

In [None]:
# Persist hc_tuning_results (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
hc_tuning_results.to_csv('./results/hc_tuning_ff.csv')

In [None]:
# SIMULATED ANNEALING TUNING (max attempts)
# Five seeded SA runs per max_attempts value with the temperature entry fixed
# at 1; only the row at the final iteration checkpoint is kept.
max_iter = 10000
sa_tuning_cols = ['iter', 'max_attempts', 'init_temp', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
sa_tuning_frames = []
for i in range(5):
    for max_attempts in [10, 25, 50, 100, 200, 500]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[1],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        sa_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'init_temp': end_stats['schedule_init_temp'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=sa_tuning_cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
sa_tuning_results_1 = pd.concat(sa_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per max_attempts value (one .plot() call each).
for metric in ('time', 'fitness'):
    sa_tuning_results_1.groupby('max_attempts')[metric].mean().plot();

In [None]:
# Persist sa_tuning_results_1 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
sa_tuning_results_1.to_csv('./results/sa_tuning_ff_1.csv')

In [None]:
# Mean fitness per max_attempts value, shown as the cell output.
sa_tuning_results_1.groupby('max_attempts')['fitness'].mean()

In [None]:
# Mean time per max_attempts value, shown as the cell output.
sa_tuning_results_1.groupby('max_attempts')['time'].mean()

In [None]:
# SIMULATED ANNEALING TUNING (init temp)
# Five seeded SA runs per temperature entry with max_attempts fixed at 100;
# only the row at the final iteration checkpoint is kept.
max_iter = 10000
max_attempts = 100
sa_tuning_cols = ['iter', 'max_attempts', 'init_temp', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
sa_tuning_frames = []
for i in range(5):
    for temp in [.1, .5, 1, 2, 5, 10, 50, 100]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[temp],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        sa_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'init_temp': end_stats['schedule_init_temp'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=sa_tuning_cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
sa_tuning_results_2 = pd.concat(sa_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per init_temp value (one .plot() call each).
for metric in ('time', 'fitness'):
    sa_tuning_results_2.groupby('init_temp')[metric].mean().plot();

In [None]:
# Mean fitness per init_temp value, shown as the cell output.
sa_tuning_results_2.groupby('init_temp')['fitness'].mean()

In [None]:
# Mean time per init_temp value, shown as the cell output.
sa_tuning_results_2.groupby('init_temp')['time'].mean()

In [None]:
# Persist sa_tuning_results_2 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
sa_tuning_results_2.to_csv('./results/sa_tuning_ff_2.csv')

In [None]:
# SIMULATED ANNEALING TUNING (decay)
# Five seeded SA runs per decay value, each using GeomDecay(temp, decay=decay)
# with temp fixed at 10 and max_attempts at 100; only the row at the final
# iteration checkpoint is kept.
max_iter = 10000
max_attempts = 100
temp = 10
sa_tuning_cols = ['iter', 'max_attempts', 'init_temp', 'decay', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
sa_tuning_frames = []
for i in range(5):
    for decay in [.95, .99, .995, .999, .9995, .9999]:
        problem.reset()
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            temperature_list=[mlrose.GeomDecay(temp, decay=decay)],
            max_attempts=max_attempts,
        )
        stats, _ = sa_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        sa_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'init_temp': end_stats['schedule_init_temp'].values,
            # Recorded from the loop variable rather than from the runner stats.
            'decay': decay,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=sa_tuning_cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
sa_tuning_results_3 = pd.concat(sa_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per decay value (one .plot() call each).
for metric in ('time', 'fitness'):
    sa_tuning_results_3.groupby('decay')[metric].mean().plot();

In [None]:
# Mean time per decay value, shown as the cell output.
sa_tuning_results_3.groupby('decay')['time'].mean()

In [None]:
# Mean fitness per decay value, shown as the cell output.
sa_tuning_results_3.groupby('decay')['fitness'].mean()

In [None]:
# Persist sa_tuning_results_3 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
sa_tuning_results_3.to_csv('./results/sa_tuning_ff_3.csv')

In [None]:
# GENETIC ALGORITHM TUNING (mutation rates)
# One seeded GA run per mutation rate with population size 200 and
# max_attempts 200; only the row at the final iteration checkpoint is kept.
max_iter = 1000
max_attempts = 200
population_sizes = [200]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
ga_tuning_frames = []
for i in range(1):
    for mutation_rate in [.01, .1, .2, .5, 1]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            mutation_rates=[mutation_rate],
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        ga_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'mutation_rate': end_stats['Mutation Rate'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
ga_tuning_results_1 = pd.concat(ga_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per mutation_rate value (one .plot() call each).
for metric in ('time', 'fitness'):
    ga_tuning_results_1.groupby('mutation_rate')[metric].mean().plot();

In [None]:
# Mean fitness per mutation_rate value, shown as the cell output.
ga_tuning_results_1.groupby('mutation_rate')['fitness'].mean()

In [None]:
# Mean time per mutation_rate value, shown as the cell output.
ga_tuning_results_1.groupby('mutation_rate')['time'].mean()

In [None]:
# Persist ga_tuning_results_1 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
ga_tuning_results_1.to_csv('./results/ga_tuning_ff_1.csv')

In [None]:
# GENETIC ALGORITHM TUNING (max attempts)
# One seeded GA run per max_attempts value with population size 200 and
# mutation rate 1; only the row at the final iteration checkpoint is kept.
max_iter = 1000
population_sizes = [200]
mutation_rates = [1]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
ga_tuning_frames = []
for i in range(1):
    for max_attempts in [1, 5, 10, 25, 50, 100, 200, 500]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        ga_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'mutation_rate': end_stats['Mutation Rate'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
ga_tuning_results_2 = pd.concat(ga_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per max_attempts value (one .plot() call each).
for metric in ('time', 'fitness'):
    ga_tuning_results_2.groupby('max_attempts')[metric].mean().plot();

In [None]:
# Mean fitness per max_attempts value, shown as the cell output.
ga_tuning_results_2.groupby('max_attempts')['fitness'].mean()

In [None]:
# Mean time per max_attempts value, shown as the cell output.
ga_tuning_results_2.groupby('max_attempts')['time'].mean()

In [None]:
# Persist ga_tuning_results_2 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
ga_tuning_results_2.to_csv('./results/ga_tuning_ff_2.csv')

In [None]:
# GENETIC ALGORITHM TUNING (pop size)
# One seeded GA run per population size with mutation rate 1 and
# max_attempts 500; only the row at the final iteration checkpoint is kept.
max_iter = 1000
max_attempts = 500
mutation_rates = [1]
cols = ['iter', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
ga_tuning_frames = []
for i in range(1):
    for population_size in [25, 50, 100, 200, 500, 1000]:
        problem.reset()
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=[population_size],
            mutation_rates=mutation_rates,
            max_attempts=max_attempts
        )
        stats, _ = ga_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        ga_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'mutation_rate': end_stats['Mutation Rate'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
ga_tuning_results_3 = pd.concat(ga_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per pop_size value (one .plot() call each).
for metric in ('time', 'fitness'):
    ga_tuning_results_3.groupby('pop_size')[metric].mean().plot();

In [None]:
# Mean fitness per pop_size value, shown as the cell output.
ga_tuning_results_3.groupby('pop_size')['fitness'].mean()

In [None]:
# Mean time per pop_size value, shown as the cell output.
ga_tuning_results_3.groupby('pop_size')['time'].mean()

In [None]:
# Persist ga_tuning_results_3 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
ga_tuning_results_3.to_csv('./results/ga_tuning_ff_3.csv')

In [None]:
# MIMIC TUNING (max attempts)
# One seeded MIMIC run per max_attempts value with population size 500 and
# keep percent .2; only the row at the final iteration checkpoint is kept.
max_iter = 1000
population_sizes = [500]
keep_pcts = [.2]
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
mimic_tuning_frames = []
for i in range(1):
    for max_attempts in [1, 2, 5, 10, 25, 50]:
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        mimic_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
mimic_tuning_results_1 = pd.concat(mimic_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per max_attempts value (one .plot() call each).
for metric in ('time', 'fitness'):
    mimic_tuning_results_1.groupby('max_attempts')[metric].mean().plot();

In [None]:
# Mean fitness per max_attempts value, shown as the cell output.
mimic_tuning_results_1.groupby('max_attempts')['fitness'].mean()

In [None]:
# Mean time per max_attempts value, shown as the cell output.
mimic_tuning_results_1.groupby('max_attempts')['time'].mean()

In [None]:
# Persist mimic_tuning_results_1 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
mimic_tuning_results_1.to_csv('./results/mimic_tuning_ff_1.csv')

In [None]:
# MIMIC TUNING (pop size)
# One seeded MIMIC run per population size with keep percent .2 and
# max_attempts 5; only the row at the final iteration checkpoint is kept.
max_iter = 1000
keep_pcts = [.2]
max_attempts = 5
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
mimic_tuning_frames = []
for i in range(1):
    for pop_size in [50, 100, 200, 500, 1000, 2000, 5000]:
        population_sizes = [pop_size]
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        mimic_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
mimic_tuning_results_2 = pd.concat(mimic_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per pop_size value (one .plot() call each).
for metric in ('time', 'fitness'):
    mimic_tuning_results_2.groupby('pop_size')[metric].mean().plot();

In [None]:
# Mean time per pop_size value, shown as the cell output.
mimic_tuning_results_2.groupby('pop_size')['time'].mean()

In [None]:
# Mean fitness per pop_size value, shown as the cell output.
mimic_tuning_results_2.groupby('pop_size')['fitness'].mean()

In [None]:
# Persist mimic_tuning_results_2 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
mimic_tuning_results_2.to_csv('./results/mimic_tuning_ff_2.csv')

In [None]:
# MIMIC TUNING (keep pct)
# One seeded MIMIC run per keep percent with population size 5000 and
# max_attempts 5; only the row at the final iteration checkpoint is kept.
max_iter = 1000
population_sizes = [5000]
max_attempts = 5
cols = ['iter', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
mimic_tuning_frames = []
for i in range(1):
    for keep_pct in [.05, .1, .2, .3, .5]:
        keep_pcts = [keep_pct]
        problem.reset()
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=42*(i+1),
            iteration_list=[max_iter],
            population_sizes=population_sizes,
            keep_percent_list=keep_pcts,
            max_attempts=max_attempts,
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        end_stats = stats.query('Iteration==@max_iter').reset_index(drop=True)
        mimic_tuning_frames.append(pd.DataFrame({
            'iter': i + 1,
            'max_attempts': max_attempts,
            'pop_size': end_stats['Population Size'].values,
            'keep_pct': end_stats['Keep Percent'].values,
            'fitness': end_stats['Fitness'].values,
            'time': end_stats['Time'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
mimic_tuning_results_3 = pd.concat(mimic_tuning_frames, ignore_index=True)

In [None]:
# Plot mean time, then mean fitness, per keep_pct value (one .plot() call each).
for metric in ('time', 'fitness'):
    mimic_tuning_results_3.groupby('keep_pct')[metric].mean().plot();

In [None]:
# Mean time per keep_pct value, shown as the cell output.
mimic_tuning_results_3.groupby('keep_pct')['time'].mean()

In [None]:
# Mean fitness per keep_pct value, shown as the cell output.
mimic_tuning_results_3.groupby('keep_pct')['fitness'].mean()

In [None]:
# Persist mimic_tuning_results_3 (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
mimic_tuning_results_3.to_csv('./results/mimic_tuning_ff_3.csv')

#### Flip Flop Final Performance

In [None]:
# Flip Flop problem sizes evaluated by the final-performance cells below.
problem_sizes = [100, 200, 300]
# Seed handed to FlipFlopGenerator.generate when each problem is built.
prob_seed = 4200

In [None]:
# RANDOM HILL CLIMBING
# Final Flip Flop runs: for every problem size, three RHC runs with different
# runner seeds. The runner stats are collapsed to one row per iteration
# checkpoint: best fitness, plus time and evaluation counts rebuilt from
# per-row increments.
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)
max_attempts = 100
restarts = 100
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'restarts', 'fitness', 'time', 'f_evals']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
hc_run_frames = []
for prob_size in problem_sizes:
    for i in range(3):
        problem = mlrose.FlipFlopGenerator().generate(prob_seed, size=prob_size)
        hc_runner = mlrose.RHCRunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            restart_list=[restarts],
            max_attempts=max_attempts,
            generate_curves=True
        )
        stats, _ = hc_runner.run()
        # Row-to-row increments; diff() leaves the first row NaN, which is
        # filled with the column minimum.
        stats['time_per_iter'] = stats['Time'].diff().fillna(stats['Time'].min())
        stats['evals_per_iter'] = stats['FEvals'].diff().fillna(stats['FEvals'].min())
        by_iteration = stats.groupby('Iteration')
        best_fitness = by_iteration['Fitness'].max()
        hc_run_frames.append(pd.DataFrame({
            'prob_size': prob_size,
            'iter': i + 1,
            # Labels come from the groupby index so they are aligned with the
            # aggregated columns by construction, not by row-order coincidence.
            'iteration': best_fitness.index.values,
            'max_attempts': max_attempts,
            'restarts': restarts,
            'fitness': best_fitness.values,
            # Mean increment per checkpoint, accumulated over checkpoints.
            'time': by_iteration['time_per_iter'].mean().cumsum().values,
            'f_evals': by_iteration['evals_per_iter'].mean().cumsum().values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
hc_final_results = pd.concat(hc_run_frames, ignore_index=True)

In [None]:
# One mean-fitness-vs-iteration curve per problem size, labelled by size.
for prob_size in problem_sizes:
    size_rows = hc_final_results.query('prob_size==@prob_size')
    # Rows sharing an identical time stamp are collapsed before averaging.
    distinct_rows = size_rows.drop_duplicates(['time'])
    mean_fitness = distinct_rows.groupby('iteration')['fitness'].mean()
    mean_fitness.plot(label=prob_size, legend=True);

In [None]:
# Persist hc_final_results (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
hc_final_results.to_csv('./results/hc_final_ff.csv')

In [None]:
# SIMULATED ANNEALING
# Final Flip Flop runs: for every problem size, three SA runs with different
# runner seeds using GeomDecay(init_temp, decay=decay), recording fitness,
# time and function evaluations at each checkpoint in `iteration_list`.
max_iter = 10000
step = 10
iteration_list = np.arange(step, max_iter+1, step)
max_attempts = 100
decay = .95
init_temp = 10
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'init_temp', 'decay', 'fitness', 'time', 'f_evals']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
sa_run_frames = []
for prob_size in problem_sizes:
    for i in range(3):
        problem = mlrose.FlipFlopGenerator().generate(prob_seed, size=prob_size)
        sa_runner = mlrose.SARunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            temperature_list=[mlrose.GeomDecay(init_temp, decay=decay)],
        )
        stats, _ = sa_runner.run()
        # Scalars are broadcast against the per-checkpoint arrays.
        sa_run_frames.append(pd.DataFrame({
            'prob_size': prob_size,
            'iter': i + 1,
            'iteration': stats['Iteration'].values,
            'max_attempts': max_attempts,
            'init_temp': init_temp,
            'decay': decay,
            'fitness': stats['Fitness'].values,
            'time': stats['Time'].values,
            'f_evals': stats['FEvals'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
sa_final_results = pd.concat(sa_run_frames, ignore_index=True)

In [None]:
# One mean-fitness-vs-iteration curve per problem size, labelled by size.
for prob_size in problem_sizes:
    size_rows = sa_final_results.query('prob_size==@prob_size')
    # Rows sharing an identical time stamp are collapsed before averaging.
    distinct_rows = size_rows.drop_duplicates(['time'])
    mean_fitness = distinct_rows.groupby('iteration')['fitness'].mean()
    mean_fitness.plot(label=prob_size, legend=True);

In [None]:
# Persist sa_final_results (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
sa_final_results.to_csv('./results/sa_final_ff.csv')

In [None]:
# GENETIC ALGORITHM
# Final Flip Flop runs: for every problem size, three GA runs with different
# runner seeds, recording fitness, time and function evaluations at each
# checkpoint in `iteration_list`.
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)
max_attempts = 500
pop_size = 1000
mutation_rate = 1
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'pop_size', 'mutation_rate', 'fitness', 'time', 'f_evals']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
ga_run_frames = []
for prob_size in problem_sizes:
    for i in range(3):
        problem = mlrose.FlipFlopGenerator().generate(prob_seed, size=prob_size)
        ga_runner = mlrose.GARunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            population_sizes=[pop_size],
            mutation_rates=[mutation_rate]
        )
        stats, _ = ga_runner.run()
        # Scalars are broadcast against the per-checkpoint arrays.
        ga_run_frames.append(pd.DataFrame({
            'prob_size': prob_size,
            'iter': i + 1,
            'iteration': stats['Iteration'].values,
            'max_attempts': max_attempts,
            'pop_size': pop_size,
            'mutation_rate': mutation_rate,
            'fitness': stats['Fitness'].values,
            'time': stats['Time'].values,
            'f_evals': stats['FEvals'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
ga_final_results = pd.concat(ga_run_frames, ignore_index=True)

In [None]:
# One mean-fitness-vs-iteration curve per problem size, labelled by size.
for prob_size in problem_sizes:
    size_rows = ga_final_results.query('prob_size==@prob_size')
    # Rows sharing an identical time stamp are collapsed before averaging.
    distinct_rows = size_rows.drop_duplicates(['time'])
    mean_fitness = distinct_rows.groupby('iteration')['fitness'].mean()
    mean_fitness.plot(label=prob_size, legend=True);

In [None]:
# Persist ga_final_results (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
ga_final_results.to_csv('./results/ga_final_ff.csv')

In [None]:
# MIMIC
# Final Flip Flop runs: for every problem size, three MIMIC runs with
# different runner seeds, recording fitness, time and function evaluations
# at each checkpoint in `iteration_list`.
max_iter = 1000
step = 10
iteration_list = np.arange(step, max_iter+1, step)
max_attempts = 5
pop_size = 5000
keep_pct = .1
cols = ['prob_size', 'iter', 'iteration', 'max_attempts', 'pop_size', 'keep_pct', 'fitness', 'time', 'f_evals']
# Per-run frames are collected and concatenated once after the loop:
# concatenating onto an initially empty frame on every pass re-copies all
# earlier rows and relies on pandas' deprecated handling of empty entries.
mimic_run_frames = []
for prob_size in problem_sizes:
    for i in range(3):
        problem = mlrose.FlipFlopGenerator().generate(prob_seed, size=prob_size)
        mimic_runner = mlrose.MIMICRunner(
            problem,
            '',
            seed=420*(i+1),
            iteration_list=iteration_list,
            max_attempts=max_attempts,
            population_sizes=[pop_size],
            keep_percent_list=[keep_pct],
            use_fast_mimic=True
        )
        stats, _ = mimic_runner.run()
        # Scalars are broadcast against the per-checkpoint arrays.
        mimic_run_frames.append(pd.DataFrame({
            'prob_size': prob_size,
            'iter': i + 1,
            'iteration': stats['Iteration'].values,
            'max_attempts': max_attempts,
            'pop_size': pop_size,
            'keep_pct': keep_pct,
            'fitness': stats['Fitness'].values,
            'time': stats['Time'].values,
            'f_evals': stats['FEvals'].values,
        }, columns=cols))
# ignore_index=True yields the same fresh 0..n-1 index the reset produced.
mimic_final_results = pd.concat(mimic_run_frames, ignore_index=True)

In [None]:
# One mean-fitness-vs-iteration curve per problem size, labelled by size.
for prob_size in problem_sizes:
    size_rows = mimic_final_results.query('prob_size==@prob_size')
    # Rows sharing an identical time stamp are collapsed before averaging.
    distinct_rows = size_rows.drop_duplicates(['time'])
    mean_fitness = distinct_rows.groupby('iteration')['fitness'].mean()
    mean_fitness.plot(label=prob_size, legend=True);

In [None]:
# Persist mimic_final_results (index column included) to CSV.
# NOTE(review): assumes the ./results directory already exists — confirm.
mimic_final_results.to_csv('./results/mimic_final_ff.csv')