In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import sys
import mlrose_hiive as mlr
from mlrose_hiive import Queens, DiscreteOpt, random_hill_climb
from mlrose_hiive import SARunner, GARunner, RHCRunner
from array import array
import importlib
import numpy as np
import helpers #import the module here, so that it can be reloaded.
importlib.reload(helpers)
import time
import joblib

In [66]:
# Define alternative N-Queens fitness function for maximization problem
def queens_max(state):
   # Initialize counter
    fitness_cnt = 0
    for i in range(len(state) - 1):
        for j in range(i + 1, len(state)):
            if (state[j] != state[i]) \
                and (state[j] != state[i] + (j - i)) \
                and (state[j] != state[i] - (j - i)):

                   # If no attacks, then increment counter
                    fitness_cnt += 1

    return fitness_cnt

# Initialize custom fitness function object
fitness_cust = mlr.CustomFitness(queens_max)
eval_func = fitness_cust

In [69]:
N = 8
max_iters = 5000
numTrials = 5
restarts = [25, 75, 100]
max_attempts = [25, 50, 75, 100, 125, 150]
labels = ['max_attempts', 'restarts', 'run_time', 'best_fitness', 'stopped_at']
results_list=[]
for a in max_attempts:
    for r in restarts:
        best_fitnesses_run=[]
        run_times_run=[]
        iterations_run=[]
        for i in range(5):
            prob = mlr.DiscreteOpt(N, eval_func, max_val=N)
            start_time = time.perf_counter()
            best_state, best_fitness, curve = mlr.random_hill_climb(prob, max_attempts=a, max_iters=max_iters, restarts=r, curve=True)
            run_time = time.perf_counter() - start_time
            best_fitnesses_run.append(best_fitness)
            run_times_run.append(run_time)
            stopped_at = len(curve)
            iterations_run.append(stopped_at)
            joblib.dump(pd.DataFrame.from_records(curve), f"queens/rhc/max_attempts{a},restarts{r},run{i}_curve.pkl")
            
        results_list.append([a, r, np.mean(run_times_run), np.mean(best_fitnesses_run), np.mean(iterations_run)])

rhc_results = pd.DataFrame.from_records(results_list, columns=labels)
joblib.dump(df_results, "queens/rhc/queens_rhc_results.pkl")

['queens/rhc/queens_rhc_results.pkl']

In [70]:
rhc_results

Unnamed: 0,max_attempts,restarts,run_time,best_fitness,stopped_at
0,25,25,0.095117,27.8,61.2
1,25,75,0.216811,28.0,62.4
2,25,100,0.290001,28.0,55.4
3,50,25,0.131047,28.0,118.4
4,50,75,0.380547,28.0,122.0
5,50,100,0.519926,28.0,115.2
6,75,25,0.178626,28.0,148.4
7,75,75,0.551858,28.0,153.4
8,75,100,0.751907,28.0,163.8
9,100,25,0.236142,27.8,182.0


In [71]:
N = 8
max_iters = 5000
numTrials = 5
restarts = [25, 75, 100]
temps = [1, 5, 10, 50, 100]
max_attempts = [25, 50, 75, 100, 125, 150]
labels = ['max_attempts', 'restarts', 'run_time', 'best_fitness', 'stopped_at']
results_list=[]
for a in max_attempts:
    for temp in temps:
        best_fitnesses_run=[]
        run_times_run=[]
        iterations_run=[]
        for i in range(5):
            prob = mlr.DiscreteOpt(N, eval_func, max_val=N)
            start_time = time.perf_counter()
            best_state, best_fitness, curve = mlr.simulated_annealing(prob, max_attempts=a, max_iters=max_iters, schedule=mlr.GeomDecay(init_temp=temp), curve=True)
            run_time = time.perf_counter() - start_time
            best_fitnesses_run.append(best_fitness)
            run_times_run.append(run_time)
            stopped_at = len(curve)
            iterations_run.append(stopped_at)
            joblib.dump(pd.DataFrame.from_records(curve), f"queens/sa/max_attempts{a},temp{temp},run{i}_curve.pkl")
            
        results_list.append([a, r, np.mean(run_times_run), np.mean(best_fitnesses_run), np.mean(iterations_run)])

sa_results = pd.DataFrame.from_records(results_list, columns=labels)
joblib.dump(df_results, "queens/sa/queens_rhc_results.pkl")

['queens/sa/queens_rhc_results.pkl']

In [72]:
sa_results

Unnamed: 0,max_attempts,restarts,run_time,best_fitness,stopped_at
0,25,100,0.023354,26.4,262.8
1,25,100,0.029193,26.8,334.0
2,25,100,0.03685,26.4,420.6
3,25,100,0.052547,27.2,585.8
4,25,100,0.059829,27.0,645.0
5,50,100,0.020962,27.4,275.2
6,50,100,0.036669,27.6,469.0
7,50,100,0.052815,27.0,649.6
8,50,100,0.062011,27.6,730.2
9,50,100,0.064898,27.8,750.0


In [None]:
attempts = [10, 100, 1000]
populations = [100, 200, 300, 400]
mutation_probs = [0.1, 0.2, 0.3]
labels = ['max_attempts', 'population_size', 'mutation_probability', 'run_time', 'best_fitness', 'stopped_at']
results_list=[]
for a in attempts:
    for pop in populations:
        for prob in mutation_probs:
            best_fitnesses_run=[]
            run_times_run=[]
            iterations_run=[]
            for i in range(5):
                problem = mlr.DiscreteOpt(N, eval_func, max_val=N)
                start_time = time.perf_counter()
                (best_state, best_fitness, curve) = mlr.genetic_alg(
                    problem,
                    pop_size=pop,
                    mutation_prob = prob,
                    max_attempts=a,
                    max_iters=max_iters,
                    curve=True,
                    random_state=1
                )
                run_time = time.perf_counter() - start_time
                best_fitnesses_run.append(best_fitness)
                run_times_run.append(run_time)
                stopped_at = len(curve)
                iterations_run.append(stopped_at)
                joblib.dump(pd.DataFrame.from_records(curve), f"queens/ga/max_attempts{a},pop{pop},mut{prob},run{i}_curve.pkl")

        results_list.append((
            a,
            pop, 
            prob, 
            np.mean(run_times_run), 
            np.mean(best_fitnesses_run),
            np.mean(iterations_run),
        ))

ga_results = pd.DataFrame.from_records(results_list, columns=labels)
joblib.dump(df_results, "queens/ga/queens_ga_results.pkl")

In [79]:
joblib.dump(ga_results, "queens/ga/queens_ga_results.pkl")

['queens/ga/queens_ga_results.pkl']

In [85]:
populations = [100, 200, 300, 400]
probs = [0.1, 0.2, 0.3]
labels = ['population_size', 'mutation_probability', 'run_time', 'best_fitness', 'stopped_at']
results_list=[]
for pop in populations:
    for prob in mutation_probs:
        best_fitnesses_run=[]
        run_times_run=[]
        iterations_run=[]
        for i in range(5):                
            problem = mlr.DiscreteOpt(N, eval_func, max_val=N)
            start_time = time.perf_counter()
            best_state, best_fitness, curve = mlr.mimic(
                problem,
                pop_size=pop,
                keep_pct=prob,
                max_iters=5000,
                curve=True,
            )
            run_time = time.perf_counter() - start_time
            best_fitnesses_run.append(best_fitness)
            run_times_run.append(run_time)
            stopped_at = len(curve)
            iterations_run.append(stopped_at)
            joblib.dump(pd.DataFrame.from_records(curve), f"queens/mimic/pop{pop},mut{prob},run{i}_curve.pkl")

    results_list.append((
        pop, 
        prob, 
        np.mean(run_times_run), 
        np.mean(best_fitnesses_run),
        np.mean(iterations_run),
    ))

mimic_results = pd.DataFrame.from_records(results_list, columns=labels)
joblib.dump(mimic_results, "queens/mimic/queens_mimic_results.pkl")

ValueError: 5 columns passed, passed data had 6 columns

In [83]:
mimic_results

Unnamed: 0,max_attempts,population_size,mutation_probability,run_time,best_fitness,stopped_at
0,25,100,0.023354,26.4,262.8,
1,25,100,0.029193,26.8,334.0,
2,25,100,0.03685,26.4,420.6,
3,25,100,0.052547,27.2,585.8,
4,25,100,0.059829,27.0,645.0,
5,50,100,0.020962,27.4,275.2,
6,50,100,0.036669,27.6,469.0,
7,50,100,0.052815,27.0,649.6,
8,50,100,0.062011,27.6,730.2,
9,50,100,0.064898,27.8,750.0,


In [50]:
Ns = [2, 4, 8, 10, 15, 20, 25, 50, 100, 125]
rhc_run_times = []
rhc_best_fitnesses = []
rhc_fitness_per_iteration = []
rhc_fn_evals_per_iteration = []
for n in Ns:
    times=[]
    best_fitnesses=[]
    fitness_per_iteration=[]
    fn_evals_per_iteration=[]
    for i in range(5):
        prob = mlr.DiscreteOpt(int(n), eval_func, max_val=n)
        start_time = time.perf_counter()
        best_state, best_fitness, curve = mlr.random_hill_climb(prob, random_state=np.random.seed(), max_iters=1000, curve=True)
        run_time = time.perf_counter() - start_time
        fitness_per_iteration.append([c[0] for c in curve])
        fn_evals_per_iteration.append([c[1] for c in curve])
        times.append(run_time)
        best_fitnesses.append(best_fitness)
    
    fn_evals_avgs=helpers.avg_nested_lists(fn_evals_per_iteration)
    rhc_fn_evals_per_iteration.append(fn_evals_avgs)
    
    c_avgs=helpers.avg_nested_lists(fitness_per_iteration)
    rhc_fitness_per_iteration.append(c_avgs)

    avg_fitness = np.mean(best_fitnesses)
    rhc_best_fitnesses.append(avg_fitness)
    
    avg_run_time = np.mean(times)
    rhc_run_times.append(avg_run_time)


In [51]:
rhc_best_fitnesses

[0.0, 5.2, 24.0, 39.0, 97.0, 178.6, 287.2, 1183.6, 4874.0, 7651.4]

In [40]:
eval_func = Queens()
prob = mlr.DiscreteOpt(25, eval_func)
best_state, best_fitness, curve = mlr.random_hill_climb(prob, random_state=np.random.seed(), max_iters=1000, curve=True)

In [19]:
best_fitness, curve

(35.0,
 array([[29.,  1.],
        [31.,  3.],
        [31.,  4.],
        [31.,  5.],
        [31.,  6.],
        [31.,  7.],
        [33.,  9.],
        [35., 11.],
        [35., 12.],
        [35., 13.],
        [35., 14.],
        [35., 15.],
        [35., 16.],
        [35., 17.],
        [35., 18.],
        [35., 19.],
        [35., 20.],
        [35., 21.]]))

In [16]:
rhc_best_fitnesses, rhc_curves

([1.0, 4.8, 12.2, 15.8, 23.2, 32.0, 40.8, 80.2, 159.2, 199.4],
 [[1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5],
  [2.6,
   3.3,
   3.8,
   4.8,
   5.3,
   6.2,
   6.7,
   7.2,
   7.7,
   8.2,
   8.7,
   9.375,
   9.875,
   10.375,
   11.0,
   11.5],
  [5.9,
   6.4,
   7.5,
   8.0,
   8.5,
   9.2,
   9.7,
   10.5,
   11.0,
   11.5,
   12.125,
   13.0,
   13.5,
   14.0,
   14.5,
   15.0,
   15.25,
   15.75,
   17.0,
   17.5,
   18.0,
   18.5],
  [7.3,
   7.8,
   8.8,
   9.3,
   10.1,
   10.6,
   11.4,
   11.9,
   12.7,
   13.5,
   14.3,
   14.8,
   15.3,
   16.0,
   16.5,
   17.0,
   17.5,
   18.0,
   18.5,
   19.0,
   19.5],
  [11.4,
   11.9,
   12.7,
   13.5,
   14.4,
   14.9,
   15.7,
   16.2,
   16.7,
   17.2,
   17.625,
   17.833333333333332,
   18.833333333333332,
   19.333333333333332,
   19.833333333333332,
   20.5,
   21.0,
   21.5,
   22.0,
   22.5,
   23.0,
   23.5,
   24.0],
  [15.5,
   16.0,
   16.8,
   17.3,
   18.1,
   18.6,
   19.1,
   19.6,
   20.3,
   20.8,
   22.0

In [10]:
Ns = [2, 4, 8, 10, 15, 20, 25, 50, 100, 125]

sa_run_times = []
sa_best_fitnesses = []
for n in Ns:
    times=[]
    best_fitnesses=[]
    for i in range(5):
        eval_func = Queens()
        prob = mlr.DiscreteOpt(int(n), eval_func)
        start_time = time.perf_counter()
        best_state, best_fitness, curve = mlr.simulated_annealing(prob, max_iters=1000, random_state=np.random.seed(), schedule=mlr.GeomDecay(init_temp=100), curve=True)
        run_time = time.perf_counter() - start_time
        times.append(run_time)
        best_fitnesses.append(best_fitness)
        
    avg = np.mean(best_fitnesses)
    sa_best_fitnesses.append(avg)
    
    avg_run_time = np.mean(times)
    sa_run_times.append(avg_run_time)

KeyboardInterrupt: 

In [9]:
sa_best_fitnesses, sa_run_times

([1.0, 5.0, 12.4, 16.4, 26.2, 35.8, 44.2, 89.4, 181.4, 225.4],
 [0.20211157260002893,
  0.14289394819998052,
  0.23267166800003453,
  0.30858986640002967,
  0.4056089565999855,
  0.5980944187999739,
  0.6580483252000249,
  1.334989685399978,
  2.9841130700000122,
  3.849937838400001])