In [None]:
import itertools
import os
import random
from collections import defaultdict
from itertools import combinations
from time import perf_counter

import mlrose
import numpy as np
import pandas as pd

try:
    from time import clock
except ImportError:
    # time.clock was removed in Python 3.8; keep the old name usable.
    clock = perf_counter

In [None]:
# Problem 1 - Travelling Salesman: fitness setup
# SOURCE: https://mlrose.readthedocs.io/en/stable/source/tutorial2.html

# City coordinates, one tuple per city (8 cities)
coords_list = [
    (1, 1), (4, 2), (5, 2), (6, 4),
    (4, 4), (3, 6), (1, 5), (2, 3),
]

# Fitness function object built from the coordinate list
fitness_coords = mlrose.TravellingSales(coords=coords_list)



In [None]:
# Set up for Problem 3 - Knapsack:

# `random` is imported in the notebook's import cell; the seed is kept so any
# later use of the `random` module is reproducible.
random.seed(42)

# Fixed item weights and values (one entry per item, 5 items)
weights = [10, 5, 2, 8, 15]
values = [1, 2, 3, 4, 5]
# Passed to mlrose.Knapsack as its third positional argument
max_weight_pct = 0.6
fitness3 = mlrose.Knapsack(weights, values, max_weight_pct)


In [None]:
# Define a fitness function for each problem.
# fitness_coords (Travelling Salesman) and fitness3 (Knapsack) are created in
# the setup cells above, so they are not rebuilt here.
fitness1 = mlrose.FourPeaks(t_pct=0.1)
#fitness2 = mlrose.OneMax()
#fitness3 = mlrose.Queens()

# Define the problems.
# NOTE(review): maximize=True asks the optimizer for the *longest* tour; the
# mlrose TSP tutorial linked in the setup cell uses maximize=False - confirm
# this is intended.
problem1 = mlrose.TSPOpt(length = 8, fitness_fn = fitness_coords, maximize=True)

# State vector of length 8 where each element can take up to 8 values (0-7).
# This problem was previously paired with the Knapsack fitness, which has only
# 5 items and therefore does not match length = 8.
# NOTE(review): FourPeaks is the only other fitness defined here - confirm it
# is the intended fitness (and that max_val = 8 is wanted for it).
problem2 = mlrose.DiscreteOpt(length = 8, fitness_fn = fitness1, maximize = True, max_val = 8)

# Knapsack: one state element per item (5 weights / values).
# Previously referenced `fitness2`, which is never defined (NameError).
problem3 = mlrose.DiscreteOpt(length = 5, fitness_fn = fitness3, maximize = True, max_val = 3)




In [None]:
# Temperature decay schedules tried by simulated annealing
# (the values below are written out explicitly for each schedule)
exp_schedule = mlrose.ExpDecay(
    init_temp=1.0,
    exp_const=0.005,
    min_temp=0.001,
)
geom_schedule = mlrose.GeomDecay(
    init_temp=1.0,
    decay=0.99,
    min_temp=0.001,
)
arith_schedule = mlrose.ArithDecay(
    init_temp=1.0,
    decay=0.0001,
    min_temp=0.001,
)

In [None]:
#def get_params(problem):

# Parameter grid for random hill climbing: every combination of these values
# is tried. Key order is significant - it fixes the column order of the
# exported results.
RHC_paramdict = dict(
    max_attempts=[5, 10, 20, 50, 100],
    max_iters=[20, 50, 100, 300, 500, 1000, 1500, 2000],
    restarts=[1, 5, 10, 20, 50],
)

# Parameter grid for simulated annealing: every combination of these values
# is tried. Key order is significant - it fixes the column order of the
# exported results.
SA_paramdict = dict(
    schedule=[exp_schedule, geom_schedule, arith_schedule],
    max_attempts=[5, 10, 20, 50, 75, 100],
    max_iters=[20, 50, 100, 300, 500, 1000, 1500, 2000],
)

# Parameter grid for the genetic algorithm: every combination of these values
# is tried. Key order is significant - it fixes the column order of the
# exported results.
GA_paramdict = dict(
    pop_size=[50, 100, 200, 500],
    mutation_prob=[0.1, 0.3, 0.5, 0.7, 0.9],
    max_attempts=[5, 10, 20],
    max_iters=[50, 100, 500, 1000, 1500],
)

# Parameter grid for MIMIC. Fewer values are tested here than for the other
# algorithms because of run time constraints. Key order is significant - it
# fixes the column order of the exported results.
MIMIC_paramdict = dict(
    pop_size=[100, 200, 500],           # 200 is default
    keep_pct=[0.1, 0.2, 0.4, 0.8],      # 0.2 is default
    max_attempts=[5, 10, 50, 100],      # 10 is default
    max_iters=[50, 100, 300, 1000],     # default is inf
)
    

In [None]:
# Run every parameter combination for the chosen algorithm and record the fitness values

def run_experiments(algorithm, problem, problem_name):
    """Grid-search one mlrose optimizer over its parameter grid and export the results.

    Every combination of the values in the algorithm's notebook-level
    parameter dictionary (``RHC_paramdict``, ``SA_paramdict``,
    ``GA_paramdict`` or ``MIMIC_paramdict``) is run once against ``problem``.
    The NumPy global seed is reset to 42 before each run. One row per run is
    written to ``./output/<problem_name>_<algorithm>.csv``.

    Parameters
    ----------
    algorithm : str
        One of ``'RHC'``, ``'SA'``, ``'GA'`` or ``'MIMIC'``.
    problem : object
        Optimization problem passed to the mlrose solver as ``problem``.
    problem_name : str
        Prefix of the exported CSV file name.

    Returns
    -------
    list of list
        One entry per run:
        ``[run number, best_fitness, best_state, solve_time, problem,
        *parameter values]`` with the parameter values in the key order of
        the parameter dictionary.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the supported names.
    """
    print("algorithm =", algorithm)
    print(problem)
    print(problem_name)

    # (parameter grid, solver function) for each supported algorithm
    experiment_setups = {
        'RHC': (RHC_paramdict, mlrose.random_hill_climb),
        'SA': (SA_paramdict, mlrose.simulated_annealing),
        'GA': (GA_paramdict, mlrose.genetic_alg),
        'MIMIC': (MIMIC_paramdict, mlrose.mimic),
    }
    if algorithm not in experiment_setups:
        raise ValueError(
            "Unknown algorithm {!r}; expected one of {}".format(
                algorithm, sorted(experiment_setups)))
    paramdict, solver = experiment_setups[algorithm]

    # Make sure the export location exists *before* the (long) grid search,
    # so a missing folder cannot discard the results at the very end.
    os.makedirs('./output', exist_ok=True)

    # get all the different combinations of parameters
    param_names, param_grids = zip(*paramdict.items())
    params = [dict(zip(param_names, combo))
              for combo in itertools.product(*param_grids)]

    Opt_list = []
    for i, keyword_args in enumerate(params):
        print('Now running experiment {} of {}...'.format(i,len(params)))

        # Same seed for every run so parameter settings are compared on equal footing
        np.random.seed(42) # bc 42 is the meaning of life

        # time the solver call only (time.clock no longer exists in Python 3.8+)
        st = perf_counter()
        best_state, best_fitness = solver(problem=problem, **keyword_args)
        solve_time = perf_counter()-st

        print('Time to solve = {}'.format(solve_time))
        print('--------')

        results = [i, best_fitness, best_state, solve_time, problem]
        results.extend(keyword_args[name] for name in param_names)
        Opt_list.append(results)

    # save the results and export; column names come from the parameter grid
    # itself rather than from whatever the last loop iteration left behind
    colnames = ['Run_no', 'best_fitness', 'best_state', 'solve_time', 'problem']
    colnames.extend(param_names)
    resultsTable = pd.DataFrame(Opt_list, columns=colnames)
    resultsTable.to_csv('./output/'+problem_name+'_'+algorithm+'.csv',index=False)
    return Opt_list


In [None]:
# Manual grid search: MIMIC on problem2. The results stay in the
# notebook-level variables `Opt_list` and `keyword_args`, which the export
# cell below reads.
paramdict = MIMIC_paramdict.copy()
problem = problem2

# get all the different combinations of parameters
# (the value lists get their own name so the Knapsack `values` list defined
# in the setup cell is not overwritten)
keys, param_grids = zip(*paramdict.items())
params = [dict(zip(keys, combo)) for combo in itertools.product(*param_grids)]

Opt_list = []
for i, keyword_args in enumerate(params):
    prob = problem

    print('Now running experiment {} of {}...'.format(i,len(params)))

    # Same seed for every run so parameter settings are compared on equal footing
    np.random.seed(42) # bc 42 is the meaning of life

    # time the solver call only (time.clock no longer exists in Python 3.8+)
    st = perf_counter()
    best_state, best_fitness = mlrose.mimic(**keyword_args, problem=prob)
    solve_time = perf_counter()-st

    print('Time to solve = {}'.format(solve_time))
    print('--------')

    # one row per run: bookkeeping columns followed by the parameter values
    results = [i, best_fitness, best_state, solve_time, prob]
    results.extend(keyword_args[key] for key in keyword_args)
    Opt_list.append(results)





In [None]:
# Save the results of the manual MIMIC grid search above and export them.

# `algorithm` only exists as an argument of run_experiments, so it was
# undefined here; the loop above always calls mlrose.mimic.
algorithm = 'MIMIC'

# column names: bookkeeping columns followed by the tuned parameter names
colnames = ['Run_no', 'best_fitness', 'best_state', 'solve_time', 'problem']
colnames.extend(keyword_args)
resultsTable = pd.DataFrame(Opt_list, columns=colnames)
#resultsTable = resultsTable.sort_values('best_fitness', ascending=False)

os.makedirs('./output', exist_ok=True)
# NOTE(review): the loop above runs problem2 but the file is named
# 'Problem3_' (capital P, unlike the 'problem3_' files written by
# run_experiments and read by the plotting functions) - confirm the name.
resultsTable.to_csv('./output/Problem3_'+algorithm+'.csv',index=False)


In [None]:
# Problem 1, random hill climbing -> ./output/problem1_RHC.csv
run_experiments(problem=problem1, problem_name='problem1', algorithm='RHC')

In [None]:
# Problem 1, simulated annealing -> ./output/problem1_SA.csv
run_experiments(problem=problem1, problem_name='problem1', algorithm='SA')

In [None]:
# Problem 1, genetic algorithm -> ./output/problem1_GA.csv
run_experiments(problem=problem1, problem_name='problem1', algorithm='GA')

In [None]:
# Problem 1, MIMIC -> ./output/problem1_MIMIC.csv
run_experiments(problem=problem1, problem_name='problem1', algorithm='MIMIC')

In [None]:
# PROBLEM 2 ========

In [None]:
# Problem 2, random hill climbing -> ./output/problem2_RHC.csv
run_experiments(problem=problem2, problem_name='problem2', algorithm='RHC')

In [None]:
# Problem 2, simulated annealing -> ./output/problem2_SA.csv
run_experiments(problem=problem2, problem_name='problem2', algorithm='SA')

In [None]:
# Problem 2, genetic algorithm -> ./output/problem2_GA.csv
run_experiments(problem=problem2, problem_name='problem2', algorithm='GA')

In [None]:
# Problem 2, MIMIC -> ./output/problem2_MIMIC.csv
run_experiments(problem=problem2, problem_name='problem2', algorithm='MIMIC')

In [None]:
# PROBLEM 3 ========

In [None]:
# Problem 3, random hill climbing -> ./output/problem3_RHC.csv
run_experiments(problem=problem3, problem_name='problem3', algorithm='RHC')

In [None]:
# Problem 3, simulated annealing -> ./output/problem3_SA.csv
run_experiments(problem=problem3, problem_name='problem3', algorithm='SA')

In [None]:
# Problem 3, genetic algorithm -> ./output/problem3_GA.csv
run_experiments(problem=problem3, problem_name='problem3', algorithm='GA')

In [None]:
# Problem 3, MIMIC -> ./output/problem3_MIMIC.csv
run_experiments(problem=problem3, problem_name='problem3', algorithm='MIMIC')

In [None]:
# Names of the tuned parameters per algorithm. These match the keys of the
# *_paramdict grids, which become the column names of the exported CSVs
# (there is no 'initial' column, and the attempts column is 'max_attempts').
mimic_params = ['pop_size','max_attempts', 'max_iters', 'keep_pct']
ga_params = ['pop_size','max_attempts', 'max_iters', 'mutation_prob']
rhc_params = ['max_attempts', 'max_iters', 'restarts']
sa_params = ['schedule','max_attempts', 'max_iters']

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from textwrap import wrap

# style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and newer releases no longer accept the old names, so use whichever name
# this installation provides.
if 'seaborn-v0_8-darkgrid' in plt.style.available:
    plt.style.use('seaborn-v0_8-darkgrid')
else:
    plt.style.use('seaborn-darkgrid')
# create a color palette
palette = plt.get_cmap('Set1')



def param_scatterplots(algorithm, problem_number ):
    """Plot solve time against best fitness, once per tuned parameter.

    Reads ``./output/problem<problem_number>_<algorithm>.csv`` and, for each
    parameter tuned for ``algorithm``, draws a scatter plot of ``solve_time``
    (x) versus ``best_fitness`` (y) with the points coloured by that
    parameter's value. Each figure is saved under ``./images/`` and shown.

    Parameters
    ----------
    algorithm : str
        One of ``'MIMIC'``, ``'GA'``, ``'RHC'`` or ``'SA'``.
    problem_number : int or str
        Problem identifier used in the input file name, plot title and
        output file name.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the supported names.
    """
    # algorithm -> (CSV column names, human-readable labels), pairwise aligned
    plot_specs = {
        'MIMIC': (['pop_size','max_attempts', 'max_iters', 'keep_pct'],
                  ['Population Size','Max Number of Attempts', 'Max Iterations', 'Percentage Kept']),
        'GA': (['pop_size','max_attempts', 'max_iters', 'mutation_prob'],
               ['Population Size','Max Number of Attempts', 'Max Iterations', 'Mutation Probability']),
        'RHC': (['max_attempts', 'max_iters', 'restarts'],
                ['Max Number of Attempts', 'Max Iterations', 'Number of Restarts']),
        'SA': (['schedule','max_attempts', 'max_iters'],
               ['Decay Schedule','Max Number of Attempts', 'Max Iterations']),
    }
    if algorithm not in plot_specs:
        raise ValueError(
            "Unknown algorithm {!r}; expected one of {}".format(
                algorithm, sorted(plot_specs)))
    params, axis = plot_specs[algorithm]

    # The results file is the same for every parameter, so read it once
    file = './output/problem'+str(problem_number)+'_'+algorithm+'.csv'
    reg = pd.read_csv (file, sep =",")

    # savefig does not create missing folders
    os.makedirs('./images', exist_ok=True)

    for param, axis_label in zip(params, axis):
        # Use the 'hue' argument to provide a factor variable
        sns.lmplot( x="solve_time", y="best_fitness", data=reg, fit_reg=False, hue=param,
                   legend=False)
        # lmplot opens its own figure; keep a handle so it can be closed below
        fig = plt.gcf()

        plt.title(algorithm+' Algorithm: Problem '+str(problem_number)+'\n Solve Time & Fitness Score by '+axis_label, loc='center', fontsize=12, fontweight=0, color='darkblue')
        plt.xlabel('Time to Reach Best Fitness Score')
        plt.ylabel('Best Fitness Score')
        plt.legend(loc='best', ncol=2, frameon=True) #, framealpha=2.0)
        plt.xticks(rotation=0)
        plt.savefig('./images/Problem'+str(problem_number)+'_'+algorithm+'_scatt_timeVS'+param+'.png')
        plt.show()
        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)
    

In [None]:
# Scatter plots for simulated annealing on problem 3 (reads ./output/problem3_SA.csv)
param_scatterplots(problem_number=3, algorithm='SA')

In [None]:
def param_barcharts(algorithm, problem_number ):
    """Plot the mean best fitness per parameter value, once per tuned parameter.

    Reads ``./output/problem<problem_number>_<algorithm>.csv`` and, for each
    parameter tuned for ``algorithm``, draws a bar chart of the mean
    ``best_fitness`` grouped by that parameter's value. Each figure is saved
    under ``./images/`` and shown.

    Parameters
    ----------
    algorithm : str
        One of ``'MIMIC'``, ``'GA'``, ``'RHC'`` or ``'SA'``.
    problem_number : int or str
        Problem identifier used in the input file name, plot title and
        output file name.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the supported names.
    """
    # algorithm -> (CSV column names, axis labels, bar colour, display name)
    chart_specs = {
        'MIMIC': (['pop_size','max_attempts', 'max_iters', 'keep_pct'],
                  ['Population Size','Max Number of Attempts', 'Max Iterations', 'Percentage Kept'],
                  'darkblue', "MIMIC"),
        'GA': (['pop_size','max_attempts', 'max_iters', 'mutation_prob'],
               ['Population Size','Max Number of Attempts', 'Max Iterations', 'Mutation Probability'],
               'green', 'Genetic'),
        'RHC': (['max_attempts', 'max_iters', 'restarts'],
                ['Max Number of Attempts', 'Max Iterations', 'Number of Restarts'],
                'tomato', 'Random Hill Climbing'),
        'SA': (['schedule','max_attempts', 'max_iters'],
               ['Decay Schedule','Max Number of Attempts', 'Max Iterations'],
               'goldenrod', 'Simulated Annealing'),
    }
    if algorithm not in chart_specs:
        raise ValueError(
            "Unknown algorithm {!r}; expected one of {}".format(
                algorithm, sorted(chart_specs)))
    params, axis, barcolor, alg = chart_specs[algorithm]

    # The results file is the same for every parameter, so read it once
    file = './output/problem'+str(problem_number)+'_'+algorithm+'.csv'
    res = pd.read_csv (file, sep =",")

    # savefig does not create missing folders
    os.makedirs('./images', exist_ok=True)

    for param, axis_label in zip(params, axis):
        # mean fitness for each distinct value of this parameter
        res_byparam = res.groupby([param])['best_fitness'].mean()

        # draw every chart on its own figure so bars never pile up on shared axes
        fig, ax = plt.subplots()
        res_byparam.plot.bar(ax=ax, color=barcolor, edgecolor='black')

        plt.title(alg+' Algorithm, Problem '+str(problem_number)+' \nFitness Score by '+axis_label, loc='center', fontsize=12, fontweight=0, color='darkblue')
        plt.xlabel(axis_label)
        plt.ylabel('Average Fitness Score')
        plt.legend(loc='best', ncol=2, frameon=True) #, framealpha=2.0)
        plt.xticks(rotation=0)
        # The problem number is part of the file name so charts for different
        # problems do not overwrite each other (same scheme as the scatter plots).
        plt.savefig('./images/Problem'+str(problem_number)+'_'+algorithm+'_bar_fitnessVS'+param+'.png')
        plt.show()
        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

    
    


In [None]:
# Bar charts for the genetic algorithm on problem 1 (reads ./output/problem1_GA.csv)
param_barcharts(problem_number=1, algorithm='GA')

In [None]:
# Bar charts for simulated annealing on problem 1 (reads ./output/problem1_SA.csv)
param_barcharts(problem_number=1, algorithm='SA')

In [None]:
# Bar charts for MIMIC on problem 1 (reads ./output/problem1_MIMIC.csv)
param_barcharts(problem_number=1, algorithm='MIMIC')

In [None]:
# Bar charts for random hill climbing on problem 1 (reads ./output/problem1_RHC.csv)
param_barcharts(problem_number=1, algorithm='RHC')