In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import os
pd.set_option('max_columns', None)
pd.set_option('max_rows', None)

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score

In [2]:
import six
import sys
sys.modules['sklearn.externals.six'] = six
import mlrose

## Part 1

What is an optimization problem? 

- We aim to find the best state, according to some objective function. 
- An example of a state is the weights used in a machine learning model, such as a neural network
- State is represented by a one-dimensional vector 
- "Best" state is defined using the Cost Function/Fitness Function/Objective Function 
- Function takes in a State Array as Input and returns a Fitness Value as Output


We will make 3 steps to solve an optimization problem with mlrose:
 - 1) Define fitness function object
 - 2) Define an optimization problem object
 - 3) Select and run a randomized optimization algorithm

#### 1) Define Fitness Function Object

In [6]:
fitness1 = mlrose.FourPeaks(t_pct=0.1)

#fitness2 = mlrose.MaxKColor(edges=)
#fitness3 = mlrose.FlipFlop()

#### 2) Define an Optimization Problem Object

In [153]:
#this requires a LENGTH - we can tune this 

problem1 = mlrose.DiscreteOpt(length = 100, fitness_fn = fitness1, maximize = True)

#### 3) Select and Run Randomized Optimization Algorithm

##### Find Optimzal Parameters for RHC, SA, GA, Mimic for the Four Peaks Problem 

RHC - Modify init_state and restarts

In [157]:
#init_state = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])
restarts_list = np.arange(0, 11, 1)


for num in restarts_list:
    
    best_state, best_fitness = mlrose.random_hill_climb(problem=problem1, restarts = int(num),
                                                   max_attempts = 10, max_iters = 1000,
                                                   #init_state=init_state, 
                                                        random_state = 42)
    #print(best_state)
    print(best_fitness)


1.0
1.0
3.0
3.0
4.0
4.0
5.0
5.0
5.0
5.0
8.0


In [159]:
max_attempts_list = [10, 100, 1000, 10000, 100000]

for num in max_attempts_list:
    
    best_state, best_fitness = mlrose.random_hill_climb(problem=problem1, restarts = 1,
                                                   max_attempts = int(num), max_iters = 1000,
                                                   #init_state=init_state, 
                                                   random_state = 42)
    #print(best_state)
    print(best_fitness)


1.0
20.0
26.0
26.0
26.0


In [161]:
max_iters_list = [10, 100, 1000, 10000, 100000]

for num in max_iters_list:
    
    best_state, best_fitness = mlrose.random_hill_climb(problem=problem1, restarts = 1,
                                                   max_attempts = 10, max_iters = int(num),
                                                   #init_state=init_state, 
                                                    random_state = 42)
    #print(best_state)
    print(best_fitness)


1.0
1.0
1.0
1.0
1.0


##### Simulated Annealing

In [163]:
scheduleG = mlrose.GeomDecay()
scheduleA = mlrose.ArithDecay()
scheduleE = mlrose.ExpDecay()

best_state, best_fitness = mlrose.simulated_annealing(problem=problem1, schedule = scheduleG,
                                               max_attempts = 10, max_iters = 1000,
                                               #init_state=init_state, 
                                                    random_state = 42)

#print(best_state)

print(best_fitness)

32.0


In [165]:
best_state, best_fitness = mlrose.simulated_annealing(problem=problem1, schedule = scheduleA,
                                               max_attempts = 10, max_iters = 1000,
                                               #init_state=init_state, 
                                                    random_state = 42)

#print(best_state)

print(best_fitness)

14.0


In [166]:
best_state, best_fitness = mlrose.simulated_annealing(problem=problem1, schedule = scheduleE,
                                               max_attempts = 10, max_iters = 1000,
                                               #init_state=init_state, 
                                                    random_state = 42)

#print(best_state)

print(best_fitness)

18.0


Max Attempts

In [173]:
max_attempts_list = [10, 100, 1000, 10000, 100000]

for num in max_attempts_list:
    
    best_state, best_fitness = mlrose.simulated_annealing(problem=problem1, schedule = scheduleG,
                                                   max_attempts = num, max_iters = 1000,
                                                   #init_state=init_state, 
                                                        random_state = 42)

    #print(best_state)
    print(best_fitness)


32.0
32.0
32.0
32.0
32.0


  prob = np.exp(delta_e/temp)


Max Iters

In [174]:
max_iters_list = [10, 100, 1000, 10000, 100000]

for num in max_iters_list:
    
    best_state, best_fitness = mlrose.simulated_annealing(problem=problem1, schedule = scheduleG,
                                                   max_attempts = 10, max_iters = num,
                                                   #init_state=init_state, 
                                                        random_state = 42)
    
    #print(best_state)
    print(best_fitness)


1.0
1.0
32.0
51.0
51.0


##### Genetic Alg

In [167]:
best_state, best_fitness = mlrose.genetic_alg(problem=problem1, 
                                              pop_size = 200, mutation_prob = 0.1,
                                              max_attempts = 10, max_iters = 1000,
                                               random_state = 42)


print(best_fitness)

114.0


pop_size

In [191]:
pop_size_list = [.10, .25, .50, .75, 1.0, 1.50, 2.0, 2.50, 3.0, 5.0]
problem_length = problem1.length

for num in pop_size_list:
    size =  problem_length*num
    best_state, best_fitness = mlrose.genetic_alg(problem=problem1, 
                                                  pop_size = int(size), mutation_prob = 0.1,
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)

    print("pop_size:", size, " best_fitness:", best_fitness)


pop_size: 10.0  best_fitness: 8.0
pop_size: 25.0  best_fitness: 15.0
pop_size: 50.0  best_fitness: 26.0
pop_size: 75.0  best_fitness: 16.0
pop_size: 100.0  best_fitness: 115.0
pop_size: 150.0  best_fitness: 14.0
pop_size: 200.0  best_fitness: 114.0
pop_size: 250.0  best_fitness: 17.0
pop_size: 300.0  best_fitness: 117.0
pop_size: 500.0  best_fitness: 120.0


mutation_prob

In [180]:
mutation_prob_list = np.arange(0.1, 1, 0.1) 

for num in mutation_prob_list:
    best_state, best_fitness = mlrose.genetic_alg(problem=problem1, 
                                                  pop_size = 200, mutation_prob = num,
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)

    print("mutation_prob:", num, "best_fitness:", best_fitness)

mutation_prob: 0.1 best_fitness: 114.0
mutation_prob: 0.2 best_fitness: 17.0
mutation_prob: 0.30000000000000004 best_fitness: 13.0
mutation_prob: 0.4 best_fitness: 12.0
mutation_prob: 0.5 best_fitness: 13.0
mutation_prob: 0.6 best_fitness: 12.0
mutation_prob: 0.7000000000000001 best_fitness: 15.0
mutation_prob: 0.8 best_fitness: 14.0
mutation_prob: 0.9 best_fitness: 12.0


max_iters

In [195]:
max_iters_list = [10, 100, 1000, 10000, 100000]

for num in max_iters_list:
    best_state, best_fitness = mlrose.genetic_alg(problem=problem1, 
                                                  pop_size = 5*problem1.length, mutation_prob = 0.1,
                                                   max_attempts = 10, max_iters = num,
                                                        random_state = 42)

    print("max_iters",num ," best_fitness:", best_fitness)


max_iters 10  best_fitness: 112.0
max_iters 100  best_fitness: 120.0
max_iters 1000  best_fitness: 120.0
max_iters 10000  best_fitness: 120.0
max_iters 100000  best_fitness: 120.0


max_attempts

In [194]:
max_attempts_list = [10, 100, 1000, 10000, 100000]

for num in max_attempts_list:
    best_state, best_fitness = mlrose.genetic_alg(problem=problem1, 
                                                  pop_size = 5*problem1.length, mutation_prob = 0.1,
                                                   max_attempts = num, max_iters = 1000,
                                                    random_state = 42)

    print("max_attempts",num ," best_fitness:", best_fitness)


max_attempts 10  best_fitness: 120.0
max_attempts 100  best_fitness: 122.0
max_attempts 1000  best_fitness: 127.0
max_attempts 10000  best_fitness: 127.0
max_attempts 100000  best_fitness: 127.0


##### Mimic 

keep_pct

In [196]:
keep_pct_list = np.arange(0.1, 1, 0.1) 

for num in keep_pct_list:
    best_state, best_fitness = mlrose.mimic(problem=problem1, 
                                                  pop_size = 200, keep_pct = num,
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)

    print("keep_pct:", num," best_fitness:",best_fitness)

keep_pct: 0.1  best_fitness: 114.0
keep_pct: 0.2  best_fitness: 20.0
keep_pct: 0.30000000000000004  best_fitness: 26.0
keep_pct: 0.4  best_fitness: 115.0
keep_pct: 0.5  best_fitness: 13.0
keep_pct: 0.6  best_fitness: 12.0
keep_pct: 0.7000000000000001  best_fitness: 12.0
keep_pct: 0.8  best_fitness: 11.0
keep_pct: 0.9  best_fitness: 11.0


pop_size

In [188]:
pop_size_list = [.10, .25, .50, .75, 1.0, 1.50, 2.0, 2.50, 3.0, 5.0]
problem_length = problem1.length


for num in pop_size_list:
    size =  num * problem_length
    
    best_state, best_fitness = mlrose.mimic(problem=problem1, 
                                                  pop_size = int(size), keep_pct = 0.20,
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)


    print("pop_size:", size, " best_fitness:",best_fitness)

pop_size: 10.0  best_fitness: 6.0
pop_size: 25.0  best_fitness: 10.0


KeyboardInterrupt: 

max_iters

In [None]:
max_iters_list = [10, 100, 1000, 10000, 100000]

for num in max_iters_list:
    
    best_state, best_fitness = mlrose.mimic(problem=problem1, 
                                              pop_size = problem1.length*5, keep_pct = 0.20,
                                               max_attempts = 10, max_iters = int(num),
                                                    random_state = 42)


    print("max_iters:", num, " best_fitness:",best_fitness)


max_attempts

In [None]:
max_attempts_list = [10, 100, 1000, 10000, 100000]

for num in max_attempts_list:
    
    best_state, best_fitness = mlrose.mimic(problem=problem1, 
                                              pop_size = problem1.length*5, keep_pct = 0.20,
                                               max_attempts = int(num), max_iters = 1000,
                                                    random_state = 42)


    print("max_attempts:", num, " best_fitness:",best_fitness)
    

In [108]:
list_algorithms = [mlrose.random_hill_climb , mlrose.simulated_annealing , mlrose.genetic_alg , mlrose.mimic]

init_state = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

for alg in list_algorithms:
    bs, bf =  alg(problem=problem1, random_state = 42)
    print(alg)
    print(bs)
    print(bf)

<function random_hill_climb at 0x7feee32d0a70>
[0 1 0 0 0 1 0 0 0 1]
0.0
<function simulated_annealing at 0x7feee32d0cb0>
[1 1 1 1 1 1 1 1 0 0]
18.0
<function genetic_alg at 0x7feee32d0f80>
[1 1 0 0 0 0 0 0 0 0]
18.0
<function mimic at 0x7feee32d0830>
[1 1 1 1 1 1 1 1 0 0]
18.0


In [134]:
dict_algs = {
    'RHC': {"algorithm": mlrose.random_hill_climb, 
            "init_state": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
            "restarts": 1},
    'SA': {"algorithm": mlrose.simulated_annealing,
          "init_state": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
           "schedule": mlrose.GeomDecay(init_temp=10, decay=0.95, min_temp=1)},
    'GA': {"algorithm": mlrose.genetic_alg,
          "pop_size": 200,
          "mutation_prob": .1},
    'MIMIC': {"algorithm": mlrose.mimic,
             "pop_size": 200,
             "keep_pct": 0.2}
}

for key, value in dict_algs.items():
    #print(key)
    #print(value)
    #print(dict_algs[alg])
    a = value['algorithm']
    for keys, vals in value.items():
        print(keys, vals)
        

    bs, bf =  a(problem=problem1, random_state = 42)
    print(bs)
    print(bf)
    


algorithm <function random_hill_climb at 0x7feee32d0a70>
init_state [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
restarts 1
[0 1 0 0 0 1 0 0 0 1]
0.0
algorithm <function simulated_annealing at 0x7feee32d0cb0>
init_state [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
schedule <mlrose.decay.GeomDecay object at 0x7feee34230d0>
[1 1 1 1 1 1 1 1 0 0]
18.0
algorithm <function genetic_alg at 0x7feee32d0f80>
pop_size 200
mutation_prob 0.1
[1 1 0 0 0 0 0 0 0 0]
18.0
algorithm <function mimic at 0x7feee32d0830>
pop_size 200
keep_pct 0.2
[1 1 1 1 1 1 1 1 0 0]
18.0
