In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import os
pd.set_option('max_columns', None)
pd.set_option('max_rows', None)

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score

In [2]:
import six
import sys
sys.modules['sklearn.externals.six'] = six
import mlrose

## Part 1

What is an optimization problem? 

- We aim to find the best state, according to some objective function. 
- An example of a state is the weights used in a machine learning model, such as a neural network
- State is represented by a one-dimensional vector 
- "Best" state is defined using the Cost Function/Fitness Function/Objective Function 
- Function takes in a State Array as Input and returns a Fitness Value as Output


We will make 3 steps to solve an optimization problem with mlrose:
 - 1) Define fitness function object
 - 2) Define an optimization problem object
 - 3) Select and run a randomized optimization algorithm

#### 1) Define Fitness Function Object

In [47]:
#fitness1 = mlrose.FourPeaks(t_pct=0.1)
#fitness2 = mlrose.MaxKColor(edges=edges)
#fitness3 = mlrose.FlipFlop()

import random

random.seed(42)
weights = []
values = []

for i in range(100):    
    weights.append(random.randint(1, 2))
    values.append(random.randint(1, 2))

print(weights)
print(values)
fitness4 = mlrose.Knapsack(weights=weights, values=values, max_weight_pct = 0.35)

[1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1]


#### 2) Define an Optimization Problem Object

In [48]:
#this requires a LENGTH of Edges - we can tune this 

problem4 = mlrose.DiscreteOpt(length = 100, fitness_fn = fitness4, maximize = True, max_val = 2)

In [49]:
problem4.length

100

#### 3) Select and Run Randomized Optimization Algorithm

##### Find Optimzal Parameters for RHC, SA, GA, Mimic for the Knap Sack Problem

##### Random Hill Climbing

restarts

In [69]:
restarts_list = np.arange(0, 11, 1)

restarts_probs = [.25, .50, .75, 1.0, 2.0]
problem_length = np.array(problem4.length)

restarts_list2 = np.dot(restarts_probs, problem_length)

restarts_list_full =  np.concatenate((restarts_list, restarts_list2))

for num in restarts_list_full:
    best_state, best_fitness = mlrose.random_hill_climb(problem=problem4, restarts = int(num),
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)
    #print(best_state)
    print("restarts:",num, "best_fitness:",best_fitness)


restarts: 0.0 best_fitness: 0.0
restarts: 1.0 best_fitness: 0.0
restarts: 2.0 best_fitness: 0.0
restarts: 3.0 best_fitness: 0.0
restarts: 4.0 best_fitness: 0.0
restarts: 5.0 best_fitness: 0.0
restarts: 6.0 best_fitness: 0.0
restarts: 7.0 best_fitness: 0.0
restarts: 8.0 best_fitness: 0.0
restarts: 9.0 best_fitness: 0.0
restarts: 10.0 best_fitness: 0.0
restarts: 25.0 best_fitness: 0.0
restarts: 50.0 best_fitness: 0.0
restarts: 75.0 best_fitness: 0.0
restarts: 100.0 best_fitness: 0.0
restarts: 200.0 best_fitness: 52.0


max_attempts

In [51]:
max_attempts_list = [10, 100, 1000, 10000, 100000]

for num in max_attempts_list:
    
    best_state, best_fitness = mlrose.random_hill_climb(problem=problem4, restarts = 1,
                                                   max_attempts = int(num), max_iters = 1000,
                                                   random_state = 42)
    #print(best_state)
    print(best_fitness)


0.0
0.0
0.0
0.0
0.0


max_iters

In [52]:
max_iters_list = [10, 100, 1000, 10000, 100000]

for num in max_iters_list:
    
    best_state, best_fitness = mlrose.random_hill_climb(problem=problem4, restarts = 1,
                                                   max_attempts = 10, max_iters = int(num),
                                                    random_state = 42)
    #print(best_state)
    print(best_fitness)


0.0
0.0
0.0
0.0
0.0


##### Simulated Annealing

schedule

In [54]:
scheduleG = mlrose.GeomDecay()
scheduleA = mlrose.ArithDecay()
scheduleE = mlrose.ExpDecay()

best_state, best_fitness = mlrose.simulated_annealing(problem=problem4, schedule = scheduleG,
                                               max_attempts = 10, max_iters = 1000,
                                                    random_state = 42)

#print(best_state)

print(best_fitness)

51.0


  prob = np.exp(delta_e/temp)


In [55]:
best_state, best_fitness = mlrose.simulated_annealing(problem=problem4, schedule = scheduleA,
                                               max_attempts = 10, max_iters = 1000,
                                                    random_state = 42)

#print(best_state)

print(best_fitness)

51.0


In [56]:
best_state, best_fitness = mlrose.simulated_annealing(problem=problem4, schedule = scheduleE,
                                               max_attempts = 10, max_iters = 1000,
                                                    random_state = 42)

#print(best_state)

print(best_fitness)

51.0


Max Attempts

In [57]:
max_attempts_list = [10, 100, 1000, 10000, 100000]

for num in max_attempts_list:
    
    best_state, best_fitness = mlrose.simulated_annealing(problem=problem4, schedule = scheduleG,
                                                   max_attempts = num, max_iters = 1000,
                                                        random_state = 42)

    #print(best_state)
    print(best_fitness)


51.0
51.0
51.0
51.0
51.0


Max Iters

In [58]:
max_iters_list = [10, 100, 1000, 10000, 100000]

for num in max_iters_list:
    
    best_state, best_fitness = mlrose.simulated_annealing(problem=problem4, schedule = scheduleG,
                                                   max_attempts = 10, max_iters = num,
                                                        random_state = 42)
    
    #print(best_state)
    print(best_fitness)


0.0
0.0
51.0
51.0
51.0


##### Genetic Alg

In [59]:
best_state, best_fitness = mlrose.genetic_alg(problem=problem4, 
                                              pop_size = 200, mutation_prob = 0.1,
                                              max_attempts = 10, max_iters = 1000,
                                               random_state = 42)


print(best_fitness)

58.0


pop_size

In [60]:
pop_size_list = [.10, .25, .50, .75, 1.0, 1.50, 2.0, 2.50, 3.0, 5.0]
problem_length = problem4.length

for num in pop_size_list:
    size =  problem_length*num
    best_state, best_fitness = mlrose.genetic_alg(problem=problem4, 
                                                  pop_size = int(size), mutation_prob = 0.1,
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)

    print("pop_size:", size, " best_fitness:", best_fitness)


pop_size: 10.0  best_fitness: 0.0
pop_size: 25.0  best_fitness: 0.0
pop_size: 50.0  best_fitness: 0.0
pop_size: 75.0  best_fitness: 59.0
pop_size: 100.0  best_fitness: 0.0
pop_size: 150.0  best_fitness: 0.0
pop_size: 200.0  best_fitness: 58.0
pop_size: 250.0  best_fitness: 60.0
pop_size: 300.0  best_fitness: 0.0
pop_size: 500.0  best_fitness: 64.0


mutation_prob

In [61]:
mutation_prob_list = np.arange(0.1, 1, 0.1) 

for num in mutation_prob_list:
    best_state, best_fitness = mlrose.genetic_alg(problem=problem4, 
                                                  pop_size = 200, mutation_prob = num,
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)

    print("mutation_prob:", num, "best_fitness:", best_fitness)

mutation_prob: 0.1 best_fitness: 58.0
mutation_prob: 0.2 best_fitness: 61.0
mutation_prob: 0.30000000000000004 best_fitness: 59.0
mutation_prob: 0.4 best_fitness: 55.0
mutation_prob: 0.5 best_fitness: 58.0
mutation_prob: 0.6 best_fitness: 57.0
mutation_prob: 0.7000000000000001 best_fitness: 54.0
mutation_prob: 0.8 best_fitness: 58.0
mutation_prob: 0.9 best_fitness: 58.0


max_iters

In [62]:
max_iters_list = [10, 100, 1000, 10000, 100000]

for num in max_iters_list:
    best_state, best_fitness = mlrose.genetic_alg(problem=problem4, 
                                                  pop_size = 5*problem4.length, mutation_prob = 0.1,
                                                   max_attempts = 10, max_iters = num,
                                                        random_state = 42)

    print("max_iters:",num ," best_fitness:", best_fitness)


max_iters: 10  best_fitness: 60.0
max_iters: 100  best_fitness: 64.0
max_iters: 1000  best_fitness: 64.0
max_iters: 10000  best_fitness: 64.0
max_iters: 100000  best_fitness: 64.0


max_attempts

In [63]:
max_attempts_list = [10, 100, 1000, 10000, 100000]

for num in max_attempts_list:
    best_state, best_fitness = mlrose.genetic_alg(problem=problem4, 
                                                  pop_size = 5*problem4.length, mutation_prob = 0.1,
                                                   max_attempts = num, max_iters = 1000,
                                                    random_state = 42)

    print("max_attempts",num ," best_fitness:", best_fitness)


max_attempts 10  best_fitness: 64.0
max_attempts 100  best_fitness: 64.0
max_attempts 1000  best_fitness: 64.0
max_attempts 10000  best_fitness: 64.0
max_attempts 100000  best_fitness: 64.0


##### Mimic 

keep_pct

In [64]:
keep_pct_list = np.arange(0.1, 1, 0.1) 

for num in keep_pct_list:
    best_state, best_fitness = mlrose.mimic(problem=problem4, 
                                                  pop_size = 200, keep_pct = num,
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)

    print("keep_pct:", num," best_fitness:",best_fitness)

keep_pct: 0.1  best_fitness: 55.0
keep_pct: 0.2  best_fitness: 55.0
keep_pct: 0.30000000000000004  best_fitness: 55.0
keep_pct: 0.4  best_fitness: 55.0
keep_pct: 0.5  best_fitness: 55.0
keep_pct: 0.6  best_fitness: 55.0
keep_pct: 0.7000000000000001  best_fitness: 55.0
keep_pct: 0.8  best_fitness: 55.0
keep_pct: 0.9  best_fitness: 55.0


pop_size

In [65]:
pop_size_list = [.10, .25, .50, .75, 1.0, 1.50, 2.0, 2.50, 3.0, 5.0]
problem_length = problem4.length


for num in pop_size_list:
    size =  num * problem_length
    
    best_state, best_fitness = mlrose.mimic(problem=problem4, 
                                                  pop_size = int(size), keep_pct = 0.20,
                                                   max_attempts = 10, max_iters = 1000,
                                                        random_state = 42)


    print("pop_size:", size, " best_fitness:",best_fitness)

pop_size: 10.0  best_fitness: 0.0
pop_size: 25.0  best_fitness: 0.0
pop_size: 50.0  best_fitness: 50.0
pop_size: 75.0  best_fitness: 0.0
pop_size: 100.0  best_fitness: 48.0
pop_size: 150.0  best_fitness: 53.0
pop_size: 200.0  best_fitness: 55.0
pop_size: 250.0  best_fitness: 53.0
pop_size: 300.0  best_fitness: 54.0
pop_size: 500.0  best_fitness: 56.0


max_iters

In [66]:
#max_iters_list = [10, 100, 1000, 10000, 100000]
max_iters_list = [10, 100, 1000, 10000]


for num in max_iters_list:
    
    best_state, best_fitness = mlrose.mimic(problem=problem4, 
                                              pop_size = problem4.length*5, keep_pct = 0.20,
                                               max_attempts = 10, max_iters = int(num),
                                                    random_state = 42)


    print("max_iters:", num, " best_fitness:",best_fitness)


max_iters: 10  best_fitness: 56.0
max_iters: 100  best_fitness: 56.0
max_iters: 1000  best_fitness: 56.0
max_iters: 10000  best_fitness: 56.0


max_attempts

In [67]:
#max_attempts_list = [10, 100, 1000, 10000, 100000]
max_attempts_list = [10, 100, 1000, 10000]


for num in max_attempts_list:
    
    best_state, best_fitness = mlrose.mimic(problem=problem4, 
                                              pop_size = problem4.length*5, keep_pct = 0.20,
                                               max_attempts = int(num), max_iters = 1000,
                                                    random_state = 42)


    print("max_attempts:", num, " best_fitness:",best_fitness)
    

max_attempts: 10  best_fitness: 56.0
max_attempts: 100  best_fitness: 59.0
max_attempts: 1000  best_fitness: 62.0
max_attempts: 10000  best_fitness: 62.0


In [108]:
list_algorithms = [mlrose.random_hill_climb , mlrose.simulated_annealing , mlrose.genetic_alg , mlrose.mimic]

init_state = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

for alg in list_algorithms:
    bs, bf =  alg(problem=problem1, random_state = 42)
    print(alg)
    print(bs)
    print(bf)

<function random_hill_climb at 0x7feee32d0a70>
[0 1 0 0 0 1 0 0 0 1]
0.0
<function simulated_annealing at 0x7feee32d0cb0>
[1 1 1 1 1 1 1 1 0 0]
18.0
<function genetic_alg at 0x7feee32d0f80>
[1 1 0 0 0 0 0 0 0 0]
18.0
<function mimic at 0x7feee32d0830>
[1 1 1 1 1 1 1 1 0 0]
18.0


In [134]:
dict_algs = {
    'RHC': {"algorithm": mlrose.random_hill_climb, 
            "init_state": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
            "restarts": 1},
    'SA': {"algorithm": mlrose.simulated_annealing,
          "init_state": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
           "schedule": mlrose.GeomDecay(init_temp=10, decay=0.95, min_temp=1)},
    'GA': {"algorithm": mlrose.genetic_alg,
          "pop_size": 200,
          "mutation_prob": .1},
    'MIMIC': {"algorithm": mlrose.mimic,
             "pop_size": 200,
             "keep_pct": 0.2}
}

for key, value in dict_algs.items():
    #print(key)
    #print(value)
    #print(dict_algs[alg])
    a = value['algorithm']
    for keys, vals in value.items():
        print(keys, vals)
        

    bs, bf =  a(problem=problem1, random_state = 42)
    print(bs)
    print(bf)
    


algorithm <function random_hill_climb at 0x7feee32d0a70>
init_state [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
restarts 1
[0 1 0 0 0 1 0 0 0 1]
0.0
algorithm <function simulated_annealing at 0x7feee32d0cb0>
init_state [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
schedule <mlrose.decay.GeomDecay object at 0x7feee34230d0>
[1 1 1 1 1 1 1 1 0 0]
18.0
algorithm <function genetic_alg at 0x7feee32d0f80>
pop_size 200
mutation_prob 0.1
[1 1 0 0 0 0 0 0 0 0]
18.0
algorithm <function mimic at 0x7feee32d0830>
pop_size 200
keep_pct 0.2
[1 1 1 1 1 1 1 1 0 0]
18.0
