In [160]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import os
# Show every column and every row when a DataFrame is displayed.
# The fully-qualified "display." keys are used on purpose: pandas treats the
# option name as a pattern, and the short forms 'max_columns' / 'max_rows' are
# ambiguous (OptionError: pattern matched multiple keys) on pandas versions
# that also define 'styler.render.max_columns' / 'styler.render.max_rows'.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score

In [161]:
import six
import sys
# Register the standalone `six` package under the legacy module path
# `sklearn.externals.six` before mlrose is imported.
# NOTE(review): presumably mlrose imports `sklearn.externals.six`, which newer
# scikit-learn releases no longer provide -- confirm. The assignment must stay
# above `import mlrose` for the alias to be visible during that import.
sys.modules['sklearn.externals.six'] = six
import mlrose

## Part 1

What is an optimization problem? 

- We aim to find the best state, according to some objective function. 
- An example of a state is the weights used in a machine learning model, such as a neural network
- State is represented by a one-dimensional vector 
- "Best" state is defined using the Cost Function/Fitness Function/Objective Function 
- Function takes in a State Array as Input and returns a Fitness Value as Output


Solving an optimization problem with mlrose takes three steps:
 - 1) Define fitness function object
 - 2) Define an optimization problem object
 - 3) Select and run a randomized optimization algorithm

#### 1) Define Fitness Function Object

In [179]:


import random

# Seed the stdlib RNG so the same edge list is produced on every run.
random.seed(42)

length = 100

# Draw `length` edges. Each endpoint is sampled independently and uniformly
# from 1..length-1 (inclusive), first endpoint before second, so self-loops
# and repeated edges can occur.
tuple_list = [
    (random.randint(1, length - 1), random.randint(1, length - 1))
    for _ in range(length)
]

print(tuple_list)
print(len(tuple_list))

# Fitness object for the K-colour problem defined by the sampled edges.
fitness2 = mlrose.MaxKColor(edges=tuple_list)


[(82, 15), (4, 95), (36, 32), (29, 18), (95, 14), (87, 95), (70, 12), (76, 55), (5, 4), (12, 28), (30, 65), (78, 4), (72, 26), (92, 84), (90, 70), (54, 29), (58, 76), (36, 1), (98, 21), (90, 55), (44, 36), (20, 28), (98, 44), (14, 12), (49, 13), (46, 45), (78, 34), (6, 94), (59, 69), (16, 49), (11, 71), (38, 81), (80, 47), (74, 25), (91, 9), (6, 85), (30, 99), (38, 11), (30, 13), (49, 36), (59, 82), (47, 21), (48, 46), (27, 86), (35, 90), (88, 83), (10, 78), (82, 22), (69, 94), (32, 21), (60, 49), (35, 82), (89, 72), (29, 88), (42, 99), (8, 30), (5, 41), (52, 35), (9, 28), (73, 92), (41, 28), (84, 64), (51, 83), (59, 19), (34, 18), (32, 96), (72, 69), (34, 96), (75, 55), (75, 52), (47, 29), (18, 66), (64, 12), (97, 7), (15, 20), (81, 21), (88, 55), (77, 9), (50, 49), (77, 60), (68, 33), (71, 2), (88, 93), (15, 88), (69, 97), (35, 99), (83, 44), (15, 38), (56, 21), (59, 1), (93, 93), (34, 65), (98, 23), (65, 14), (81, 39), (82, 65), (78, 26), (20, 48), (98, 21), (70, 68)]
100


#### 2) Define an Optimization Problem Object

In [180]:
# Discrete optimization problem scored by `fitness2`, with a state vector of
# `length` elements and max_val = 2.
# NOTE(review): `length` is the size of the state vector (presumably one entry
# per node), not the number of edges -- the two coincide here only because the
# edge list was also generated with `length` entries. Confirm before tuning.
# NOTE(review): mlrose documents MaxKColor as counting adjacent node pairs that
# share a colour; if so, maximize = True searches for the MOST conflicts.
# Confirm whether maximize = False was intended.

problem2 = mlrose.DiscreteOpt(length = length, fitness_fn = fitness2, maximize = True, max_val = 2)

In [181]:
# Sanity check: display the state-vector length stored on the problem object.
problem2.length

100

#### 3) Select and Run Randomized Optimization Algorithm

##### Find Optimal Parameters for RHC, SA, GA, and MIMIC on the K-Color Problem

##### Random Hill Climbing

restarts

In [184]:
# Small restart counts: 0 through 10 inclusive.
restarts_list = np.arange(0, 11, 1)

# Larger restart counts expressed as multiples of the problem's state length.
restarts_probs = [.25, .50, .75, 1.0, 2.0]
problem_length = np.asarray(problem2.length)
restarts_list2 = np.multiply(restarts_probs, problem_length)

# Joining the integer and float arrays yields floats, hence the int() cast below.
restarts_list_full = np.concatenate([restarts_list, restarts_list2])

# Settings shared by every run so that only `restarts` varies.
rhc_fixed_args = dict(problem=problem2, max_attempts=10, max_iters=1000, random_state=42)

for n_restarts in restarts_list_full:
    best_state, best_fitness = mlrose.random_hill_climb(
        restarts=int(n_restarts), **rhc_fixed_args
    )
    print("restarts:", n_restarts, "best_fitness:", best_fitness)


restarts: 0.0 best_fitness: 63.0
restarts: 1.0 best_fitness: 63.0
restarts: 2.0 best_fitness: 63.0
restarts: 3.0 best_fitness: 63.0
restarts: 4.0 best_fitness: 67.0
restarts: 5.0 best_fitness: 70.0
restarts: 6.0 best_fitness: 70.0
restarts: 7.0 best_fitness: 81.0
restarts: 8.0 best_fitness: 81.0
restarts: 9.0 best_fitness: 81.0
restarts: 10.0 best_fitness: 81.0
restarts: 25.0 best_fitness: 81.0
restarts: 50.0 best_fitness: 81.0
restarts: 75.0 best_fitness: 81.0
restarts: 100.0 best_fitness: 81.0
restarts: 200.0 best_fitness: 81.0


max_attempts

In [107]:
# Random hill climbing: sweep max_attempts over powers of ten, everything else fixed.
max_attempts_list = [10, 100, 1000, 10000, 100000]

for attempts in max_attempts_list:
    rhc_result = mlrose.random_hill_climb(
        problem=problem2,
        restarts=1,
        max_attempts=int(attempts),
        max_iters=1000,
        random_state=42,
    )
    best_state, best_fitness = rhc_result
    print(best_fitness)


63.0
88.0
84.0
84.0
84.0


max_iters

In [108]:
# Random hill climbing: sweep max_iters over powers of ten, everything else fixed.
max_iters_list = [10, 100, 1000, 10000, 100000]

for iters in max_iters_list:
    rhc_result = mlrose.random_hill_climb(
        problem=problem2,
        restarts=1,
        max_attempts=10,
        max_iters=int(iters),
        random_state=42,
    )
    best_state, best_fitness = rhc_result
    print(best_fitness)


58.0
63.0
63.0
63.0
63.0


##### Simulated Annealing

schedule

In [109]:
# Temperature-decay schedule objects, each constructed with no arguments.
scheduleG = mlrose.GeomDecay()
scheduleA = mlrose.ArithDecay()
scheduleE = mlrose.ExpDecay()

# Simulated annealing run using the geometric schedule.
sa_geom_result = mlrose.simulated_annealing(
    problem=problem2,
    schedule=scheduleG,
    max_attempts=10,
    max_iters=1000,
    random_state=42,
)
best_state, best_fitness = sa_geom_result

print(best_fitness)

74.0


In [110]:
# Simulated annealing run using the arithmetic schedule; other settings match
# the geometric-schedule run.
sa_arith_result = mlrose.simulated_annealing(
    problem=problem2,
    schedule=scheduleA,
    max_attempts=10,
    max_iters=1000,
    random_state=42,
)
best_state, best_fitness = sa_arith_result

print(best_fitness)

71.0


In [111]:
# Simulated annealing run using the exponential schedule; other settings match
# the geometric-schedule run.
sa_exp_result = mlrose.simulated_annealing(
    problem=problem2,
    schedule=scheduleE,
    max_attempts=10,
    max_iters=1000,
    random_state=42,
)
best_state, best_fitness = sa_exp_result

print(best_fitness)

83.0


Max Attempts

In [112]:
# Simulated annealing (geometric schedule): sweep max_attempts, everything else fixed.
max_attempts_list = [10, 100, 1000, 10000, 100000]

sa_fixed_args = dict(problem=problem2, schedule=scheduleG, max_iters=1000, random_state=42)

for attempts in max_attempts_list:
    best_state, best_fitness = mlrose.simulated_annealing(
        max_attempts=attempts, **sa_fixed_args
    )
    print(best_fitness)


74.0
91.0
91.0
91.0
91.0


  prob = np.exp(delta_e/temp)


Max Iters

In [113]:
# Simulated annealing (geometric schedule): sweep max_iters, everything else fixed.
max_iters_list = [10, 100, 1000, 10000, 100000]

sa_fixed_args = dict(problem=problem2, schedule=scheduleG, max_attempts=10, random_state=42)

for iters in max_iters_list:
    best_state, best_fitness = mlrose.simulated_annealing(
        max_iters=iters, **sa_fixed_args
    )
    print(best_fitness)


59.0
73.0
74.0
74.0
74.0


##### Genetic Algorithm

In [114]:
# Baseline genetic-algorithm run used as a reference point for the sweeps below.
ga_baseline_result = mlrose.genetic_alg(
    problem=problem2,
    pop_size=200,
    mutation_prob=0.1,
    max_attempts=10,
    max_iters=1000,
    random_state=42,
)
best_state, best_fitness = ga_baseline_result

print(best_fitness)

67.0


pop_size

In [115]:
# Genetic algorithm: sweep the population size, given as multiples of the
# problem's state length.
pop_size_list = [.10, .25, .50, .75, 1.0, 1.50, 2.0, 2.50, 3.0, 5.0]
problem_length = problem2.length

ga_fixed_args = dict(problem=problem2, mutation_prob=0.1, max_attempts=10,
                     max_iters=1000, random_state=42)

for multiplier in pop_size_list:
    # `size` stays a float for the printout; the optimizer receives the int cast.
    size = problem_length * multiplier
    best_state, best_fitness = mlrose.genetic_alg(pop_size=int(size), **ga_fixed_args)

    print("pop_size:", size, " best_fitness:", best_fitness)


pop_size: 10.0  best_fitness: 62.0
pop_size: 25.0  best_fitness: 65.0
pop_size: 50.0  best_fitness: 67.0
pop_size: 75.0  best_fitness: 69.0
pop_size: 100.0  best_fitness: 66.0
pop_size: 150.0  best_fitness: 68.0
pop_size: 200.0  best_fitness: 67.0
pop_size: 250.0  best_fitness: 69.0
pop_size: 300.0  best_fitness: 68.0
pop_size: 500.0  best_fitness: 67.0


mutation_prob

In [116]:
# Genetic algorithm: sweep mutation_prob from 0.1 up to (but excluding) 1.0 in
# steps of 0.1; all other settings are held fixed.
mutation_prob_list = np.arange(0.1, 1, 0.1)

ga_fixed_args = dict(problem=problem2, pop_size=200, max_attempts=10,
                     max_iters=1000, random_state=42)

for prob in mutation_prob_list:
    best_state, best_fitness = mlrose.genetic_alg(mutation_prob=prob, **ga_fixed_args)

    print("mutation_prob:", prob, "best_fitness:", best_fitness)

mutation_prob: 0.1 best_fitness: 67.0
mutation_prob: 0.2 best_fitness: 68.0
mutation_prob: 0.30000000000000004 best_fitness: 67.0
mutation_prob: 0.4 best_fitness: 64.0
mutation_prob: 0.5 best_fitness: 70.0
mutation_prob: 0.6 best_fitness: 67.0
mutation_prob: 0.7000000000000001 best_fitness: 69.0
mutation_prob: 0.8 best_fitness: 65.0
mutation_prob: 0.9 best_fitness: 67.0


max_iters

In [117]:
# Genetic algorithm: sweep max_iters with the population fixed at five times
# the problem's state length.
max_iters_list = [10, 100, 1000, 10000, 100000]

for iters in max_iters_list:
    ga_result = mlrose.genetic_alg(
        problem=problem2,
        pop_size=5 * problem2.length,
        mutation_prob=0.1,
        max_attempts=10,
        max_iters=iters,
        random_state=42,
    )
    best_state, best_fitness = ga_result

    print("max_iters", iters, " best_fitness:", best_fitness)


max_iters 10  best_fitness: 67.0
max_iters 100  best_fitness: 67.0
max_iters 1000  best_fitness: 67.0
max_iters 10000  best_fitness: 67.0
max_iters 100000  best_fitness: 67.0


max_attempts

In [118]:
# Genetic algorithm: sweep max_attempts with the population fixed at five times
# the problem's state length.
max_attempts_list = [10, 100, 1000, 10000, 100000]

for attempts in max_attempts_list:
    ga_result = mlrose.genetic_alg(
        problem=problem2,
        pop_size=5 * problem2.length,
        mutation_prob=0.1,
        max_attempts=attempts,
        max_iters=1000,
        random_state=42,
    )
    best_state, best_fitness = ga_result

    print("max_attempts", attempts, " best_fitness:", best_fitness)


max_attempts 10  best_fitness: 67.0
max_attempts 100  best_fitness: 72.0
max_attempts 1000  best_fitness: 77.0
max_attempts 10000  best_fitness: 77.0
max_attempts 100000  best_fitness: 77.0


##### Mimic 

keep_pct

In [119]:
# MIMIC: sweep keep_pct from 0.1 up to (but excluding) 1.0 in steps of 0.1;
# all other settings are held fixed.
keep_pct_list = np.arange(0.1, 1, 0.1)

mimic_fixed_args = dict(problem=problem2, pop_size=200, max_attempts=10,
                        max_iters=1000, random_state=42)

for pct in keep_pct_list:
    best_state, best_fitness = mlrose.mimic(keep_pct=pct, **mimic_fixed_args)

    print("keep_pct:", pct, " best_fitness:", best_fitness)

keep_pct: 0.1  best_fitness: 88.0
keep_pct: 0.2  best_fitness: 88.0
keep_pct: 0.30000000000000004  best_fitness: 89.0
keep_pct: 0.4  best_fitness: 86.0
keep_pct: 0.5  best_fitness: 90.0
keep_pct: 0.6  best_fitness: 89.0
keep_pct: 0.7000000000000001  best_fitness: 70.0
keep_pct: 0.8  best_fitness: 87.0
keep_pct: 0.9  best_fitness: 72.0


pop_size

In [120]:
# MIMIC: sweep the population size, given as multiples of the problem's state length.
pop_size_list = [.10, .25, .50, .75, 1.0, 1.50, 2.0, 2.50, 3.0, 5.0]
problem_length = problem2.length

mimic_fixed_args = dict(problem=problem2, keep_pct=0.20, max_attempts=10,
                        max_iters=1000, random_state=42)

for multiplier in pop_size_list:
    # `size` stays a float for the printout; the optimizer receives the int cast.
    size = multiplier * problem_length
    best_state, best_fitness = mlrose.mimic(pop_size=int(size), **mimic_fixed_args)

    print("pop_size:", size, " best_fitness:", best_fitness)

pop_size: 10.0  best_fitness: 57.0
pop_size: 25.0  best_fitness: 70.0
pop_size: 50.0  best_fitness: 74.0
pop_size: 75.0  best_fitness: 78.0
pop_size: 100.0  best_fitness: 80.0
pop_size: 150.0  best_fitness: 85.0
pop_size: 200.0  best_fitness: 88.0
pop_size: 250.0  best_fitness: 88.0
pop_size: 300.0  best_fitness: 94.0
pop_size: 500.0  best_fitness: 92.0


max_iters

In [185]:
# MIMIC: sweep max_iters with the population fixed at three times the
# problem's state length. An earlier version of this sweep also included
# 100000; that value is left out here (presumably for runtime -- confirm).
max_iters_list = [10, 100, 1000, 10000]

for iters in max_iters_list:
    mimic_result = mlrose.mimic(
        problem=problem2,
        pop_size=problem2.length * 3,
        keep_pct=0.20,
        max_attempts=10,
        max_iters=int(iters),
        random_state=42,
    )
    best_state, best_fitness = mimic_result

    print("max_iters:", iters, " best_fitness:", best_fitness)


max_iters: 10  best_fitness: 86.0
max_iters: 100  best_fitness: 94.0
max_iters: 1000  best_fitness: 94.0
max_iters: 10000  best_fitness: 94.0


max_attempts

In [122]:
# MIMIC: sweep max_attempts with the population fixed at five times the
# problem's state length. An earlier version of this sweep also included
# 10000 and 100000; those values are left out here (presumably for runtime -- confirm).
max_attempts_list = [10, 100, 1000]

for attempts in max_attempts_list:
    mimic_result = mlrose.mimic(
        problem=problem2,
        pop_size=problem2.length * 5,
        keep_pct=0.20,
        max_attempts=int(attempts),
        max_iters=1000,
        random_state=42,
    )
    best_state, best_fitness = mimic_result

    print("max_attempts:", attempts, " best_fitness:", best_fitness)
    

max_attempts: 10  best_fitness: 92.0
max_attempts: 100  best_fitness: 92.0
max_attempts: 1000  best_fitness: 92.0
