# Randomized optimization

Plagiarism note: I partially took this course in 2020, so some of the analysis and text are repeated from my earlier submission.

mlrose procedure:

1. Define a fitness function
- This is the function we want to maximize or minimize, and is used to evaluate the fitness of a state vector.
2. Define an optimization problem object
3. Select and run a randomized optimization algorithm

mlrose fitness functions: https://mlrose.readthedocs.io/en/stable/source/fitness.html

## Load libraries

In [1]:
import six
import sys
sys.modules['sklearn.externals.six'] = six
import mlrose
import numpy as np
import pandas as pd
import time
from sklearn.preprocessing import normalize
from sklearn.metrics import accuracy_score

## Set directories

In [3]:
#Homework folders (HW1 holds the input data, HW2 receives the results)
#NOTE(review): these are absolute, machine-specific paths; adjust before running elsewhere
directory_hw1, directory_hw2 = (
    "/Users/mikepecorino/Documents/machine_learning/HW{}/".format(hw_number)
    for hw_number in (1, 2)
)

## Load inputs

In [9]:
#Read the full data set and its train / validation / test splits from the HW1 folder
data_all, train, valid, test = (
    pd.read_csv(directory_hw1 + "ncaa_" + split_name + ".csv")
    for split_name in ("all", "train", "valid", "test")
)

## Neural Network

### Define features and response variable

### Features list

In [5]:
#Model input columns, one per line; the order is significant because it fixes
#the column order of every feature matrix built from this list
features = [
    #Win percentage and starter minutes
    "game_win_perc_prop",
    "game_starters_total_minutes_prop",
    "game_starters_prop_minutes_prop",
    #Player scoring thresholds
    "game_player_pts_10plus_prop",
    "game_player_pts_15plus_prop",
    "game_player_pts_16plus_prop",
    "game_player_pts_17plus_prop",
    "game_player_pts_18plus_prop",
    "game_player_pts_19plus_prop",
    "game_player_pts_20plus_prop",
    "game_player_pts_21plus_prop",
    "game_player_pts_22plus_prop",
    #Player assist thresholds
    "game_player_ast_3plus_prop",
    "game_player_ast_5plus_prop",
    "game_player_ast_7plus_prop",
    #Player offensive rebound thresholds
    "game_player_orb_1plus_prop",
    "game_player_orb_2plus_prop",
    "game_player_orb_3plus_prop",
    #Player defensive rebound thresholds
    "game_player_drb_5plus_prop",
    "game_player_drb_7plus_prop",
    "game_player_drb_10plus_prop",
    #Team-level game statistics
    "game_gs_mean_prop",
    "game_gs_max_prop",
    "game_pos_prop",
    "game_pts_prop",
    "game_efficiency_prop",
    "game_fg_attempted_prop",
    "game_ft_attempted_prop",
    "game_ft_made_prop",
    "game_stl_prop",
    "game_tov_prop",
    "game_stl_tov_ratio_diff",
    "game_stl_tov_ratio_prop",
    "game_blk_prop",
    "game_orb_prop",
    "game_drb_prop",
    "game_trb_prop",
    "game_ast_prop",
    "game_pf_diff",
    "game_pf_prop",
    #Venue indicators
    "home_indicator.x",
    "neutral_indicator",
]

### Features data

In [12]:
#Select the feature columns from each data set
data_all_features = data_all[features]
train_features = train[features]
valid_features = valid[features]
#Cross-validation set = train rows followed by validation rows.
#pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
#like append, it keeps the original row indices of both frames.
data_cv_features = pd.concat([train_features, valid_features])
test_features = test[features]

### Create adjusted response variable

### Response variable

In [13]:
#Name of the column used as the response (target) variable
response = "win_indicator"

### Response data

In [21]:
#Select the response column from each data set
data_all_response = data_all[response]
train_response = train[response]
valid_response = valid[response]
#Cross-validation response = train rows followed by validation rows, in the
#same order as the cross-validation features.
#pd.concat replaces Series.append, which was removed in pandas 2.0.
data_cv_response = pd.concat([train_response, valid_response])
test_response = test[response]

### Normalize data

In [22]:
#Rescale every sample (row) to unit L2 norm; the arguments below are sklearn's
#defaults, written out so it is clear the scaling is per row, not per column.
#NOTE(review): if per-feature scaling was intended, a scaler fitted on the
#training data would be needed instead - confirm.
data_cv_features_normalized = normalize(data_cv_features, norm = "l2", axis = 1)
test_features_normalized = normalize(test_features, norm = "l2", axis = 1)

In [None]:
#Train an mlrose neural network once per (algorithm, max_attempt, max_iter,
#random_restart) combination, record fit time and train/test accuracy for each
#run, and write the collected results to a CSV in the HW2 folder.

#Inputs for the Neural Network
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
algos = ["random_hill_climb", "simulated_annealing", "genetic_alg", "gradient_descent"]
random_state = 28
pop_size = 200
mutation_prob = 0.1
#Simulated Annealing: decay schedule for temperature
schedule = mlrose.ExpDecay(init_temp = 100,
                           exp_const = .05,
                           min_temp = 1)

#Column order of the results table
#(the original list was missing the comma after "function_evaluations", which
# silently fused it with "train_score" into one bogus column name)
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
result_columns = ["algorithm",
                  "random_restart",
                  "max_attempt",
                  "max_iter",
                  "time",
                  "function_evaluations",
                  "train_score",
                  "test_score"]

#Collect one dict per run and build the data frame once at the end;
#DataFrame.append was removed in pandas 2.0 and copied the frame on every call
results = []

#Loop
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#For each combination of algorithm, max attempt, max iter, and random restarts
for algo in algos:
    for max_attempt in [10]:
        for max_iter in [1, 10, 20, 30, 40, 50]:
            for random_restart in [10]:

                #1-based run counter (named run_number so the builtin iter() is not shadowed)
                run_number = len(results) + 1

                #Print message
                print("Working on iter:", run_number,
                      "Algorithm:", algo,
                      "Random restart:", random_restart,
                      "Max attempt:", max_attempt,
                      "Max iter:", max_iter)

                #Start the timer
                start = time.time()

                #Create the model object
                #(all algorithm-specific settings are passed on every run)
                nn_model = mlrose.NeuralNetwork(hidden_nodes = [1500],
                                                activation = "relu",
                                                algorithm = algo,
                                                max_iters = max_iter,
                                                bias = True,
                                                is_classifier = True,
                                                learning_rate = 0.0001,
                                                early_stopping = True,
                                                clip_max = 2,
                                                max_attempts = max_attempt,
                                                random_state = random_state,
                                                pop_size = pop_size,
                                                mutation_prob = mutation_prob,
                                                schedule = schedule,
                                                restarts = random_restart,
                                                curve = True)

                #Fit the model
                nn_model.fit(data_cv_features_normalized, data_cv_response)

                #End the timer right after fitting so only the fit is timed
                end = time.time()

                #Get the total model fitting time
                fit_time = end - start

                #1-based position of the best value on the fitness curve
                #NOTE(review): this is recorded as "function_evaluations" but is the
                #index of the maximum curve entry, not a count of evaluations - confirm
                function_evaluations = np.argmax(nn_model.fitness_curve) + 1

                #Score the model on train and test data
                train_pred = nn_model.predict(data_cv_features_normalized)
                train_score = accuracy_score(data_cv_response, train_pred)
                test_pred = nn_model.predict(test_features_normalized)
                test_score = accuracy_score(test_response, test_pred)

                #Add to results list
                results.append({"algorithm": algo,
                                "random_restart": random_restart,
                                "max_attempt": max_attempt,
                                "max_iter": max_iter,
                                "time": fit_time,
                                "function_evaluations": function_evaluations,
                                "train_score": train_score,
                                "test_score": test_score})

                print("Done in time:", fit_time, "with test score:", test_score)
                print("\n")

#Build the results table in the declared column order
mlrose_nn = pd.DataFrame(results, columns = result_columns)

#Done
print("Done")

#Output
mlrose_nn.to_csv(directory_hw2 + "sensor_randomized_opt_neural_net_ncaa.csv", index = False)

Working on iter: 1 Algorithm: random_hill_climb Random restart: 10 Max attempt: 10 Max iter: 1
Done in time: 28.751885175704956 with test score: 0.5545939165882722


Working on iter: 2 Algorithm: random_hill_climb Random restart: 10 Max attempt: 10 Max iter: 10
Done in time: 186.17395567893982 with test score: 0.5545939165882722


Working on iter: 3 Algorithm: random_hill_climb Random restart: 10 Max attempt: 10 Max iter: 20
Done in time: 357.0073547363281 with test score: 0.5545939165882722


Working on iter: 4 Algorithm: random_hill_climb Random restart: 10 Max attempt: 10 Max iter: 30
Done in time: 504.7341079711914 with test score: 0.5545939165882722


Working on iter: 5 Algorithm: random_hill_climb Random restart: 10 Max attempt: 10 Max iter: 40
Done in time: 707.4204082489014 with test score: 0.5545939165882722


Working on iter: 6 Algorithm: random_hill_climb Random restart: 10 Max attempt: 10 Max iter: 50
Done in time: 845.0975711345673 with test score: 0.5545939165882722


Wor