In [104]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import os
pd.set_option('max_columns', None)
pd.set_option('max_rows', None)

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score

In [4]:
import six
import sys
sys.modules['sklearn.externals.six'] = six
import mlrose

What is an optimization problem? 

- We aim to find the best state, according to some objective function. 
- An example of a state is the weights used in a machine learning model, such as a neural network
- State is represented by a one-dimensional vector 
- "Best" state is defined using the Cost Function/Fitness Function/Objective Function 
- Function takes in a State Array as Input and returns a Fitness Value as Output


We will make 3 steps to solve an optimization problem with mlrose:
 - 1) Define fitness function object
 - 2) Define an optimization problem object
 - 3) Select and run a randomized optimization algorithm

### Tutorial

#### Example: 8 Queens 

Purpose: Given a chess board with 8 queens, place the queens such that none of them can attack each other. 
    An optimal solution will have one queen per column. 
    Our State Vector has 8 inputs, for the 8 rows that we are placing our queens in. The First Queen is column 1 and so on

#### 1) Define Fitness Function Object

This fitness function evaluates the number of pairs of attacking queens for a given state and tries to MINIMIZE this function 

In [6]:
# Use pre built fitness function: https://mlrose.readthedocs.io/en/stable/source/fitness.html#fitness
fitness = mlrose.Queens()

#### 2) Define an optimization problem object

We pick either Discrete, Continous, or Traveling Salesman types of optimization objects. This problem expects a state vector. We choose discrete. 

We are solving a minimization problem, length of 8 inputs, using fitness function as above.

In [7]:
problem = mlrose.DiscreteOpt(length = 8, fitness_fn = fitness, maximize = False, max_val = 8)

#### 3) Select and run Randomized Optimization Algorithm

Simulated Annealing requires a schedule object, to explain how the Temperature value changes over time, 
https://mlrose.readthedocs.io/en/stable/source/decay.html#decay
Decay of Temp can be Exponential, Geometric, or Arthimetic


We also need a max_attempts value, for number of attemps the algorithm should make to find a "better" state at each step 

We also need a max_iters value, for number of iterations the algo should run overall

We can provide a starting state in the init_state

##### Simulated Annealing

In [11]:
# Schedule Decay 
schedule = mlrose.ExpDecay()

# Initial Starting Point
init_state = np.array([0, 1, 2, 3, 4, 5, 6, 7])

# Solve Problem with Simulated annealing 
best_state, best_fitness = mlrose.simulated_annealing(problem, schedule=schedule,
                                                     max_attempts=10, max_iters=1000,
                                                     init_state = init_state, random_state = 1)

print(best_state)

print(best_fitness)

[6 4 7 3 6 2 5 1]
2.0


The fitness value of "2" means there are still 2 pairs of attacking queens

Change max_attempts from 10 to 100

In [12]:
# Schedule Decay 
schedule = mlrose.ExpDecay()

# Initial Starting Point
init_state = np.array([0, 1, 2, 3, 4, 5, 6, 7])

# Solve Problem with Simulated annealing 
best_state, best_fitness = mlrose.simulated_annealing(problem, schedule=schedule,
                                                     max_attempts=100, max_iters=1000,
                                                     init_state = init_state, random_state = 1)

print(best_state)

print(best_fitness)

[4 1 3 5 7 2 0 6]
0.0


By increasing the max_attempts, we get an optimal solution

Add the input curve

In [14]:
# Schedule Decay 
schedule = mlrose.ExpDecay()

# Initial Starting Point
init_state = np.array([0, 1, 2, 3, 4, 5, 6, 7])

# Solve Problem with Simulated annealing 
best_state, best_fitness, fitness_curve = mlrose.simulated_annealing(problem, schedule=schedule,
                                                     max_attempts=100, max_iters=1000,
                                                     init_state = init_state, random_state = 1,
                                                     curve=True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[4 1 3 5 7 2 0 6]
0.0


###### Genetic Algorithm

In [17]:
best_state, best_fitness, fitness_curve = mlrose.genetic_alg(problem, pop_size = 200, mutation_prob = .10, 
                                               max_attempts = 10, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[2 5 1 5 3 0 4 7]
3.0


In [18]:
best_state, best_fitness, fitness_curve = mlrose.genetic_alg(problem, pop_size = 200, mutation_prob = .10, 
                                               max_attempts = 100, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[5 2 6 1 3 7 3 7]
2.0


In [19]:
best_state, best_fitness, fitness_curve = mlrose.genetic_alg(problem, pop_size = 200, mutation_prob = .10, 
                                               max_attempts = 1000, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[6 3 5 0 4 1 7 2]
1.0


In [20]:
best_state, best_fitness, fitness_curve = mlrose.genetic_alg(problem, pop_size = 200, mutation_prob = .20, 
                                               max_attempts = 1000, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[6 4 2 0 5 7 1 3]
0.0


In [21]:
best_state, best_fitness, fitness_curve = mlrose.genetic_alg(problem, pop_size = 200, mutation_prob = .20, 
                                               max_attempts = 100, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[3 6 4 7 0 0 2 5]
2.0


##### MIMIC

In [22]:
best_state, best_fitness, fitness_curve = mlrose.mimic(problem, pop_size = 200, keep_pct = .20, 
                                               max_attempts = 10, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[3 6 4 1 5 0 2 4]
1.0


In [23]:
best_state, best_fitness, fitness_curve = mlrose.mimic(problem, pop_size = 200, keep_pct = .20, 
                                               max_attempts = 100, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[3 6 4 1 5 0 2 4]
1.0


In [24]:
best_state, best_fitness, fitness_curve = mlrose.mimic(problem, pop_size = 200, keep_pct = .10, 
                                               max_attempts = 10, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[5 7 0 0 6 1 6 2]
2.0


In [26]:
best_state, best_fitness, fitness_curve = mlrose.mimic(problem, pop_size = 200, keep_pct = .50, 
                                               max_attempts = 10, max_iters = 1000,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[2 4 7 3 0 6 1 5]
0.0


##### Random Hill Climbing 

In [28]:
best_state, best_fitness, fitness_curve = mlrose.random_hill_climb(problem, restarts=0,
                                               max_attempts = 10, max_iters = 1000,
                                               init_state=init_state,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[0 3 5 1 6 4 2 7]
3.0


In [29]:
best_state, best_fitness, fitness_curve = mlrose.random_hill_climb(problem, restarts=1,
                                               max_attempts = 10, max_iters = 1000,
                                               init_state=init_state,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[0 3 5 1 6 4 2 7]
3.0


In [32]:
best_state, best_fitness, fitness_curve = mlrose.random_hill_climb(problem, restarts=0,
                                               max_attempts = 1000, max_iters = 1000,
                                               init_state=init_state,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[0 2 5 1 6 4 2 7]
2.0


In [35]:
best_state, best_fitness, fitness_curve = mlrose.random_hill_climb(problem, restarts=5,
                                               max_attempts = 10000, max_iters = 1000,
                                               init_state=init_state,
                                               random_state = 42, curve = True)

print(best_state)

print(best_fitness)

#print(fitness_curve)

[5 1 4 7 0 6 3 5]
1.0


Variables to Modify: 

All of them: 
   - max_iters
   - max_attempts 
    
RHC: 
   - init_state: discrete 
   - restarts: default 0, range variable  
    
SA: 
   - init_state: discrete
   - schedule: Exponential, Geometric, Arithmetic
    
GA: 
   - pop_size: default 200, range variable  
   - mutation_prob: default 0.1, range (0,1)
    
Mimic: 
   - pop_size: default: 200, range variable 
   - keep_pct: default 0.2, range (0,1)

#### Tutorial - Machine Learning Weight Optimization Problems 

 Steps to Neural Network  Weight Optimization Problem 
 
 - 1) Initialize a ML weight optimization problem object
 - 2) Find optimal weights for a given training dataset by calling the fit method of object init in step 1 
 - 3) Predict labels for test dataset calling the predict method of obj from step 1

In [36]:
from sklearn.datasets import load_iris

In [41]:
# data is a dictionary
data = load_iris()

In [42]:
data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [38]:
print(data.data[0])

[5.1 3.5 1.4 0.2]


In [40]:
print(data.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [43]:
print(data.target[0])

0


In [44]:
print(data.target_names[data.target[0]])

setosa


In [47]:
print(np.unique(data.target))

[0 1 2]


In [49]:
print(data.target_names[0])
print(data.target_names[1])
print(data.target_names[2])

setosa
versicolor
virginica


In [55]:
## Prep Data 
X_Train, X_Test, Y_Train, Y_Test = train_test_split(data.data, data.target, 
                                                   test_size = 0.2, random_state = 3)

# Normalize Feature Data 
scaler = MinMaxScaler()

X_Train_Scaled = scaler.fit_transform(X_Train)
X_Test_Scaled = scaler.fit_transform(X_Test)

# One Hot Encode Target Values 
one_hot = OneHotEncoder()

Y_Train_Hot = one_hot.fit_transform(Y_Train.reshape(-1,1)).todense()
Y_Test_Hot = one_hot.fit_transform(Y_Test.reshape(-1,1)).todense()


Make Neural Network

##### Random Hill Climb

In [56]:
nn_model1 = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                algorithm = 'random_hill_climb', max_iters=1000,
                                bias = True, is_classifier = True, learning_rate = 0.0001,
                                early_stopping = True, clip_max = 5, max_attempts = 100, 
                                random_state = 3)

nn_model1.fit(X_Train_Scaled, Y_Train_Hot)

NeuralNetwork(clip_max=5, early_stopping=True, hidden_nodes=[2],
              learning_rate=0.0001, max_iters=1000)

Use Neural Network to Predict on Test and Train data

In [59]:
# predict labels for Training Set 
Y_Train_Pred = nn_model1.predict(X_Train_Scaled)

Y_Train_Accuracy = accuracy_score(Y_Train_Hot, Y_Train_Pred)

print(Y_Train_Accuracy)

#predict labels for Testing Set 
Y_Test_Pred = nn_model1.predict(X_Test_Scaled)

Y_Test_Accuracy = accuracy_score(Y_Test_Hot, Y_Test_Pred)

print(Y_Test_Accuracy)

0.45
0.5666666666666667


##### Gradient Descent - same as BackPropogation, similar to Sklearn

In [60]:
nn_model2 = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                algorithm = 'gradient_descent', max_iters=1000,
                                bias = True, is_classifier = True, learning_rate = 0.0001,
                                early_stopping = True, clip_max = 5, max_attempts = 100, 
                                random_state = 3)

nn_model2.fit(X_Train_Scaled, Y_Train_Hot)

NeuralNetwork(clip_max=5, early_stopping=True, hidden_nodes=[2],
              learning_rate=0.0001, max_iters=1000)

In [61]:
# predict labels for Training Set 
Y_Train_Pred = nn_model2.predict(X_Train_Scaled)

Y_Train_Accuracy = accuracy_score(Y_Train_Hot, Y_Train_Pred)

print(Y_Train_Accuracy)

#predict labels for Testing Set 
Y_Test_Pred = nn_model2.predict(X_Test_Scaled)

Y_Test_Accuracy = accuracy_score(Y_Test_Hot, Y_Test_Pred)

print(Y_Test_Accuracy)

0.625
0.6333333333333333


##### Simulated Annealing

In [63]:
nn_model3 = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                algorithm = 'simulated_annealing', max_iters=1000,
                                bias = True, is_classifier = True, learning_rate = 0.0001,
                                early_stopping = True, clip_max = 5, max_attempts = 100, 
                                random_state = 3,
                                schedule = schedule)

nn_model3.fit(X_Train_Scaled, Y_Train_Hot)

NeuralNetwork(clip_max=5, early_stopping=True, hidden_nodes=[2],
              learning_rate=0.0001, max_iters=1000,
              schedule=<mlrose.decay.ExpDecay object at 0x7fc12546f390>)

In [64]:
# predict labels for Training Set 
Y_Train_Pred = nn_model3.predict(X_Train_Scaled)

Y_Train_Accuracy = accuracy_score(Y_Train_Hot, Y_Train_Pred)

print(Y_Train_Accuracy)

#predict labels for Testing Set 
Y_Test_Pred = nn_model3.predict(X_Test_Scaled)

Y_Test_Accuracy = accuracy_score(Y_Test_Hot, Y_Test_Pred)

print(Y_Test_Accuracy)

0.4583333333333333
0.5666666666666667


##### Genetic Algorithm

In [65]:
nn_model4 = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                algorithm = 'genetic_alg', max_iters=1000,
                                bias = True, is_classifier = True, learning_rate = 0.0001,
                                early_stopping = True, clip_max = 5, max_attempts = 100, 
                                random_state = 3,
                                pop_size=200)

nn_model4.fit(X_Train_Scaled, Y_Train_Hot)

NeuralNetwork(clip_max=5, early_stopping=True, hidden_nodes=[2],
              learning_rate=0.0001, max_iters=1000)

In [66]:
# predict labels for Training Set 
Y_Train_Pred = nn_model4.predict(X_Train_Scaled)

Y_Train_Accuracy = accuracy_score(Y_Train_Hot, Y_Train_Pred)

print(Y_Train_Accuracy)

#predict labels for Testing Set 
Y_Test_Pred = nn_model4.predict(X_Test_Scaled)

Y_Test_Accuracy = accuracy_score(Y_Test_Hot, Y_Test_Pred)

print(Y_Test_Accuracy)

0.9083333333333333
0.8666666666666667


TODO: read this documentation, try each algo: https://mlrose.readthedocs.io/en/stable/source/neural.html

In [67]:
nn_model4.fitted_weights

array([ 1.67729476,  0.85173743, -0.86436999, -3.04406682,  1.57281632,
        1.20068369,  3.41331195, -0.42434971, -1.93211357,  2.93124779,
       -4.73201025,  0.43811079,  4.92900583,  1.47776978,  0.78182121,
       -2.37719733])

In [68]:
nn_model4.loss

0.35066250312650965

In [70]:
print(nn_model1.loss)
print(nn_model2.loss)
print(nn_model3.loss)
print(nn_model4.loss)

1.064378274038072
0.8551592021027957
1.065387410570692
0.35066250312650965


### Data Import

In [73]:
data_polling = pd.read_stata("Data/cumulative_2006_2018.dta")

In [85]:
inputs = ['year','st','gender', 'age', 'race', 'hispanic', 'educ', 'faminc', 'marstat', 'economy_retro', 'newsint','approval_pres']
targets = ['pid3']

keep = inputs + targets

data_polling = data_polling[(data_polling.year == 2018) & (data_polling.pid3.isin(['Democrat','Republican']) )]

polling = data_polling[keep]


In [86]:
polling.dropna(axis=0, how='any',inplace=True)

In [87]:
pd.options.mode.chained_assignment = None  # default='warn'

polling['age_band'] = np.NaN
polling.loc[ (polling['age'] >= 18) & (polling['age'] <= 29), 'age_band'] = 'Young'
polling.loc[ (polling['age'] >= 30) & (polling['age'] <= 49), 'age_band'] = 'Lower-Middle'
polling.loc[ (polling['age'] >= 50) & (polling['age'] <= 69), 'age_band'] = 'Upper-Middle'
polling.loc[ (polling['age'] >= 70) , 'age_band'] = 'Old'

In [88]:
polling.loc[ polling['pid3'] == "Democrat", "Dem_Ind"] = 1
polling.loc[ polling['pid3'] == 'Republican', "Dem_Ind"] = 0
polling['Dem_Ind'] = polling['Dem_Ind'].astype(int)

In [89]:
categorical_polling = ['st','gender', 'age_band', 'race', 'hispanic', 'educ', 'faminc', 'marstat', 'economy_retro', 'newsint','approval_pres']


In [90]:
polling.head()

Unnamed: 0,year,st,gender,age,race,hispanic,educ,faminc,marstat,economy_retro,newsint,approval_pres,pid3,age_band,Dem_Ind
392756,2018,OH,Female,58,White,No,Post-Grad,150k+,Married,Stayed About The Same,Only Now And Then,Approve / Somewhat Approve,Republican,Upper-Middle,0
392757,2018,KY,Female,66,White,No,Post-Grad,10k - 20k,Divorced,Stayed About The Same,Most Of The Time,Strongly Disapprove,Democrat,Upper-Middle,1
392758,2018,AZ,Female,88,White,No,High School Graduate,20k - 30k,Widowed,Gotten Much Better,Some Of The Time,Approve / Somewhat Approve,Republican,Old,0
392760,2018,NC,Male,71,White,No,High School Graduate,50k - 60k,Married,Gotten Much Better,Some Of The Time,Strongly Approve,Republican,Old,0
392761,2018,PA,Male,86,White,No,4-Year,60k - 70k,Widowed,Gotten Better / Somewhat Better,Most Of The Time,Strongly Approve,Republican,Old,0


In [93]:
small_polling = polling.sample(n=10000, random_state = 42)

In [94]:
X_polling = small_polling[categorical_polling]

y_polling = small_polling.loc[:,'Dem_Ind']


General Use Functions

In [95]:
from sklearn.preprocessing import OneHotEncoder

def prep_data(X, y, categoricalVars):

    encoder = OneHotEncoder(sparse=False)
    X_encoded = pd.DataFrame(encoder.fit_transform(X[categoricalVars]))
    X_encoded.columns = encoder.get_feature_names(categoricalVars)
    X.drop(categoricalVars, axis = 1, inplace = True)
    if X.shape[1] > 0:
        OH_X = pd.concat([X, pd.DataFrame(X_encoded, index=X.index)], axis = 1)
    else:
        OH_X = X_encoded 
        
    X_Train, X_Test, Y_Train, Y_Test = train_test_split(OH_X, y, test_size = .30, train_size = .70, random_state = 42)
    return(X_Train, X_Test, Y_Train, Y_Test)

In [99]:
X_Train_Polling, X_Test_Polling, Y_Train_Polling, Y_Test_Polling = prep_data(X_polling, y_polling, categorical_polling)


###### Random Hill Climbing

In [102]:
nn_model1 = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                algorithm = 'random_hill_climb', max_iters=1000,
                                bias = True, is_classifier=True, learning_rate = 0.0001,
                                early_stopping = True, clip_max = 5, max_attempts = 100,
                                random_state = 42)

nn_model1.fit(X_Train_Polling, Y_Train_Polling)

NeuralNetwork(clip_max=5, early_stopping=True, hidden_nodes=[2],
              learning_rate=0.0001, max_iters=1000)

In [105]:
# predict labels for Training Set 
Y_Train_Pred = nn_model1.predict(X_Train_Polling)

Y_Train_Accuracy = accuracy_score(Y_Train_Polling, Y_Train_Pred)
Y_Train_F1_Score = f1_score(Y_Train_Polling, Y_Train_Pred)

print(Y_Train_Accuracy)
print(Y_Train_F1_Score)

#predict labels for Testing Set 
Y_Test_Pred = nn_model1.predict(X_Test_Polling)

Y_Test_Accuracy = accuracy_score(Y_Test_Polling, Y_Test_Pred)
Y_Test_F1_Score = f1_score(Y_Test_Polling, Y_Test_Pred)

print(Y_Test_Accuracy)
print(Y_Test_F1_Score)

0.45685714285714285
0.4402237926972909
0.4613333333333333
0.4408304498269896


In [108]:
nn_model2 = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'identity',
                                algorithm = 'gradient_descent', max_iters=1000,
                                bias = True, is_classifier=True, learning_rate = 0.0001,
                                early_stopping = True, clip_max = 5, max_attempts = 100,
                                random_state = 42)

nn_model2.fit(X_Train_Polling, Y_Train_Polling)

NeuralNetwork(clip_max=5, early_stopping=True, hidden_nodes=[2],
              learning_rate=0.0001, max_iters=1000)

In [109]:
# predict labels for Training Set 
Y_Train_Pred = nn_model2.predict(X_Train_Polling)

Y_Train_Accuracy = accuracy_score(Y_Train_Polling, Y_Train_Pred)
Y_Train_F1_Score = f1_score(Y_Train_Polling, Y_Train_Pred)

print(Y_Train_Accuracy)
print(Y_Train_F1_Score)

#predict labels for Testing Set 
Y_Test_Pred = nn_model2.predict(X_Test_Polling)

Y_Test_Accuracy = accuracy_score(Y_Test_Polling, Y_Test_Pred)
Y_Test_F1_Score = f1_score(Y_Test_Polling, Y_Test_Pred)

print(Y_Test_Accuracy)
print(Y_Test_F1_Score)

0.9158571428571428
0.9260329021725481
0.9146666666666666
0.9253644314868805
