## Prepare Data

In [63]:
#!pip install keras-tuner

# Import our dependencies
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [64]:
#read the csv
ML_Data= pd.read_csv('Data/Five_Year_Data.csv')

In [65]:
ML_Data.columns

Index(['GEO_ID', 'Below_9th_grade', '9_12th_grade', 'High_School_Grad',
       'Some_College', 'Associate's_degree', 'Bachelor's_degree',
       'Graduate_degree', 'Population (16+)', 'Employed',
       'Median Household Income', 'Per capita Income', 'Unemployment Rate',
       'Professional (Occupation)', 'Service (Occupation)',
       'Sales (Occupation)', 'Construction (Occupation)',
       'Production (Occupation)', 'Salary (Class)', 'Gov't (Class)',
       'Self-employed (Class)', 'Fam-unpaid (Class)', 'Poverty',
       'Child-poverty', 'Median Home value', 'Median Mortgage', 'Median Rent',
       'Total Population', 'White', 'Black', 'American Indian', 'Asian',
       'Pacific Islander', 'Hispanic/Latino', 'County', 'State', 'Year'],
      dtype='object')

In [66]:
#Unemployment model: Educational background and demographic features
Unemployment_data= ML_Data[['Below_9th_grade', '9_12th_grade', 'High_School_Grad',
       'Some_College', "Associate's_degree", "Bachelor's_degree",
       'Graduate_degree', 'Population (16+)','Unemployment Rate','Total Population', 'White', 'Black', 'American Indian', 'Asian',
       'Pacific Islander', 'Hispanic/Latino', 'State', 'Year' ]]
Unemployment_data.head()

Unnamed: 0,Below_9th_grade,9_12th_grade,High_School_Grad,Some_College,Associate's_degree,Bachelor's_degree,Graduate_degree,Population (16+),Unemployment Rate,Total Population,White,Black,American Indian,Asian,Pacific Islander,Hispanic/Latino,State,Year
0,2.6,8.7,32.6,20.3,8.1,15.9,11.8,43368.0,4.2,55200,76.9,19.1,0.3,1.0,0.1,2.8,Alabama,2018
1,2.7,7.0,27.6,22.0,9.4,20.7,10.6,167712.0,4.4,208107,86.3,9.5,0.7,0.8,0.0,4.5,Alabama,2018
2,8.2,18.8,35.7,18.1,7.0,7.8,4.4,20948.0,9.5,25782,47.4,47.6,0.3,0.4,0.0,4.3,Alabama,2018
3,5.7,11.1,47.3,18.6,5.8,7.6,3.9,18470.0,7.5,22527,76.7,22.3,0.0,0.2,0.0,2.4,Alabama,2018
4,7.5,12.4,34.0,21.4,12.0,8.1,4.5,45915.0,4.1,57645,95.5,1.5,0.2,0.3,0.0,9.1,Alabama,2018


## One Hot Encoder
taking care of categorical data (non-numerical)

In [67]:
Unemployment_data.dtypes

Below_9th_grade       float64
9_12th_grade          float64
High_School_Grad      float64
Some_College          float64
Associate's_degree    float64
Bachelor's_degree     float64
Graduate_degree       float64
Population (16+)      float64
Unemployment Rate     float64
Total Population        int64
White                 float64
Black                 float64
American Indian       float64
Asian                 float64
Pacific Islander      float64
Hispanic/Latino       float64
State                  object
Year                    int64
dtype: object

In [68]:
# Generate our categorical variable lists. #checks for all cetegories that are objects
state_cat = Unemployment_data.dtypes[Unemployment_data.dtypes == "object"].index.tolist()

In [69]:
# Check the number of unique values in each column
Unemployment_data[state_cat].nunique()

State    51
dtype: int64

In [70]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse_output=False)
# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(Unemployment_data[state_cat]))
# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names_out(state_cat)
encode_df.head()

Unnamed: 0,State_Alabama,State_Alaska,State_Arizona,State_Arkansas,State_California,State_Colorado,State_Connecticut,State_Delaware,State_District of Columbia,State_Florida,...,State_South Dakota,State_Tennessee,State_Texas,State_Utah,State_Vermont,State_Virginia,State_Washington,State_West Virginia,State_Wisconsin,State_Wyoming
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
# Merge one-hot encoded features and drop the originals
Unemployment_data=Unemployment_data.merge(encode_df,left_index=True, right_index=True)
Unemployment_data= Unemployment_data.drop(state_cat, axis=1)
Unemployment_data.head()

Unnamed: 0,Below_9th_grade,9_12th_grade,High_School_Grad,Some_College,Associate's_degree,Bachelor's_degree,Graduate_degree,Population (16+),Unemployment Rate,Total Population,...,State_South Dakota,State_Tennessee,State_Texas,State_Utah,State_Vermont,State_Virginia,State_Washington,State_West Virginia,State_Wisconsin,State_Wyoming
0,2.6,8.7,32.6,20.3,8.1,15.9,11.8,43368.0,4.2,55200,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.7,7.0,27.6,22.0,9.4,20.7,10.6,167712.0,4.4,208107,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8.2,18.8,35.7,18.1,7.0,7.8,4.4,20948.0,9.5,25782,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,5.7,11.1,47.3,18.6,5.8,7.6,3.9,18470.0,7.5,22527,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,7.5,12.4,34.0,21.4,12.0,8.1,4.5,45915.0,4.1,57645,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# NN Model

In [72]:
# Split target column and Features Columns
y = Unemployment_data['Unemployment Rate']
X = Unemployment_data.drop(columns='Unemployment Rate')

In [73]:
# Use sklearn to split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [74]:
# Create scaler instance
scaler = StandardScaler()
# Fit the Standardscaler
scaler.fit(X_train)
# Scale the data
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [75]:
input_dimensions = X_train_scaled.shape[1]
input_dimensions

67

## Auto Optimizer

In [76]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim = input_dimensions))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    return nn_model

In [77]:
# Import the kerastuner library
import keras_tuner as kt
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

Reloading Tuner from ./untitled_project/tuner0.json


In [78]:
X_train_scaled.shape

(11739, 67)

In [79]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 56 Complete [00h 00m 54s]
val_accuracy: 0.0022994379978626966

Best val_accuracy So Far: 0.0022994379978626966
Total elapsed time: 00h 30m 35s


In [80]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'relu',
 'first_units': 5,
 'num_layers': 3,
 'units_0': 1,
 'units_1': 7,
 'units_2': 7,
 'units_3': 5,
 'units_4': 7,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0}

In [81]:
# Get top 3 model hyperparameters and print the values
top_hyper = tuner.get_best_hyperparameters(3)
for param in top_hyper:
    print(param.values)

{'activation': 'relu', 'first_units': 5, 'num_layers': 3, 'units_0': 1, 'units_1': 7, 'units_2': 7, 'units_3': 5, 'units_4': 7, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}
{'activation': 'sigmoid', 'first_units': 7, 'num_layers': 2, 'units_0': 1, 'units_1': 1, 'units_2': 7, 'units_3': 5, 'units_4': 7, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}
{'activation': 'relu', 'first_units': 3, 'num_layers': 1, 'units_0': 9, 'units_1': 9, 'units_2': 5, 'units_3': 3, 'units_4': 1, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}


In [82]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


123/123 - 1s - 8ms/step - accuracy: 0.0023 - loss: -3.3478e+02
Loss: -334.7835388183594, Accuracy: 0.0022994379978626966
