In [11]:
# Import the modules
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
     ------------------------------------ 176.1/176.1 kB 366.1 kB/s eta 0:00:00
Collecting kt-legacy
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.3.5 kt-legacy-1.0.5


In [2]:
# Read in the CSV
suicides_df = pd.read_csv("output/suicides.csv")

suicides_df.head()

Unnamed: 0,country,year,sex,age,suicides_no,population,suicides_per_100k_pop,gdp_for_year_USD,gdp_per_capita_USD,generation
0,Albania,1987,male,15-24 years,21,312900,6.71,2156625000.0,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,2156625000.0,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,2156625000.0,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,2156625000.0,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,2156625000.0,796,Boomers


In [3]:
# Make dataframe to compare suicide rate and GDP per capita
suicides_gdp_df = suicides_df[["suicides_per_100k_pop", "gdp_per_capita_USD"]]
suicides_gdp_df.head()

Unnamed: 0,suicides_per_100k_pop,gdp_per_capita_USD
0,6.71,796
1,5.19,796
2,4.83,796
3,4.59,796
4,3.28,796


In [4]:
# Get dummies
country_dummies = pd.get_dummies(suicides_df["country"])
sex_dummies = pd.get_dummies(suicides_df["sex"])
age_dummies = pd.get_dummies(suicides_df["age"])

In [5]:
# Add dummies
suicides_x_df = pd.concat([suicides_gdp_df, country_dummies, sex_dummies, age_dummies], axis=1)
suicides_x_df.head()

Unnamed: 0,suicides_per_100k_pop,gdp_per_capita_USD,Albania,Antigua and Barbuda,Argentina,Armenia,Aruba,Australia,Austria,Azerbaijan,...,Uruguay,Uzbekistan,female,male,15-24 years,25-34 years,35-54 years,5-14 years,55-74 years,75+ years
0,6.71,796,1,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0
1,5.19,796,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
2,4.83,796,1,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
3,4.59,796,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
4,3.28,796,1,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0


In [6]:
# Set target and feature sets
y = suicides_gdp_df["suicides_per_100k_pop"].values
x = suicides_x_df.drop(columns="suicides_per_100k_pop").values

In [7]:
# Split training/test datasets
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [8]:
# Preprocess numerical data for neural network

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
x_scaler = scaler.fit(x_train)

# Scale the data
x_train_scaled = x_scaler.transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

In [9]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=110,
        step=2), activation=activation, input_dim=110))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=110,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [12]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

In [14]:
# Run the kerastuner search for best hyperparameters
tuner.search(x_train_scaled,y_train,epochs=20,validation_data=(x_test_scaled,y_test))

Trial 60 Complete [00h 00m 35s]
val_accuracy: 0.0723220705986023

Best val_accuracy So Far: 0.08598130941390991
Total elapsed time: 00h 14m 00s
INFO:tensorflow:Oracle triggered exit


In [15]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 75,
 'num_layers': 2,
 'units_0': 35,
 'units_1': 21,
 'units_2': 21,
 'units_3': 19,
 'units_4': 83,
 'units_5': 5,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/bracket': 1,
 'tuner/round': 1,
 'tuner/trial_id': '0051'}

In [20]:
tuner.results_summary()

Results summary
Results in .\untitled_project
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 0054 summary
Hyperparameters:
activation: tanh
first_units: 75
num_layers: 2
units_0: 35
units_1: 21
units_2: 21
units_3: 19
units_4: 83
units_5: 5
tuner/epochs: 20
tuner/initial_epoch: 7
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0051
Score: 0.08598130941390991

Trial 0046 summary
Hyperparameters:
activation: tanh
first_units: 99
num_layers: 2
units_0: 75
units_1: 15
units_2: 51
units_3: 79
units_4: 69
units_5: 11
tuner/epochs: 20
tuner/initial_epoch: 7
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: 0042
Score: 0.08540618419647217

Trial 0055 summary
Hyperparameters:
activation: tanh
first_units: 63
num_layers: 5
units_0: 23
units_1: 5
units_2: 45
units_3: 87
units_4: 99
units_5: 67
tuner/epochs: 20
tuner/initial_epoch: 7
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0049
Score: 0.08224298804998398

Trial 0016 summary
Hyperparameters:
activation: tanh
fir

In [21]:
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 75)                8325      
                                                                 
 dense_1 (Dense)             (None, 35)                2660      
                                                                 
 dense_2 (Dense)             (None, 21)                756       
                                                                 
 dense_3 (Dense)             (None, 1)                 22        
                                                                 
Total params: 11763 (45.95 KB)
Trainable params: 11763 (45.95 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [27]:
# Define the deep learning model 
nn_model_tuned = tf.keras.models.Sequential()
nn_model_tuned.add(tf.keras.layers.Dense(units=75, activation="tanh", input_dim=110))
nn_model_tuned.add(tf.keras.layers.Dense(units=35, activation="tanh"))
nn_model_tuned.add(tf.keras.layers.Dense(units=21, activation="tanh"))
nn_model_tuned.add(tf.keras.layers.Dense(units=1, activation="tanh"))


# Compile the Sequential model together and customize metrics
nn_model_tuned.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [33]:
# Train the model
fit_model_tuned = nn_model_tuned.fit(x_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [34]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model_tuned.evaluate(x_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

218/218 - 0s - loss: -1.7755e+02 - accuracy: 0.0782 - 317ms/epoch - 1ms/step
Loss: -177.55264282226562, Accuracy: 0.07821711152791977
