In [1]:
# Import the modules
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf
!pip install keras-tuner



In [2]:
# Read in the CSV
suicides_df = pd.read_csv("output/suicides.csv")

suicides_df.head()

Unnamed: 0,country,year,sex,age,suicides_no,population,suicides_per_100k_pop,gdp_for_year_USD,gdp_per_capita_USD,generation,suicide_tier
0,Albania,1987,male,15-24 years,21,312900,6.71,2156625000.0,796,Generation X,0
1,Albania,1987,male,35-54 years,16,308000,5.19,2156625000.0,796,Silent,0
2,Albania,1987,female,15-24 years,14,289700,4.83,2156625000.0,796,Generation X,0
3,Albania,1987,male,75+ years,1,21800,4.59,2156625000.0,796,G.I. Generation,0
4,Albania,1987,male,25-34 years,9,274300,3.28,2156625000.0,796,Boomers,0


In [3]:
# Make dataframe to compare suicide rate and GDP per capita
suicides_gdp_df = suicides_df[["gdp_per_capita_USD", "suicide_tier"]]
suicides_gdp_df.head()

Unnamed: 0,gdp_per_capita_USD,suicide_tier
0,796,0
1,796,0
2,796,0
3,796,0
4,796,0


In [4]:
# Get dummies
country_dummies = pd.get_dummies(suicides_df["country"])
sex_dummies = pd.get_dummies(suicides_df["sex"])
age_dummies = pd.get_dummies(suicides_df["age"])

In [5]:
# Add dummies
suicides_x_df = pd.concat([suicides_gdp_df, country_dummies, sex_dummies, age_dummies], axis=1)
suicides_x_df.head()

Unnamed: 0,gdp_per_capita_USD,suicide_tier,Albania,Antigua and Barbuda,Argentina,Armenia,Aruba,Australia,Austria,Azerbaijan,...,Uruguay,Uzbekistan,female,male,15-24 years,25-34 years,35-54 years,5-14 years,55-74 years,75+ years
0,796,0,1,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0
1,796,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
2,796,0,1,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
3,796,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
4,796,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0


In [6]:
# Set target and feature sets
y = suicides_gdp_df["suicide_tier"].values
x = suicides_x_df.drop(columns="suicide_tier").values

In [7]:
# Split training/test datasets
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [8]:
# Preprocess numerical data for neural network

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
x_scaler = scaler.fit(x_train)

# Scale the data
x_train_scaled = x_scaler.transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

In [9]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=110,
        step=2), activation=activation, input_dim=110))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=110,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [10]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

In [11]:
# Run the kerastuner search for best hyperparameters
tuner.search(
    x_train_scaled,
    y_train,epochs=20,
    validation_data=(x_test_scaled,y_test)
)

Trial 60 Complete [00h 00m 41s]
val_accuracy: 0.8007189035415649

Best val_accuracy So Far: 0.8700215816497803
Total elapsed time: 00h 13m 42s
INFO:tensorflow:Oracle triggered exit


In [12]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 57,
 'num_layers': 1,
 'units_0': 39,
 'units_1': 39,
 'units_2': 89,
 'units_3': 89,
 'units_4': 109,
 'units_5': 23,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [13]:
tuner.results_summary()

Results summary
Results in .\untitled_project
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 0027 summary
Hyperparameters:
activation: tanh
first_units: 57
num_layers: 1
units_0: 39
units_1: 39
units_2: 89
units_3: 89
units_4: 109
units_5: 23
tuner/epochs: 20
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.8700215816497803

Trial 0056 summary
Hyperparameters:
activation: relu
first_units: 69
num_layers: 5
units_0: 35
units_1: 99
units_2: 81
units_3: 69
units_4: 3
units_5: 57
tuner/epochs: 20
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.869877815246582

Trial 0024 summary
Hyperparameters:
activation: tanh
first_units: 43
num_layers: 3
units_0: 7
units_1: 19
units_2: 1
units_3: 25
units_4: 5
units_5: 11
tuner/epochs: 20
tuner/initial_epoch: 7
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0019
Score: 0.869015097618103

Trial 0017 summary
Hyperparameters:
activation: tanh
first_units: 59
num_layers: 1
units_0: 13
units_1: 

In [17]:
best_model = tuner.get_best_models(1)[0]
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 57)                6327      
                                                                 
 dense_1 (Dense)             (None, 39)                2262      
                                                                 
 dense_2 (Dense)             (None, 1)                 40        
                                                                 
Total params: 8629 (33.71 KB)
Trainable params: 8629 (33.71 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [21]:
# Define the deep learning model 
nn_model_tuned = tf.keras.models.Sequential()
nn_model_tuned.add(tf.keras.layers.Dense(units=57, activation="tanh", input_dim=110))
nn_model_tuned.add(tf.keras.layers.Dense(units=39, activation="tanh"))
nn_model_tuned.add(tf.keras.layers.Dense(units=1, activation="tanh"))


# Compile the Sequential model together and customize metrics
nn_model_tuned.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [22]:
# Train the model
fit_model_tuned = nn_model_tuned.fit(x_train_scaled, y_train, epochs=100)

Epoch 1/100


ValueError: in user code:

    File "C:\Users\ejmat\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\ejmat\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\ejmat\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\ejmat\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\ejmat\anaconda3\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\ejmat\anaconda3\lib\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 57), found shape=(None, 110)


In [20]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model_tuned.evaluate(x_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

218/218 - 0s - loss: -1.5396e+00 - accuracy: 0.8670 - 262ms/epoch - 1ms/step
Loss: -1.5396398305892944, Accuracy: 0.8670021295547485
