In [3]:
# Import the modules
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf

In [2]:
# Read in the CSV
suicides_df = pd.read_csv("output/suicides.csv")

suicides_df.head()

Unnamed: 0,country,year,sex,age,suicides_no,population,suicides_per_100k_pop,gdp_for_year_USD,gdp_per_capita_USD,generation
0,Albania,1987,male,15-24 years,21,312900,6.71,2156625000.0,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,2156625000.0,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,2156625000.0,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,2156625000.0,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,2156625000.0,796,Boomers


In [10]:
# Get dummies
country_dummies = pd.get_dummies(suicides_df["country"])
sex_dummies = pd.get_dummies(suicides_df["sex"])
age_dummies = pd.get_dummies(suicides_df["age"])
generation_dummies = pd.get_dummies(suicides_df["generation"])

In [68]:
# Create features dataframe from dummies
suicides_x_df = suicides_df[["year", "suicides_no", "population", "suicides_per_100k_pop", "gdp_for_year_USD", "gdp_per_capita_USD"]]

suicides_x_df = pd.concat([suicides_x_df, sex_dummies, age_dummies], axis=1)
# suicides_x_df = pd.concat([suicides_x_df, sex_dummies, age_dummies, country_dummies], axis=1)
suicides_x_df.head()

Unnamed: 0,year,suicides_no,population,suicides_per_100k_pop,gdp_for_year_USD,gdp_per_capita_USD,female,male,15-24 years,25-34 years,35-54 years,5-14 years,55-74 years,75+ years
0,1987,21,312900,6.71,2156625000.0,796,0,1,1,0,0,0,0,0
1,1987,16,308000,5.19,2156625000.0,796,0,1,0,0,1,0,0,0
2,1987,14,289700,4.83,2156625000.0,796,1,0,1,0,0,0,0,0
3,1987,1,21800,4.59,2156625000.0,796,0,1,0,0,0,0,0,1
4,1987,9,274300,3.28,2156625000.0,796,0,1,0,1,0,0,0,0


In [69]:
# Set target and feature sets
y = suicides_df["suicides_per_100k_pop"].values
x = suicides_x_df.drop(columns="suicides_per_100k_pop").values

In [70]:
# Split training/test datasets
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [71]:
# Preprocess numerical data for neural network

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
x_scaler = scaler.fit(x_train)

# Scale the data
x_train_scaled = x_scaler.transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

In [84]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=13, activation="relu", input_dim=13))
nn_model.add(tf.keras.layers.Dense(units=7, activation="linear"))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="RMSprop", metrics=["accuracy"])

In [85]:
# Train the model
fit_model = nn_model.fit(x_train_scaled, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [86]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(x_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

218/218 - 0s - loss: -8.2962e+05 - accuracy: 0.0279 - 241ms/epoch - 1ms/step
Loss: -829616.3125, Accuracy: 0.02789360098540783
