In [1]:
!pip install keras-tuner



In [2]:
# Import our dependencies
import pandas as pd
import matplotlib as plt
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

## 1. Import & Inspect the Data

In [3]:
# Connect to Google Drive
from google.colab import drive 
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [4]:
# Loading data
tornado_df = pd.read_csv("gdrive/My Drive/Colab Notebooks/Final_Project/2008-2020_tornadoes_EF_cleaned.csv")
tornado_df

Unnamed: 0,Year,Month,Day,Timestamp,State,State_Fips,County_Fips,Start_Lat,Start_Lon,End_Lat,End_Lon,EF,Injuries,Fatalities,Loss,Crop_Loss,Length,Width
0,2008,10,11,2008-10-11 18:33:00,NM,35,49,35.1500,-105.9400,0.0000,0.0000,0,0,0,12000.0,,0.02,10
1,2008,10,11,2008-10-11 21:54:00,KS,20,181,39.3300,-101.5500,0.0000,0.0000,0,0,0,,,0.25,10
2,2008,10,24,2008-10-24 16:55:00,GA,13,39,31.0200,-81.8100,0.0000,0.0000,0,0,0,1000.0,,0.25,25
3,2008,10,6,2008-10-06 13:46:00,AR,5,113,34.5800,-94.1500,34.6300,-94.1200,1,0,0,75000.0,,3.41,250
4,2008,10,6,2008-10-06 16:05:00,TX,48,67,33.0700,-94.1000,33.0900,-94.0800,0,0,0,75000.0,,1.50,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15886,2020,9,1,2020-09-01 16:24:00,TN,47,95,36.3290,-89.4981,36.3517,-89.4729,0,0,0,,2000.0,2.10,50
15887,2020,9,3,2020-09-03 15:57:00,MD,24,3,38.9480,-76.5695,38.9233,-76.4615,0,0,0,1250000.0,,6.05,100
15888,2020,9,5,2020-09-05 16:17:00,CA,6,39,37.3500,-119.3400,37.5218,-119.3056,2,0,0,,,12.02,50
15889,2020,9,5,2020-09-05 18:29:00,CA,6,19,37.2500,-119.2000,37.2598,-119.1838,1,0,0,,,1.12,25


In [5]:
# drop na columns
tornado_df = tornado_df.dropna()

### 1. Define input data (X) and output data (y)

In [6]:
X = tornado_df.copy()
X = X.drop(['Year', 'Month', 'Day', 'Timestamp','State','Start_Lat','Start_Lon','End_Lat', 'End_Lon', 'Fatalities','State_Fips','County_Fips','Loss','Crop_Loss'], axis=1)
X

Unnamed: 0,EF,Injuries,Length,Width
22,1,0,8.13,200
28,2,2,4.09,150
29,3,4,8.19,100
73,1,0,10.59,500
81,1,0,3.14,250
...,...,...,...,...
15764,0,0,1.26,50
15785,1,1,0.26,50
15786,1,0,2.17,100
15838,0,0,2.58,100


In [7]:
y = tornado_df["Fatalities"].ravel()
y[:10]

array([0, 1, 1, 0, 0, 0, 0, 0, 0, 0])

### 2. Splitting into Train and Test sets

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y,  random_state =78)

### 3. Scale the input data (X)

In [9]:
# Creating StandardScaler instance
scaler = StandardScaler()
# Fitting Standard Scaller
X_scaler = scaler.fit(X_train)
# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### 4. Define Optimizer

In [10]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=len(X.columns) ) )

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [11]:
# Import the kerastuner library
import kerastuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Oracle from existing project ./untitled_project/oracle.json
INFO:tensorflow:Reloading Tuner from ./untitled_project/tuner0.json


  


In [12]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

INFO:tensorflow:Oracle triggered exit


In [13]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'relu',
 'first_units': 3,
 'num_layers': 2,
 'tuner/bracket': 2,
 'tuner/epochs': 7,
 'tuner/initial_epoch': 3,
 'tuner/round': 1,
 'tuner/trial_id': '0032',
 'units_0': 9,
 'units_1': 3,
 'units_2': 7,
 'units_3': 1,
 'units_4': 5,
 'units_5': 1}

In [14]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 1s - loss: 0.5662 - accuracy: 0.9625 - 550ms/epoch - 110ms/step
Loss: 0.5661673545837402, Accuracy: 0.9624999761581421
