In [11]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [4]:
# Load the modeling table from the CSV that sits next to the notebook.
df = pd.read_csv('2000-2018_disasters_min50k_for_modeling.csv')

# Columns selected from the table as model inputs.
features = [
    'BEGIN_RANGE',
    'END_RANGE',
    'MAGNITUDE',
    'BEGIN_AZIMUTH',
    'BEGIN_MONTH',
    'END_MONTH',
    'TOTAL_DURATION_HR',
    'DISTANCE_COVERED(KM)',
    'BEGIN_HOUR',
]

In [5]:
# Keep only hail events with property damage below 1,000,000 and a
# strictly positive duration (single combined row mask).
hail = df.loc[
    (df['EVENT_TYPE'] == 'Hail')
    & (df['DAMAGE_PROPERTY'] < 1000000)
    & (df['TOTAL_DURATION_HR'] > 0)
]

# One-hot encode any non-numeric feature columns; the target is the damage amount.
X = pd.get_dummies(hail[features])
y = hail['DAMAGE_PROPERTY']

# Fixed random_state so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
X_train.shape

(704, 24)

In [6]:
def model_func(layer_one_neurons=24, layer_one_dropout=0.5, layer_two_neurons=10, input_dim=24, layer_two_dropout=0.5):
    """Build and compile a two-hidden-layer regression network.

    Parameters
    ----------
    layer_one_neurons : int
        Units in the first ReLU dense layer (the layer that receives the input).
    layer_one_dropout : float
        Dropout rate applied after the first dense layer.
    layer_two_neurons : int
        Units in the second ReLU dense layer.
    input_dim : int
        Number of input features expected by the first dense layer.
    layer_two_dropout : float
        Dropout rate applied after the second dense layer.

    Returns
    -------
    Sequential
        Model compiled with mean-absolute-error loss and the Adam optimizer,
        ending in a single linear output unit.
    """
    network = Sequential()
    stack = (
        Dense(layer_one_neurons, activation='relu', input_dim=input_dim),
        Dropout(layer_one_dropout),
        Dense(layer_two_neurons, activation='relu'),
        Dropout(layer_two_dropout),
        Dense(1, activation=None),  # linear output for regression
    )
    for layer in stack:
        network.add(layer)
    network.compile(optimizer='adam', loss='mean_absolute_error')
    return network

In [None]:
def r_square(X_test, y_test, estimator=None):
    """Return the coefficient of determination (R^2) of a regressor's predictions.

    Parameters
    ----------
    X_test : array-like
        Feature matrix, passed unchanged to ``predict``.
    y_test : array-like
        True target values, one per row of ``X_test``. Column vectors of
        shape (n, 1) are flattened so they cannot broadcast against the
        predictions into an n-by-n matrix.
    estimator : object with a ``predict`` method, optional
        Fitted model to evaluate. When omitted, the notebook-level ``model``
        variable is used, so existing ``r_square(X_test, y_test)`` calls keep
        working.

    Returns
    -------
    float
        ``1 - SS_res / SS_tot``. If every target value is identical
        (``SS_tot == 0``) the result is not finite.
    """
    # Prefer the explicitly supplied model; fall back to the global one.
    predictor = model if estimator is None else estimator

    y_true = np.asarray(y_test).ravel()
    # predict() may return shape (n, 1); force it to match y_true.
    y_pred = np.asarray(predictor.predict(X_test)).reshape(y_true.shape[0])

    ss_res = np.sum(np.square(y_true - y_pred))
    ss_tot = np.sum(np.square(y_true - y_true.mean()))
    return 1 - ss_res / ss_tot

In [14]:
# Wrap the Keras builder so it can be used as a scikit-learn estimator.
nn = KerasRegressor(build_fn=model_func, input_dim=X_train.shape[1], verbose=0)
ss = StandardScaler()

# Scaling lives inside the pipeline so each CV fold is standardized using
# only that fold's training rows.
pipe = Pipeline([
    ('ss', ss),
    ('nn', nn),
])

params = {
    'nn__epochs': [500, 1000, 5000],
    # Single-valued on purpose: it equals model_func's default, so the search
    # is unchanged, but the key now appears in gs.best_params_, which the
    # cells below index with 'nn__layer_one_neurons'.
    'nn__layer_one_neurons': [24],
    'nn__layer_one_dropout': [0.3, 0.5],
    'nn__layer_two_neurons': [50, 100, 200],
    'nn__layer_two_dropout': [0.3, 0.5],
}

# No `scoring` is given, so candidates are ranked by the pipeline's own
# score method (higher is better).
gs = GridSearchCV(pipe, param_grid=params, cv=2)
gs.fit(X_train, y_train)
print(gs.best_score_)
gs.best_params_

-142644.8955965909


{'nn__epochs': 1000,
 'nn__layer_one_dropout': 0.8,
 'nn__layer_one_neurons': 24,
 'nn__layer_two_neurons': 200}

In [19]:
# Persist the winning hyper-parameters as "<value> <name>, ..." text.
# The string is built from whatever keys the search actually produced, so a
# parameter missing from the grid cannot raise KeyError here. Keys are sorted
# and the "nn__" step prefix is stripped, which keeps the original
# "epochs, layer_one_dropout, layer_one_neurons, ..." ordering and format
# (including the trailing comma).
string_to_save = ", ".join(
    f"{value} {name.split('__', 1)[-1]}"
    for name, value in sorted(gs.best_params_.items())
) + ","

# Context manager guarantees the file is closed even if the write fails.
with open("sample.txt", "w") as file:
    file.write(string_to_save)

In [None]:
# Pull the winning hyper-parameters out of the grid search so the final
# model can be rebuilt with them.
n_epochs = gs.best_params_['nn__epochs']
layer_one_dropout = gs.best_params_['nn__layer_one_dropout']
# 'nn__layer_one_neurons' is only present when the grid searched over it;
# otherwise every candidate was built with model_func's default of 24, so
# fall back to that value instead of raising KeyError.
layer_one_neurons = gs.best_params_.get('nn__layer_one_neurons', 24)
layer_two_neurons = gs.best_params_['nn__layer_two_neurons']
layer_two_dropout = gs.best_params_['nn__layer_two_dropout']

In [None]:
# Standardize features using statistics learned from the training rows only,
# then apply the same transform to the held-out rows.
# NOTE: this overwrites X_train / X_test with scaled arrays, so the cell is
# not idempotent -- re-run the split cell before running it again.
ss = StandardScaler().fit(X_train)
X_train, X_test = ss.transform(X_train), ss.transform(X_test)

In [None]:
# Rebuild the network through model_func so the final model has exactly the
# architecture that the grid search evaluated. Assembling the layers by hand
# here previously put an extra Dense(24) layer in front of the tuned ones,
# so the selected hyper-parameters were applied to a different network.
model = model_func(
    layer_one_neurons=layer_one_neurons,
    layer_one_dropout=layer_one_dropout,
    layer_two_neurons=layer_two_neurons,
    layer_two_dropout=layer_two_dropout,
    input_dim=X_train.shape[1],
)

# The held-out split is passed as validation data only to record val_loss per
# epoch for the learning-curve plot; it does not influence the weight updates.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=n_epochs,
    verbose=0,
)

In [None]:
# Learning curves: training loss vs. loss on the held-out split, per epoch.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='test')
ax.set_xlabel('epoch')
ax.set_ylabel('loss (mean absolute error)')  # the model is compiled with MAE loss
ax.set_title('Training vs. held-out loss per epoch')
ax.legend();

In [None]:
# R^2 of the fitted `model` on the held-out split (X_test is expected to be
# the standardized matrix produced by the scaling cell).
r_square(X_test=X_test, y_test=y_test)