In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
import useful_functions as uf

#load_transform_split(fpath='data/ALL_YEARS_ADDED_FEATURES.csv',
#                         target='DROPOUT_N', expand=False, split=0.1, clean=True,
#                         drop_feats=['SCHOOL_YEAR','DIV_NAME','SCH_NAME','DIPLOMA_RATE'],
#                         fmt='numpy',return_pipeline=False):

X_train_scaled, X_test_scaled, y_train, y_test = uf.load_transform_split(target='DROPOUT_N')

In [2]:
# global variables about whether or not to modify the hyperparameters
USE_BATCH_NORMALIZATION = False
USE_ADAM_OPTIMIZER = True
USE_DROPOUT_REGULARIZATION = False
USE_SPECIAL_INITIALIZATION = True
USE_LR_SCHEDULER = True

# some of the modifications to the hyperparams -- don't really expect these to change
ACTIVATOR = 'selu' # the one suggested by slides 21 & 44 on Lecture 11
INITIALIZER = 'he_normal' # the one suggested by slides 17 & 44 on Lecture 11

# some more modifications to the hyperparams -- probably change these later on
# for now, set them to the default values
DROPOUT_RATE = 0.0
ADAM_LR = 0.001
ADAM_BETA1 = 0.9
ADAM_BETA2 = 0.999
NUM_LAYERS = 10
N_EPOCHS = 30
USE_EARLY_STOPPING = True
LR_10FOLD_DECAY_TIME = 20 # decay by a factor of 10 every X epochs

In [3]:
import tensorflow as tf
from tensorflow import keras

def initialize_model(use_drop=USE_DROPOUT_REGULARIZATION, use_batch=USE_BATCH_NORMALIZATION,
                    drop_rate=DROPOUT_RATE, num_layers=NUM_LAYERS,
                    use_init=USE_SPECIAL_INITIALIZATION, init=INITIALIZER, activ=ACTIVATOR):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=X_train_scaled.shape[1:]))
        
    for i in np.arange(19.0, 1, num_layers/(-18)):
        j = int(i)
        if use_drop:
            model.add(keras.layers.Dropout(rate=drop_rate))
        if use_init:
            model.add(keras.layers.Dense(j, activation=activ, kernel_initializer=init))
        else:
            model.add(keras.layers.Dense(j, activation=activ))
        if use_batch:
            model.add(keras.layers.BatchNormalization())
    if use_init:
        model.add(keras.layers.Dense(1, kernel_initializer=init))
    else:
        model.add(keras.layers.Dense(1))
    return(model)

In [4]:
def compile_model(model, use_adam=USE_ADAM_OPTIMIZER, lr=ADAM_LR, b1=ADAM_BETA1, b2=ADAM_BETA2):
    if use_adam:
        optimizer=keras.optimizers.Adam(learning_rate=lr, beta_1=b1, beta_2=b2)
        model.compile(loss='mean_squared_error', optimizer=optimizer)
    else:
        model.compile(loss='mean_squared_error')
    return(model)

In [5]:
def fit_model(model, use_sched=USE_LR_SCHEDULER, decay_time=LR_10FOLD_DECAY_TIME, init=ADAM_LR,
              use_adam=USE_ADAM_OPTIMIZER, n_epo=N_EPOCHS, stop_early=USE_EARLY_STOPPING):
    if not use_adam or not use_sched:
        history=model.fit(X_train_scaled, y_train, epochs=n_epo)
    else:
        callbacks = []
        if use_sched:
            def lr_fn(epoch): return(init * 10**(-epoch/decay_time))
            callbacks.append(keras.callbacks.LearningRateScheduler(lr_fn))
        if stop_early:
            callbacks.append(keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True))
        history = model.fit(X_train_scaled, y_train, epochs=n_epo, callbacks=callbacks, validation_split=0.2)
    return(history, model)

In [6]:
from sklearn.metrics import mean_squared_error
def analyze_performance(history, model):
    y_pred = model.predict(X_test_scaled)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print('RMSE: %.2f' %rmse)
    return(rmse)

In [7]:
# can I just do this use *args or something? IDK
def run(use_drop=USE_DROPOUT_REGULARIZATION, use_batch=USE_BATCH_NORMALIZATION,
        drop_rate=DROPOUT_RATE, num_layers=NUM_LAYERS,
        use_init=USE_SPECIAL_INITIALIZATION, init=INITIALIZER, activ=ACTIVATOR,
        use_adam=USE_ADAM_OPTIMIZER, lr=ADAM_LR, b1=ADAM_BETA1, b2=ADAM_BETA2,
        use_sched=USE_LR_SCHEDULER, decay_time=LR_10FOLD_DECAY_TIME, n_epo=N_EPOCHS, stop_early=USE_EARLY_STOPPING):
    
    model = initialize_model(use_drop, use_batch, drop_rate, num_layers, use_init, init, activ)
    model = compile_model(model, use_adam, lr, b1, b2)
    history, model = fit_model(model, use_sched, decay_time, lr, use_adam, n_epo, stop_early)
    rmse = analyze_performance(history, model)
    return(history, model, rmse)

In [8]:
history, model, rmse = run(n_epo=100, decay_time=30)

Train on 51890 samples, validate on 12973 samples
Epoch 1/100


KeyboardInterrupt: 

In [9]:
# learn faster -- the loss was decreasing pretty slowly before
history, model, rmse = run(lr=0.001)

Train on 51890 samples, validate on 12973 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RMSE: 1.44


In [10]:
from joblib import load,dump

In [12]:
model.save('saved_models/good_neual_net.h5')

In [19]:
import pickle
fname = 'good_neural_net'
pickle.dump(model, open(fname, 'wb'))

TypeError: can't pickle _thread.RLock objects

In [20]:
model.save(fname)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


FailedPreconditionError: good_neural_net/variables; Not a directory

In [11]:
model = keras.models.Sequential()
model.add(keras.layers.Dense(19, activation='relu', input_shape=X_train_scaled.shape[1:]))
model.add(keras.layers.BatchNormalization())
for i in range(17, 1, -2):
    model.add(keras.layers.Dense(i, activation='relu'))
    model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(1))

model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam())
history = model.fit(X_train_scaled, y_train, epochs=20, validation_split=0.2)

Train on 51890 samples, validate on 12973 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:
from sklearn.metrics import mean_squared_error
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE: %.3f' %rmse)

RMSE: 1.429


In [9]:
# beta(1,2) correspond to the importance of (momentum, scale(?))
# defaults are lr = 0.001, beta1 = 0.9, beta2 = 0.999
optimizer = keras.optimizers.Adam(learning_rate=ADAM_LR, beta_1=ADAM_BETA1, beta_2=ADAM_BETA2)
model.compile(loss='mean_squared_error', optimizer=optimizer)

# adjust the learning rate
def lr_fn(epoch, init=ADAM_LR, decay_time=LR_10FOLD_DECAY_TIME):
    return(init * 10**(-epoch/decay_time))
lr_scheduler = keras.callbacks.LearningRateScheduler(lr_fn)
history = model.fit(X_train_scaled, y_train, epochs=N_EPOCHS, callbacks=[lr_scheduler])

NameError: name 'model' is not defined

In [None]:
import tensorflow as tf
from tensorflow import keras

model = keras.models.Sequential()
if USE_DROPOUT_REGULARIZATION:
    model.add(keras.layers.Dropout(rate=DROPOUT_RATE, input_shape=X_train_scaled.shape[1:]))
model.add(keras.layers.Dense(19, activation=ACTIVATOR, kernel_initializer=INITIALIZER))
if USE_BATCH_NORMALIZATION:
    model.add(keras.layers.BatchNormalization())
for i in np.arange(19, 1, -18/NUM_NODES): # always start with 19 and end before 1
    j = int(i)
    model.add(keras.layers.Dropout(rate=DROPOUT_RATE))
    model.add(keras.layers.Dense(j, activation=ACTIVATOR, kernel_initializer=INITIALIZER))
    if USE_BATCH_NORMALIZATION:
        model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(1, kernel_initializer=INITIALIZER)) # the final, predicting layer
model.summary()

In [None]:
from sklearn.metrics import mean_squared_error
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(rmse)

In [10]:
keras.__version__

'2.2.4-tf'

In [9]:
tf.__version__

'2.1.0'

In [None]:
help(keras.optimizers.Adam)

In [None]:
for i in np.arange(1, 10, 1.5):
    print(i)

In [None]:
for i in np.arange(19, 1, -18/NUM_NODES):
    print(i)

In [None]:
NUM_NODES