In [35]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
from cross_validate import CrossValidation
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Dropout
from keras.layers import BatchNormalization
%matplotlib inline

In [7]:
# Load data
def _load_from_file(path, loader):
    """Open `path` in binary mode, apply `loader` to the handle, and always close the file."""
    with open(path, "rb") as handle:
        return loader(handle)


# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
x_train = _load_from_file("x_train.p", pickle.load)

# Try to fit on the lasso'd data as well
x_train_1 = _load_from_file("lasso_data/x_train_lasso_1.p", np.load)
x_train_2 = _load_from_file("lasso_data/x_train_lasso_2.p", np.load)
x_train_3 = _load_from_file("lasso_data/x_train_lasso_3.p", np.load)
x_train_4 = _load_from_file("lasso_data/x_train_lasso_4.p", np.load)
x_train_5 = _load_from_file("lasso_data/x_train_lasso_5.p", np.load)

# y_train used to be read twice (pickle.load, then np.load); only the np.load
# result survived, so that is the single load kept here.
y_train = _load_from_file("y_train.p", np.load)

In [93]:
def run_neural_network(x_data, y_data, k_fold_cross_validation, layers, activation, num_epochs, dropout, batch_size):
    """Train a binary classifier on one cross-validation split and report its held-out accuracy.

    The data is wrapped in ``CrossValidation(x_data, y_data, k_fold_cross_validation)``.
    The model is fitted on ``get_other_partitions(0)`` and validated/evaluated on
    ``get_partition(0)``; no other partition index is used.

    Args:
        x_data, y_data: features and labels handed to ``CrossValidation``.
        k_fold_cross_validation: third argument passed to ``CrossValidation``.
        layers: sequence of hidden-layer widths; ``layers[0]`` is the first hidden layer.
        activation: activation name applied after every hidden ``Dense`` layer.
        num_epochs: value passed to ``fit`` as ``epochs``.
        dropout: rate passed to every ``Dropout`` layer.
        batch_size: value passed to ``fit`` as ``batch_size``.

    Returns:
        ``(model, accuracy)`` where ``accuracy`` is element 1 of ``model.evaluate``
        on the partition-0 data.
    """
    folds = CrossValidation(x_data, y_data, k_fold_cross_validation)

    fit_inputs, fit_targets = folds.get_other_partitions(0)
    held_out_inputs, held_out_targets = folds.get_partition(0)

    network = Sequential()

    def append_hidden_block(dense_layer):
        # Every hidden block is Dense -> Activation -> Dropout -> BatchNormalization.
        network.add(dense_layer)
        network.add(Activation(activation))
        network.add(Dropout(dropout))
        network.add(BatchNormalization())

    # Only the first Dense layer is told the input width (taken from the first sample).
    append_hidden_block(Dense(layers[0], input_dim=len(fit_inputs[0])))
    for width in layers[1:]:
        append_hidden_block(Dense(width))

    # Single sigmoid unit for the binary output.
    network.add(Dense(1))
    network.add(Activation("sigmoid"))

    network.summary()

    network.compile(loss="binary_crossentropy", optimizer='RMSProp', metrics=['accuracy'])
    network.fit(
        fit_inputs,
        fit_targets,
        batch_size=batch_size,
        epochs=num_epochs,
        validation_data=(held_out_inputs, held_out_targets),
        verbose=1,
    )

    evaluation = network.evaluate(held_out_inputs, held_out_targets)
    print('Test score:', evaluation[0])
    print('Test accuracy:', evaluation[1])

    return network, evaluation[1]

In [83]:
def create_neural_network(x_data, y_data, k_fold_cross_validation, layers, activation, num_epochs, dropout, batch_size):
    """Build a binary classifier and fit it on the full ``x_data`` / ``y_data``.

    Unlike a cross-validated run, nothing is held out here: ``fit`` receives the
    data exactly as passed in and no validation data is supplied.

    Args:
        x_data, y_data: features and labels used for fitting.
        k_fold_cross_validation: only forwarded to ``CrossValidation``; the
            resulting object is not used for training.
        layers: sequence of hidden-layer widths; ``layers[0]`` is the first hidden layer.
        activation: activation name applied after every hidden ``Dense`` layer.
        num_epochs: value passed to ``fit`` as ``epochs``.
        dropout: rate passed to every ``Dropout`` layer.
        batch_size: value passed to ``fit`` as ``batch_size``.

    Returns:
        The fitted model.
    """
    # NOTE(review): this object is never read afterwards. It is still constructed
    # in case CrossValidation validates or touches its inputs -- confirm, then drop.
    unused_folds = CrossValidation(x_data, y_data, k_fold_cross_validation)

    network = Sequential()

    def append_hidden_block(dense_layer):
        # Every hidden block is Dense -> Activation -> Dropout -> BatchNormalization.
        network.add(dense_layer)
        network.add(Activation(activation))
        network.add(Dropout(dropout))
        network.add(BatchNormalization())

    # Only the first Dense layer is told the input width (taken from the first sample).
    append_hidden_block(Dense(layers[0], input_dim=len(x_data[0])))
    for width in layers[1:]:
        append_hidden_block(Dense(width))

    # Single sigmoid unit for the binary output.
    network.add(Dense(1))
    network.add(Activation("sigmoid"))

    network.summary()

    network.compile(loss="binary_crossentropy", optimizer='RMSProp', metrics=['accuracy'])
    network.fit(x_data, y_data, batch_size=batch_size, epochs=num_epochs, verbose=1)

    return network

In [84]:
def run_test(x_data, y_data, k_fold_cross_validation, layers, activation, num_epochs, dropout, batch_size):
    """Cross-validate the network and return the mean held-out accuracy.

    For each partition index in ``range(k_fold_cross_validation)`` a fresh model
    is built, fitted on ``get_other_partitions(index)`` and evaluated on
    ``get_partition(index)``. Keras output is silenced (``verbose=0``); only the
    per-partition accuracies and the final average are printed.

    Args:
        x_data, y_data: features and labels handed to ``CrossValidation``.
        k_fold_cross_validation: number of partitions to iterate over.
        layers: sequence of hidden-layer widths; ``layers[0]`` is the first hidden layer.
        activation: activation name applied after every hidden ``Dense`` layer.
        num_epochs: value passed to ``fit`` as ``epochs``.
        dropout: rate passed to every ``Dropout`` layer.
        batch_size: value passed to ``fit`` as ``batch_size``.

    Returns:
        ``np.mean`` of element 1 of ``model.evaluate`` on each held-out partition.
    """
    folds = CrossValidation(x_data, y_data, k_fold_cross_validation)

    def build_classifier(input_width):
        # Hidden blocks are Dense -> Activation -> Dropout -> BatchNormalization,
        # followed by a single sigmoid output unit.
        network = Sequential()
        hidden_layers = [Dense(layers[0], input_dim=input_width)]
        for dense_layer in hidden_layers:
            network.add(dense_layer)
            network.add(Activation(activation))
            network.add(Dropout(dropout))
            network.add(BatchNormalization())
            # Queue the next width lazily so each Dense is created right before it is added.
            if len(hidden_layers) < len(layers):
                hidden_layers.append(Dense(layers[len(hidden_layers)]))
        network.add(Dense(1))
        network.add(Activation("sigmoid"))
        network.compile(loss="binary_crossentropy", optimizer='RMSProp', metrics=['accuracy'])
        return network

    held_out_accuracies = []

    for fold_index in range(k_fold_cross_validation):
        fit_inputs, fit_targets = folds.get_other_partitions(fold_index)
        held_out_inputs, held_out_targets = folds.get_partition(fold_index)

        classifier = build_classifier(len(fit_inputs[0]))
        classifier.fit(
            fit_inputs,
            fit_targets,
            batch_size=batch_size,
            epochs=num_epochs,
            validation_data=(held_out_inputs, held_out_targets),
            verbose=0,
        )

        fit_metrics = classifier.evaluate(fit_inputs, fit_targets, verbose=0)
        held_out_metrics = classifier.evaluate(held_out_inputs, held_out_targets, verbose=0)

        print('On partition', fold_index)
        print('Training accuracy:', fit_metrics[1])
        print('Test accuracy:', held_out_metrics[1])
        held_out_accuracies.append(held_out_metrics[1])

    average_accuracy = np.mean(held_out_accuracies)
    print("Average score was", average_accuracy)
    return average_accuracy

In [48]:
# First submission
# NOTE(review): the num_epochs argument was missing from this call (`"elu", , 0.2`),
# which is a SyntaxError. 9 is borrowed from the later submissions -- TODO confirm
# the epoch count that was actually used for this submission.
model = create_neural_network(x_train_3, y_train, 10, [50, 25], "elu", 9, 0.2, 20)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_374 (Dense)            (None, 50)                38300     
_________________________________________________________________
activation_374 (Activation)  (None, 50)                0         
_________________________________________________________________
dropout_250 (Dropout)        (None, 50)                0         
_________________________________________________________________
batch_normalization_27 (Batc (None, 50)                200       
_________________________________________________________________
dense_375 (Dense)            (None, 25)                1275      
_________________________________________________________________
activation_375 (Activation)  (None, 25)                0         
_________________________________________________________________
dropout_251 (Dropout)        (None, 25)                0         
__________

In [89]:
# Second submission: two 50-unit hidden layers fitted on the full lasso-3 training data
model = create_neural_network(
    x_data=x_train_3,
    y_data=y_train,
    k_fold_cross_validation=5,
    layers=[50, 50],
    activation="elu",
    num_epochs=9,
    dropout=0.2,
    batch_size=20,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_574 (Dense)            (None, 50)                38300     
_________________________________________________________________
activation_575 (Activation)  (None, 50)                0         
_________________________________________________________________
dropout_375 (Dropout)        (None, 50)                0         
_________________________________________________________________
batch_normalization_141 (Bat (None, 50)                200       
_________________________________________________________________
dense_575 (Dense)            (None, 50)                2550      
_________________________________________________________________
activation_576 (Activation)  (None, 50)                0         
_________________________________________________________________
dropout_376 (Dropout)        (None, 50)                0         
__________

In [94]:
# Third submission: 100/50/50 hidden layers; the returned accuracy is not used afterwards
model, dummy = run_neural_network(
    x_data=x_train_3,
    y_data=y_train,
    k_fold_cross_validation=5,
    layers=[100, 50, 50],
    activation="elu",
    num_epochs=9,
    dropout=0.3,
    batch_size=20,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_584 (Dense)            (None, 100)               76600     
_________________________________________________________________
activation_585 (Activation)  (None, 100)               0         
_________________________________________________________________
dropout_382 (Dropout)        (None, 100)               0         
_________________________________________________________________
batch_normalization_145 (Bat (None, 100)               400       
_________________________________________________________________
dense_585 (Dense)            (None, 50)                5050      
_________________________________________________________________
activation_586 (Activation)  (None, 50)                0         
_________________________________________________________________
dropout_383 (Dropout)        (None, 50)                0         
__________

In [95]:
# Load the test features, closing the file handle once they are read
with open("lasso_data/x_test_lasso_3.p", "rb") as test_file:
    x_test_3 = np.load(test_file)
predictions = model.predict(x_test_3, batch_size = 20, verbose = 1)

print(predictions)

# Binarise every prediction at 0.5. The previous loop started at index 1, so the
# first row kept its raw probability and was then truncated to 0 by int().
# `< 0.5 -> 0, otherwise 1` is the same rule as before; the dtype is preserved.
predictions = np.where(predictions < 0.5, 0, 1).astype(predictions.dtype)

# Ids are 1-based; ravel() yields one scalar label per row for the int() conversion
ids = range(1, len(predictions) + 1)
output = [[sample_id, int(label)] for sample_id, label in zip(ids, predictions.ravel())]

# Output the submission file
with open("models/neural_network.txt", "wb") as submission_file:
    np.savetxt(submission_file, output, delimiter = ",", newline = "\n", fmt = "%i",
               header = "Id,Prediction", comments = "")

[[ 0.86995578]
 [ 0.96968645]
 [ 0.37563279]
 ..., 
 [ 0.0577778 ]
 [ 0.95205295]
 [ 0.10870164]]
