In [2]:
def load_data(train_data, test_data, skiprows = 1):
    '''
    Function loads training and test data stored in input files in the same folder as load_data
    and returns x_train, y_train, and x_test in numpy ndarrays.

    Inputs:
        train_data: training_data filename
        test_data: test_data filename

    Outputs:
        x_train: x values for training set as numpy ndarray
        y_train: labels for x values in training set as numpy ndarray
        x_test: x values for testing set as numpy ndarray
    '''

    train_data = np.loadtxt(train_data, skiprows = skiprows, delimiter = ' ')

    x_train = train_data[:, 1:]
    y_train = train_data[0:,0]

    x_test = np.loadtxt(test_data, skiprows = skiprows, delimiter = ' ')

    return x_train, y_train, x_test

In [3]:
#from process_input import load_data

import numpy as np
import matplotlib.pyplot as plt
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import Flatten, BatchNormalization
from keras import regularizers
from sklearn.grid_search import GridSearchCV
from keras.optimizers import SGD
import pandas

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
train_file = "data/training_data.txt"
test_file = "data/test_data.txt"
x_train, y_train, x_test = load_data(train_file, test_file)

# one-hot encode the labels
y_train = keras.utils.np_utils.to_categorical(y_train)

# normalize input data
x_train = np.divide(x_train, x_train.max())
x_test = np.divide(x_test, x_test.max())

# we must reshape the X data (add a channel dimension)
x_train = x_train.reshape(tuple(list(x_train.shape) + [1]))
x_test = x_test.reshape(tuple(list(x_test.shape) + [1]))

print(x_train.shape)
print(x_test.shape)
print(y_train.shape)


(20000, 1000, 1)
(10000, 1000, 1)
(20000, 2)


In [30]:
def create_model(optim = 'rmsprop', density = 100, rate = 0.15):
    model = Sequential()
    model.add(Flatten(input_shape=(1000,1))) 
    model.add(Dense(density))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(rate))

    model.add(Dense(int(density / 10)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(rate))

    model.add(Dense(int(density / 10)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    
    model.compile(loss='binary_crossentropy',optimizer=optim, metrics=['accuracy'])
    
    return model

In [39]:
seed = 7
model = KerasClassifier(build_fn=create_model, nb_epoch = 10, batch_size = 64, verbose = 0)
rate = [0.3]
optim = ['nadam']
density = [300]

param_grid = dict(optim = optim, rate = rate, density = density, epochs = [20])
grid = GridSearchCV(estimator = model, param_grid = param_grid, verbose = 1, cv = 5)
grid_result = grid.fit(x_train, y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for params, mean_score, scores in grid_result.grid_scores_:
    print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 38.1min finished


Best: 0.836400 using {'density': 300, 'epochs': 20, 'optim': 'nadam', 'rate': 0.3}
0.836400 (0.004689) with: {'density': 300, 'epochs': 20, 'optim': 'nadam', 'rate': 0.3}


In [21]:
len(np.argmax(model.predict(x_test), axis = 1))

20000