In [1]:
import numpy as np 
import tensorflow as tf 
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Dropout
from keras.layers import BatchNormalization
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from keras.utils.np_utils import to_categorical

# Fix the seed of NumPy's global random number generator so that NumPy-based
# randomness is repeatable between runs.
# NOTE(review): nothing here seeds TensorFlow/Keras, so weight initialisation
# and dropout may still differ from run to run -- confirm whether that matters.
np.random.seed(seed=1)

def load_data(filename, skiprows = 1, delimiter = ' '):
    """
    Function loads numeric data stored in the text file filename and returns it
    as a numpy ndarray.

    Inputs:
        filename: path of the file to read, given as a string.
        skiprows: number of leading lines to skip before parsing (default 1,
            i.e. one header line).
        delimiter: string separating the values on each line (default a
            single space).

    Outputs:
        Data contained in the file, returned as a numpy ndarray
    """
    return np.loadtxt(filename, skiprows=skiprows, delimiter=delimiter)

  return f(*args, **kwds)
Using TensorFlow backend.


In [3]:
# Column 0 of the training file is taken as the label; every remaining column
# is a feature. Both right-hand sides are sliced from the full table before
# either name is rebound.
X = load_data('training_data.txt')
y, X = X[:, 0], X[:, 1:]

In [160]:
X.shape

(20000, 1000)

In [161]:
y.shape

(20000,)

In [4]:
def make_model(X_train, y_train, X_test, y_test, epochs=2, batch_size=128, verbose=1):
    """
    Function builds a fully connected softmax classifier, trains it on the
    training split and evaluates it on the test split.

    Inputs:
        X_train: training features, 2-D (samples x features).
        y_train: one-hot encoded training labels, 2-D (samples x classes).
        X_test: features the trained model is evaluated on.
        y_test: one-hot encoded labels the trained model is evaluated on.
        epochs: number of passes over the training data (default 2).
        batch_size: mini-batch size used while training (default 128).
        verbose: verbosity flag forwarded to model.fit (default 1).

    Outputs:
        Tuple (accuracy, model): the accuracy measured on (X_test, y_test) and
        the trained Keras model.
    """
    # Take the layer widths from the data instead of hard-coding them, so the
    # same network also works for other feature counts / class counts.
    n_features = np.shape(X_train)[1]
    n_classes = np.shape(y_train)[1]

    model = Sequential()

    # Hidden block 1: wide dense layer with dropout.
    model.add(Dense(1000, input_dim=n_features))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))

    # Hidden block 2: batch normalisation is applied before the non-linearity.
    model.add(Dense(500))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.3))

    # Hidden block 3.
    model.add(Dense(200))
    model.add(Activation('relu'))

    # Output layer: one softmax unit per class.
    model.add(Dense(n_classes))
    model.add(Activation('softmax'))

    # The labels are one-hot encoded, hence categorical cross-entropy; Adam is
    # the optimizer.
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=verbose)

    # score[0] is the loss and score[1] the accuracy metric requested in
    # model.compile above.
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    return score[1], model

In [5]:
# 10-fold cross-validation: train a fresh model on each training split and
# keep the accuracy it reaches on the held-out split.
kf = KFold(n_splits=10, shuffle=True)
scores = []
for train_index, test_index in kf.split(X):
    # Labels are one-hot encoded per fold because make_model trains with a
    # categorical cross-entropy loss.
    X_train, y_train = X[train_index], to_categorical(y[train_index])
    X_test, y_test = X[test_index], to_categorical(y[test_index])

    fold_result = make_model(X_train, y_train, X_test, y_test)
    scores.append(fold_result[0])

# Mean held-out accuracy over all folds.
print(np.mean(scores))

Epoch 1/2
Epoch 2/2
Test score: 0.3424795902967453
Test accuracy: 0.852
Epoch 1/2
Epoch 2/2
Test score: 0.3306661276817322
Test accuracy: 0.8515
Epoch 1/2
Epoch 2/2
Test score: 0.3663601670265198
Test accuracy: 0.853
Epoch 1/2
Epoch 2/2
Test score: 0.33210494375228883
Test accuracy: 0.8515
Epoch 1/2
Epoch 2/2
Test score: 0.34632509303092956
Test accuracy: 0.8485
Epoch 1/2
Epoch 2/2
Test score: 0.3564727427959442
Test accuracy: 0.846
Epoch 1/2
Epoch 2/2
Test score: 0.3527608847618103
Test accuracy: 0.85
Epoch 1/2
Epoch 2/2
Test score: 0.36358436560630797
Test accuracy: 0.8385
Epoch 1/2
Epoch 2/2
Test score: 0.3410640499591827
Test accuracy: 0.853
Epoch 1/2
Epoch 2/2
Test score: 0.34036805033683776
Test accuracy: 0.843
0.8487


In [None]:
# Mean 10-fold cross-validation accuracy reported by the cell above: 0.8487

In [166]:
# Final fit: reload the complete training set, train on all of it and predict
# on the separate test file.
# NOTE(review): test_data.txt is used as-is (no column is dropped), so it is
# presumably features only -- confirm.
X_test = load_data('test_data.txt')
X = load_data('training_data.txt')

# NOTE: this rebinds the notebook-level X and y, and y is one-hot encoded from
# here on; re-running the cross-validation cell afterwards would one-hot
# encode it a second time.
y, X = to_categorical(X[:, 0]), X[:, 1:]

# The training data is passed as the evaluation split too, so the "Test"
# score/accuracy printed by make_model here is measured on the training set.
m = make_model(X, y, X, y)[1]
pred = m.predict(X_test)

Epoch 1/3
Epoch 2/3
Epoch 3/3
Test score: 0.130459776002
Test accuracy: 0.96495


In [167]:
def format_pred(pred, filename="result.txt"):
    """
    Function turns per-row class scores into 0/1 labels and writes them to a
    CSV file with the header "Id,Prediction" and ids starting at 1.

    A row is labelled 1 when its first entry rounds to 0, otherwise 0; only
    the first entry of each row is inspected.

    Inputs:
        pred: 2-D array-like of scores, one row per sample.
        filename: path of the file to write (default "result.txt"). An
            existing file is overwritten.

    Outputs:
        None. The labels are written to filename.
    """
    labels = [1 if row[0] == 0 else 0 for row in np.round(pred)]
    with open(filename, "w") as f:
        f.write("Id,Prediction\n")
        # Ids are 1-based to match the expected submission format.
        for sample_id, label in enumerate(labels, start=1):
            f.write(str(sample_id) + "," + str(label) + "\n")

In [168]:
# Write the labels derived from the test-set predictions to result.txt.
format_pred(pred)