In [1]:
import numpy as np
import tensorflow as tf 
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Dropout

Using TensorFlow backend.


In [2]:
def load_data(filename, skiprows = 1, delimiter = ' '):
    """
    Load a delimited numeric text file and return it as a numpy ndarray.

    Inputs:
        filename: path of the file to read, given as a string.
        skiprows: number of leading lines skipped before parsing (default 1).
        delimiter: string separating the values on each line. Defaults to a
            single space, which was previously the only supported format.

    Outputs:
        Data contained in the file, returned as a numpy ndarray.
    """
    return np.loadtxt(filename, skiprows=skiprows, delimiter=delimiter)

In [3]:
# Read the training file, then split it: column 0 becomes the labels and the
# remaining columns become the features
training = load_data('data/training_data.txt', skiprows=1)
Y_train, X_train = training[:, 0], training[:, 1:]

training shape: (20000, 1001)


In [4]:
# Read the test file; it carries no label column, so the whole array is
# kept as the feature matrix
X_test = load_data('data/test_data.txt', skiprows=1)

In [5]:
# Converting Y values to one hot vectors.
# The encoding reads the raw label column of `training` instead of Y_train
# itself, so re-running this cell does not re-encode labels that are already
# one-hot encoded.
num_classes = 2
Y_train = keras.utils.to_categorical(training[:, 0], num_classes=num_classes)


Y_train shape: {} (20000,)
Y_train[0] shape: {} ()


In [6]:
# Dataset sizes
n_train = X_train.shape[0]  # number of training rows
n_words = X_train.shape[1]  # number of feature columns per row
# Taken from the test matrix; this previously read Y_train.shape[0], which is
# the training row count.
n_test = X_test.shape[0]



X_train shape: {} (20000, 1000)
X_train[0] shape: {} (1000,)
Y_train shape: {} (20000, 2)
Y_train[0] shape: {} (2,)


In [8]:
## Feed-forward classifier: one hidden ReLU layer followed by dropout
model = Sequential()
model.add(Dense(100, input_shape=(n_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.15))

## Output layer: one unit per class (sized from num_classes rather than a
## hard-coded 2), with softmax so the outputs are per-class probabilities
model.add(Dense(num_classes))
model.add(Activation('softmax'))

## Printing a summary of the layers and weights in the model
model.summary()

## The labels are one-hot encoded, so the loss is 'categorical_crossentropy'.
## The optimizer used here is RMSprop.
model.compile(loss='categorical_crossentropy',optimizer='RMSprop', metrics=['accuracy'])

fit = model.fit(X_train, Y_train, batch_size=128, epochs=10,
    verbose=1)

## Evaluate on the training data itself (not a held-out set). With
## metrics=['accuracy'], score[0] is the loss value and score[1] the accuracy.
score = model.evaluate(X_train, Y_train, verbose=0)
print('Training score:', score[0])
print('Training accuracy:', score[1])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               100100    
_________________________________________________________________
activation_3 (Activation)    (None, 100)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 202       
_________________________________________________________________
activation_4 (Activation)    (None, 2)                 0         
Total params: 100,302
Trainable params: 100,302
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training score: 0.1293479