In [None]:
 from keras.datasets import reuters

In [None]:
# Load the Reuters dataset as (data, labels) pairs for the train and test splits.
# num_words=10000 matches the default `dimension` used by vectorize_sequences below.
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(
num_words=10000)

In [None]:
# How many training and test samples?
# print() with a single argument behaves identically on Python 2 and Python 3.
print(len(train_data))
print(len(test_data))

In [None]:
# Each newswire is encoded as a list of word indices.
# print() with a single argument behaves identically on Python 2 and Python 3.
print(train_data[2])

In [None]:
# Decoding a newswire - note that indices are offset by 3: 0 is for padding,
# 1 is for start of sequence, 2 is for unknown.

# Word -> index mapping; decode_news inverts it to turn indices back into words.
word_index = reuters.get_word_index()

def decode_news(news):
    """Turn a sequence of encoded word indices back into a space-separated string.

    Args:
        news: iterable of integer word indices, offset by 3 relative to the
            values stored in the module-level ``word_index`` mapping.

    Returns:
        The decoded text. Any index that has no entry in the inverted
        ``word_index`` (after subtracting 3) is rendered as '?'.
    """
    # Invert word -> index into index -> word.
    index_to_word = {idx: word for word, idx in word_index.items()}
    # Undo the offset of 3 before each lookup.
    tokens = (index_to_word.get(encoded - 3, '?') for encoded in news)
    return ' '.join(tokens)

In [None]:
# Decode the first training newswire back into text and show it.
# print() with a single argument behaves identically on Python 2 and Python 3.
decoded_newswire = decode_news(train_data[0])
print(decoded_newswire)

In [None]:
# Labels are a topic index between 0 and 45.
# print() with a single argument behaves identically on Python 2 and Python 3.
print(train_labels[0])

In [None]:
# We vectorize each sequence as a multi-hot vector: position j is 1.0 if word index j appears in it
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    """Encode each sequence of indices as a fixed-width vector of 0.0/1.0 flags.

    Args:
        sequences: sized iterable of index collections, one per sample.
        dimension: width of each output row; every index must be < dimension.

    Returns:
        A float array of shape (len(sequences), dimension) where entry
        [row, j] is 1.0 if index j occurs in sequence `row`, else 0.0.
    """
    encoded = np.zeros((len(sequences), dimension))
    row = 0
    for indices in sequences:
        # Fancy indexing sets every listed column at once; repeats are harmless.
        encoded[row, indices] = 1.0
        row += 1
    return encoded

In [None]:
# Encode both splits with vectorize_sequences (default dimension=10000).
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

In [None]:
# The output is categorical data, which we will also one-hot encode.

# Note: Keras has a built-in way to do this - use
# to_categorical from keras.utils.np_utils (exposed as keras.utils.to_categorical in newer releases)
def to_one_hot(labels, dimension=46):
    """One-hot encode a sequence of integer class labels.

    Args:
        labels: sized iterable of integer labels, each in [0, dimension).
        dimension: number of classes, i.e. the width of each output row.

    Returns:
        A float array of shape (len(labels), dimension) with a single 1.0 per
        row, in the column given by that row's label.
    """
    encoded = np.zeros((len(labels), dimension))
    for row, class_id in zip(range(len(labels)), labels):
        encoded[row, class_id] = 1.0
    return encoded

In [None]:
# One-hot encode the labels of both splits (default dimension=46 classes).
one_hot_train_labels = to_one_hot(train_labels)
one_hot_test_labels = to_one_hot(test_labels)

In [None]:
# Build the network
from keras import models
from keras import layers

def create_model(hidden_units=64, hidden_layers=2, input_dim=10000, num_classes=46):
    """Build an uncompiled stack of ReLU Dense layers ending in a softmax layer.

    Called with no arguments this builds the original network: two 64-unit
    ReLU layers over a 10000-wide input, followed by a 46-way softmax.

    Args:
        hidden_units: number of units in every hidden layer.
        hidden_layers: how many hidden layers to stack (must be >= 1).
        input_dim: width of each input vector.
        num_classes: number of output classes (units in the softmax layer).

    Returns:
        The Sequential model; the caller is responsible for compiling it.

    Raises:
        ValueError: if hidden_layers is less than 1.
    """
    if hidden_layers < 1:
        raise ValueError('hidden_layers must be >= 1, got %r' % (hidden_layers,))
    model = models.Sequential()
    # Only the first layer declares the input shape.
    model.add(layers.Dense(hidden_units, activation='relu', input_shape=(input_dim,)))
    for _unused in range(hidden_layers - 1):
        model.add(layers.Dense(hidden_units, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

def compile_model(model):
    """Compile `model` in place for multi-class classification.

    Uses the 'rmsprop' optimizer, 'categorical_crossentropy' loss, and tracks
    the 'accuracy' metric.

    Args:
        model: object exposing a Keras-style ``compile`` method.

    Returns:
        None; the model is modified in place.
    """
    settings = {
        'optimizer': 'rmsprop',
        'loss': 'categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    model.compile(**settings)
        

In [None]:
# Build the baseline network and compile it in place.
model = create_model()
compile_model(model)

In [None]:
# Hold out the first 1000 vectorized training samples for validation and
# train on the remainder.
x_val = x_train[:1000]
partial_x_train = x_train[1000:]

In [None]:
# Split the one-hot labels at the same index (1000) as x_val / partial_x_train
# so inputs and labels stay aligned.
y_val = one_hot_train_labels[:1000]
partial_y_train = one_hot_train_labels[1000:]

In [None]:
# Train for 20 epochs in batches of 512, passing the held-out split as
# validation data. The return value is kept because later cells read
# history.history to plot the curves.
history = model.fit(partial_x_train, partial_y_train,
                   epochs=20, batch_size=512, validation_data=(x_val,y_val))

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

def loss_plot(loss, val_loss, epochs):
    """Plot training and validation loss against epoch number and show the figure.

    Args:
        loss: per-epoch training loss values (drawn as blue dots).
        val_loss: per-epoch validation loss values (drawn as a blue line).
        epochs: x-axis values, one per entry in `loss` / `val_loss`.
    """
    curves = (
        (loss, 'bo', 'Training loss'),
        (val_loss, 'b', 'Validation loss'),
    )
    for values, fmt, legend_label in curves:
        plt.plot(epochs, values, fmt, label=legend_label)
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [None]:
def accuracy_plot(acc_values, val_acc_values, epochs):
    """Plot training and validation accuracy against epoch number and show the figure.

    Args:
        acc_values: per-epoch training accuracy values (drawn as blue dots).
        val_acc_values: per-epoch validation accuracy values (drawn as a blue line).
        epochs: x-axis values, one per entry in the accuracy sequences.
    """
    # Plot the argument, not a same-named notebook global, so the function
    # works regardless of which cells have been run.
    plt.plot(epochs, acc_values, 'bo', label='Training acc')
    plt.plot(epochs, val_acc_values, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [None]:
# Show which metrics were recorded, then pull out the per-epoch loss curves.
# print() with a single argument behaves identically on Python 2 and Python 3.
print(history.history.keys())
history1 = history.history

loss = history1['loss']
val_loss = history1['val_loss']
# Epoch numbers for the x-axis, starting at 1.
epochs = range(1, len(loss) + 1)

In [None]:
# Plot training vs. validation loss per epoch.
loss_plot(loss, val_loss, epochs)

In [None]:
# Older Keras releases record metrics=['accuracy'] under 'acc'/'val_acc';
# Keras >= 2.3 uses 'accuracy'/'val_accuracy'. Accept either so this cell
# does not raise KeyError on newer installs.
acc_key = 'acc' if 'acc' in history1 else 'accuracy'
acc = history1[acc_key]
val_acc = history1['val_' + acc_key]

In [None]:
# Plot training vs. validation accuracy per epoch.
accuracy_plot(acc, val_acc, epochs)

In [None]:
# From the above, overfitting sets in after about 9 epochs, so we'll train a
# fresh model for 9 epochs.
model = create_model()
compile_model(model)

In [None]:
# Retrain the fresh model on the same split, stopping at 9 epochs.
# Note: this rebinds `history`, replacing the 20-epoch run's result.
history = model.fit(partial_x_train, partial_y_train,
                   epochs=9, batch_size=512, validation_data=(x_val,y_val))

In [None]:
# Evaluate the 9-epoch model on the held-out test set and show the result.
# print() with a single argument behaves identically on Python 2 and Python 3.
results = model.evaluate(x_test, one_hot_test_labels)
print(results)

In [None]:
# What would random accuracy look like?
# Shuffle a copy of the test labels and measure how often the shuffled label
# matches the true one.
import copy
# Work on a copy: np.random.shuffle reorders its argument in place.
test_labels_copy = copy.copy(test_labels)
# NOTE(review): no random seed is set, so this baseline varies between runs.
np.random.shuffle(test_labels_copy)
hits_array = np.array(test_labels) == np.array(test_labels_copy)
# Fraction of positions where the shuffled label equals the true label.
float(np.sum(hits_array))/len(test_labels)

In [None]:
# Generate predictions on all the test data
predictions = model.predict(x_test)

# Each prediction is a vector of length 46 (i.e. one entry per category,
# matching the 46-unit softmax output layer)
predictions[0].shape

In [None]:
# The coefficients in the prediction sum to 1 (the output layer is a softmax,
# so each prediction is a probability distribution over the classes)
np.sum(predictions[0])

In [None]:
# The index of the largest entry is the predicted class
np.argmax(predictions[0])

In [None]:
# Display the full score vector for the first test sample.
predictions[0]

In [None]:
# Try a model with an information bottleneck - a 4-unit hidden layer sits
# between the 64-unit layer and the 46-way output
def create_bottlenecked_model():
    """Build an uncompiled network whose second hidden layer has only 4 units.

    Layers, in order: Dense(64, relu) over a 10000-wide input, Dense(4, relu),
    Dense(46, softmax).

    Returns:
        The Sequential model; the caller is responsible for compiling it.
    """
    net = models.Sequential()
    stack = (
        layers.Dense(64, activation='relu', input_shape=(10000,)),
        layers.Dense(4, activation='relu'),
        layers.Dense(46, activation='softmax'),
    )
    for layer in stack:
        net.add(layer)
    return net

In [None]:
# Build and compile the bottlenecked variant (rebinds `model`).
model = create_bottlenecked_model()
compile_model(model)

In [None]:
# Train the bottlenecked model: 20 epochs, batch size 128, same validation split.
# The return value is not stored; as the cell's last expression it is displayed.
model.fit(partial_x_train, partial_y_train,epochs=20,batch_size=128,validation_data=(x_val, y_val))

In [None]:
# Experiment with more layers: six 64-unit ReLU hidden layers (create_model
# uses two), followed by the same 46-way softmax output.

model = models.Sequential()
model.add(layers.Dense(64, activation='relu',input_shape=(10000,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46,activation='softmax'))

In [None]:
# Compile the deeper model with the shared optimizer/loss/metric settings.
compile_model(model)

In [None]:
# Train the deeper model: 20 epochs, batch size 128, same validation split.
# The return value is not stored; as the cell's last expression it is displayed.
model.fit(partial_x_train, partial_y_train,epochs=20,batch_size=128,validation_data=(x_val, y_val))

In [None]:
# Try more hidden units: two 128-unit ReLU hidden layers (create_model uses
# 64 units each), followed by the same 46-way softmax output.
model = models.Sequential()
model.add(layers.Dense(128, activation='relu',input_shape=(10000,)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(46,activation='softmax'))

In [None]:
# Compile the wider model with the shared optimizer/loss/metric settings.
compile_model(model)

In [None]:
# Train the wider model: 20 epochs, batch size 128, same validation split.
# The return value is not stored; as the cell's last expression it is displayed.
model.fit(partial_x_train, partial_y_train,epochs=20,batch_size=128,validation_data=(x_val, y_val))