In [4]:
from __future__ import division, print_function, absolute_import

import tensorflow as tf
import tflearn
import pickle
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_1d, global_max_pool
from tflearn.layers.merge_ops import merge
from tflearn.layers.estimator import regression
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb

In [5]:
# Read the pickled features (x) and labels (y) back from the working directory.
# NOTE(review): pickle.load can execute arbitrary code, so only open files you trust.
with open('preprocess_x_1.pickle', 'rb') as features_file:
    x_shuffled = pickle.load(features_file)
with open('preprocess_y_1.pickle', 'rb') as labels_file:
    y_shuffled = pickle.load(labels_file)

# Confirm the load finished, then report how many samples were read.
print("Files loaded.")
sample_count = len(x_shuffled)
print("Size: {:d}".format(sample_count))

Files loaded.
Size: 10662


In [6]:
# Hold out 10% of the samples for evaluation; the fixed random_state keeps the
# partition identical from run to run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    x_shuffled,
    y_shuffled,
    test_size=0.1,
    random_state=42,
)

# Report (rows, columns) for every split; each template takes the two shape entries.
shape_reports = (
    ("Train features dimensions: {:d}, {:d}", X_train),
    ("Train labels dimensions: {:d}, {:d}", y_train),
    ("Test features dimensions: {:d}, {:d}", X_test),
    ("Test labels dimensions: {:d}, {:d}", y_test),
)
for template, split in shape_reports:
    print(template.format(*split.shape))

Train features dimensions: 9595, 56
Train labels dimensions: 9595, 2
Test features dimensions: 1067, 56
Test labels dimensions: 1067, 2


In [7]:
# Building convolutional network
# Start from an empty default graph so that re-running this cell does not stack a
# second copy of the layers and training ops on top of the previous run.
tf.reset_default_graph()

# Take the sequence length from the data rather than hard-coding it
# (the recorded split output shows 56 columns for this dataset).
sequence_length = int(X_train.shape[1])

network = input_data(shape=[None, sequence_length], name='input')
# Converts all words in vocabulary to lower dimensional representation (128-d vectors).
# input_dim is presumably the vocabulary size produced during preprocessing -- TODO confirm.
network = tflearn.embedding(network, input_dim=18758, output_dim=128)
# Three parallel 1-D convolutions over the embedded sequence with window sizes
# 3, 4 and 5; each has 128 filters, ReLU activation and L2 weight regularisation.
branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
# Concatenate the three branch outputs along axis 1.
network = merge([branch1, branch2, branch3], mode='concat', axis=1)
# Insert a size-1 axis at index 2 (this adds one dimension, it does not add 2 to
# any dimension); global_max_pool expects a 4-D tensor per the TFLearn docs.
network = tf.expand_dims(network, 2)
network = global_max_pool(network)
# The second argument of tflearn's dropout is the keep probability.
network = dropout(network, 0.5)
# Two-way softmax output, matching the two label columns.
network = fully_connected(network, 2, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.001,
                     loss='categorical_crossentropy', name='target')

# Training
# NOTE(review): the held-out test split is also used as the validation set here,
# so the reported val_acc is not an independent test score.
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(X_train, y_train,
          n_epoch=5, shuffle=True,
          validation_set=(X_test, y_test),
          show_metric=True, batch_size=32,
          run_id='rt_1')

Training Step: 1499  | total loss: [1m[32m0.02372[0m[0m | time: 38.677s
| Adam | epoch: 005 | loss: 0.02372 - acc: 0.9952 -- iter: 9568/9595
Training Step: 1500  | total loss: [1m[32m0.02278[0m[0m | time: 47.227s
| Adam | epoch: 005 | loss: 0.02278 - acc: 0.9957 | val_loss: 0.74611 - val_acc: 0.7629 -- iter: 9595/9595
--
