# Data

In [5]:
from sklearn.datasets import fetch_20newsgroups

# The three newsgroups used as the target classes of the classifier.
categories = ["comp.graphics","sci.space","rec.sport.baseball"]

# Fetch the 'train' and 'test' subsets, restricted to the categories above.
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)

# Report how many documents each subset contains (Python 2 print statements).
print 'Total texts in train:', len(newsgroups_train.data)
print 'Total texts in test:', len(newsgroups_test.data)

No handlers could be found for logger "sklearn.datasets.twenty_newsgroups"


Total texts in train: 1774
Total texts in test: 1180


In [6]:
# Peek at the raw text of the first training document. Under Python 2 the
# parenthesised arguments form a tuple, which is why the output is a tuple repr.
print('text',newsgroups_train.data[0])

('text', u"From: jk87377@lehtori.cc.tut.fi (Kouhia Juhana)\nSubject: Re: More gray levels out of the screen\nOrganization: Tampere University of Technology\nLines: 21\nDistribution: inet\nNNTP-Posting-Host: cc.tut.fi\n\nIn article <1993Apr6.011605.909@cis.uab.edu> sloan@cis.uab.edu\n(Kenneth Sloan) writes:\n>\n>Why didn't you create 8 grey-level images, and display them for\n>1,2,4,8,16,32,64,128... time slices?\n\nBy '8 grey level images' you mean 8 items of 1bit images?\nIt does work(!), but it doesn't work if you have more than 1bit\nin your screen and if the screen intensity is non-linear.\n\nWith 2 bit per pixel; there could be 1*c_1 + 4*c_2 timing,\nthis gives 16 levels, but they are linear if screen intensity is\nlinear.\nWith 1*c_1 + 2*c_2 it works, but we have to find the best\ncompinations -- there's 10 levels, but 16 choises; best 10 must be\nchosen. Different compinations for the same level, varies a bit, but\nthe levels keeps their order.\n\nReaders should verify what I wr

In [7]:
# Integer class label stored for the first training document.
print('category:',newsgroups_train.target[0])

('category:', 0)


# Word2Index encoding

In [8]:
from collections import Counter

vocab = Counter()

for text in newsgroups_train.data:
    for word in text.split(' '):
        vocab[word.lower()]+=1
        
for text in newsgroups_test.data:
    for word in text.split(' '):
        vocab[word.lower()]+=1
        
total_words = len(vocab)        
print "Total words in vocab (in fact distinct words):", total_words

Total words in vocab (in fact distinct words): 119930


In [9]:
def get_word_2_index(vocab):
    """Map each word of ``vocab`` to its position in iteration order.

    Keys are lower-cased. If several entries lower-case to the same key,
    the position of the last one encountered is kept.
    """
    return {word.lower(): position for position, word in enumerate(vocab)}

In [10]:
# Build the token -> vector-position lookup used when encoding texts.
word2index = get_word_2_index(vocab)

# Sanity check: size of the lookup and the position assigned to a common word.
print "Total words in word2index:", len(word2index)
print "Index of the word 'the':", word2index['the']

Total words in word2index: 119930
Index of the word 'the': 79118


# Neural Network

In [18]:
def multilayer_perceptron(input_tensor, weights, biases):
    """Chain two ReLU hidden layers and a linear output layer.

    ``weights`` is indexed with the keys 'h1', 'h2' and 'out'; ``biases``
    with 'b1', 'b2' and 'out'. The value returned is the output layer
    before any activation is applied.
    """
    hidden = input_tensor

    # Hidden layers: matmul -> bias add -> ReLU, in the order h1 then h2.
    for weight_key, bias_key in (('h1', 'b1'), ('h2', 'b2')):
        pre_activation = tf.add(tf.matmul(hidden, weights[weight_key]), biases[bias_key])
        hidden = tf.nn.relu(pre_activation)

    # Output layer with linear activation
    return tf.matmul(hidden, weights['out']) + biases['out']

In [13]:
def get_batch(df, i, batch_size, n_classes=3):
    """Encode the i-th mini-batch as word-count features and one-hot labels.

    Parameters
    ----------
    df : object
        Must expose ``data`` (sequence of text strings) and ``target``
        (sequence of integer class ids); both are sliced with the same range.
    i : int
        Zero-based batch index; the slice is ``[i*batch_size, (i+1)*batch_size)``.
    batch_size : int
        Maximum number of texts in the batch (the last slice may be shorter).
    n_classes : int, optional
        Width of the one-hot label vectors. Defaults to 3, the value the
        original implementation hard-coded.

    Returns
    -------
    (features, labels) : tuple of numpy float arrays
        ``features`` has shape ``(n_texts, total_words)`` and holds, per text,
        how many times each vocabulary token occurs. ``labels`` has shape
        ``(n_texts, n_classes)`` with a single 1. per row. Both stay 2-D even
        when the slice is empty.

    Raises
    ------
    KeyError
        If a token of a text is missing from the module-level ``word2index``.
    ValueError
        If a label falls outside ``[0, n_classes)``; previously such labels
        were silently encoded as class 2.
    """
    start = i * batch_size
    stop = start + batch_size
    texts = df.data[start:stop]
    labels = np.asarray(df.target[start:stop], dtype=int)

    # Preallocate the whole matrix: building a list of rows and converting it
    # afterwards would hold two copies of the (large) batch in memory.
    features = np.zeros((len(texts), total_words), dtype=float)
    for row, text in enumerate(texts):
        # Same tokenisation as the vocabulary: split on single spaces, lower-case.
        for word in text.split(' '):
            features[row, word2index[word.lower()]] += 1

    if labels.size and (labels.min() < 0 or labels.max() >= n_classes):
        raise ValueError(
            "labels must lie in [0, %d), got values between %d and %d"
            % (n_classes, labels.min(), labels.max())
        )

    # One-hot encode: set column `label` to 1 in each row.
    one_hot = np.zeros((len(labels), n_classes), dtype=float)
    one_hot[np.arange(len(labels)), labels] = 1.

    return features, one_hot

In [14]:
# Training hyper-parameters
learning_rate = 0.01   # step size handed to the Adam optimizer
training_epochs = 10   # iterations of the outer training loop
batch_size = 150       # texts per mini-batch requested from get_batch
display_step = 1       # the loss is printed every `display_step` epochs

In [15]:
import numpy as np

# Build one sample batch (batch index 1) to inspect the shapes fed to the network.
batch = get_batch(newsgroups_train,1,batch_size)

x = batch[0] # features
y = batch[1] # labels

# NOTE(review): the messages below say "word2vec", but get_batch fills a
# per-text word-count vector of length total_words (a bag-of-words encoding),
# not a learned embedding. The wording is left as-is to match the recorded output.
print "Each batch contains a feature (x)  matrix of dimensions", x.shape
print ">>", x.shape[0], "texts encoded as word2vec"
print ">>", x.shape[1], "are the number of elements of each text word2vec representation"
print
print "Each batch contains a label (y) matrix of dimensions", y.shape
print ">>", y.shape[0], "labels encoded as one-hot-encoding"
print ">>", y.shape[1], "are the number of elements of each one-hot-encoding vector"

Each batch contains a feature (x)  matrix of dimensions (150, 119930)
>> 150 texts encoded as word2vec
>> 119930 are the number of elements of each text word2vec representation

Each batch contains a label (y) matrix of dimensions (150, 3)
>> 150 labels encoded as one-hot-encoding
>> 3 are the number of elements of each one-hot-encoding vector


In [16]:
# Network Parameters
n_hidden_1 = 100      # units in the 1st hidden layer
n_hidden_2 = 100       # units in the 2nd hidden layer
n_input = total_words # input width: one slot per distinct token in vocab
n_classes = 3         # output width, one per category: graphics, sci.space and baseball

In [24]:
# TensorFlow is imported here, ahead of its first use, so the notebook also
# runs top-to-bottom on a fresh kernel; the only other import of it sits in
# the later TFLearn section and would come too late.
import tensorflow as tf

# Graph inputs: a batch of word-count vectors and the matching one-hot labels.
# The leading None leaves the batch dimension unspecified.
input_tensor = tf.placeholder(tf.float32,[None, n_input],name="input")
output_tensor = tf.placeholder(tf.float32,[None, n_classes],name="output")

In [50]:
# Trainable weight matrices, one per layer, keyed the way
# multilayer_perceptron looks them up ('h1', 'h2', 'out').
# NOTE(review): tf.random_normal is called without stddev or seed, so runs are
# not reproducible; with n_input inputs this unscaled initialisation probably
# explains the very large first-epoch loss -- TODO confirm.
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}

# Trainable bias vectors, keyed 'b1', 'b2', 'out'.
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model: `prediction` is the linear output layer, used as logits below.
prediction = multilayer_perceptron(input_tensor, weights, biases)

# loss/cost: softmax cross-entropy against the one-hot labels, averaged over the batch
entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=output_tensor)
loss = tf.reduce_mean(entropy_loss)

# optimizer: one Adam update of all variables each time this op is run
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

In [52]:
# Initializing the variables
init = tf.global_variables_initializer()

# Number of mini-batches per epoch, rounded UP so the final, shorter batch is
# trained on as well (truncating division would silently skip the remainder
# of the training texts). Computed once: it does not change between epochs.
n_train_texts = len(newsgroups_train.data)
total_batch = (n_train_texts + batch_size - 1) // batch_size

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):

        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):

            batch_x, batch_y = get_batch(newsgroups_train, i ,batch_size)

            # Run optimization op (backprop) and cost op (to get loss value)
            c,_ = sess.run([loss,optimizer], feed_dict={input_tensor: batch_x, output_tensor:batch_y})

            # Accumulate the per-text average loss: each batch mean is weighted
            # by its size so the shorter last batch is not over-counted.
            avg_cost += c * len(batch_x) / n_train_texts

        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "loss=", \
                "{:.9f}".format(avg_cost))

    print("Optimization Finished!")

    # Test model: a prediction is correct when the arg-max of the logits
    # matches the arg-max of the one-hot label.
    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(output_tensor, 1))

    # Calculate accuracy over the whole test subset, encoded as one single batch
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    total_test_data = len(newsgroups_test.target)
    batch_x_test,batch_y_test = get_batch(newsgroups_test,0,total_test_data)
    print("Accuracy:", accuracy.eval({input_tensor: batch_x_test, output_tensor: batch_y_test}))

('Epoch:', '0001', 'loss=', '1320.704845082')
('Epoch:', '0002', 'loss=', '351.652041349')
('Epoch:', '0003', 'loss=', '121.758742246')
('Epoch:', '0004', 'loss=', '19.556734302')
('Epoch:', '0005', 'loss=', '14.648932972')
('Epoch:', '0006', 'loss=', '1.740777363')
('Epoch:', '0007', 'loss=', '0.053152447')
('Epoch:', '0008', 'loss=', '1.052541895')
('Epoch:', '0009', 'loss=', '0.000000000')
('Epoch:', '0010', 'loss=', '0.000000000')
Optimization Finished!
('Accuracy:', 0.74661022)


# TFLearn

In [17]:
import tensorflow as tf
# Display the installed TensorFlow version as the cell result.
tf.__version__

'1.1.0-rc0'

In [18]:
import tflearn as tflearn

In [19]:
# Build neural network with the same topology as the TensorFlow model above:
# two hidden layers followed by a softmax output over the classes.
# tflearn.fully_connected uses a linear activation unless told otherwise, so
# ReLU is requested explicitly; without it the stacked hidden layers collapse
# into a single linear map. Layer widths come from the shared network
# parameters instead of repeating the literal 100.
net = tflearn.input_data(shape=[None, n_input])
net = tflearn.fully_connected(net, n_hidden_1, activation='relu')
net = tflearn.fully_connected(net, n_hidden_2, activation='relu')
net = tflearn.fully_connected(net, n_classes, activation='softmax')
# regression() attaches the training op with TFLearn's default optimizer and loss.
net = tflearn.regression(net)

# Define model and setup tensorboard
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

# Start training.
# NOTE(review): x and y are the single sample batch built earlier in the
# notebook (150 texts), not the full training set -- confirm this is intended.
model.fit(x, y, n_epoch=500, batch_size=16, show_metric=True)

Training Step: 4999  | total loss: 0.12961 | time: 0.491s
| Adam | epoch: 500 | loss: 0.12961 - acc: 0.9841 -- iter: 144/150
Training Step: 5000  | total loss: 0.11702 | time: 0.546s
| Adam | epoch: 500 | loss: 0.11702 - acc: 0.9857 -- iter: 150/150
--
