In [122]:
import numpy as np
from sklearn import metrics
import pandas

import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
import skflow

In [123]:
# Read the train and test splits. header=None means no row is consumed as a
# header, so the columns are addressed by integer position further down.
train, test = (
    pandas.read_csv(csv_path, header=None)
    for csv_path in ('dbpedia_csv/train.csv', 'dbpedia_csv/test.csv')
)

In [124]:
# Column 2 of each frame is used as the model input and column 0 as the target.
# NOTE(review): presumably the CSV layout is (class id, title, content), which
# would leave column 1 unused here — confirm against the data files.
X_train = train[2]
y_train = train[0]
X_test = test[2]
y_test = test[0]

In [125]:
# Length argument handed to the ByteProcessor; char_rnn_model also splits its
# input into this many per-step slices.
MAX_DOCUMENT_LENGTH = 100

In [126]:
# Byte-level preprocessor constructed with MAX_DOCUMENT_LENGTH.
# NOTE(review): presumably it encodes every document as a fixed-length sequence
# of byte values, truncating or padding to that length — confirm against skflow.
char_processor = skflow.preprocessing.ByteProcessor(MAX_DOCUMENT_LENGTH)

In [127]:
def _to_byte_matrix(encode, documents):
    """Apply `encode` to `documents` and materialise its iterable result as one ndarray."""
    return np.array(list(encode(documents)))


# fit_transform is used for the training split, plain transform for the test split.
# NOTE(review): both names are rebound to the encoded arrays, so re-running this
# cell would feed already-encoded data back into the processor.
X_train = _to_byte_matrix(char_processor.fit_transform, X_train)
X_test = _to_byte_matrix(char_processor.transform, X_test)

In [128]:
HIDDEN_SIZE = 128  # size argument passed to the GRU cell below


def char_rnn_model(X, y):
    """Build the character-level GRU classifier passed to skflow as `model_fn`.

    Args:
        X: Input batch supplied by the estimator.
            NOTE(review): presumably integer byte ids of shape
            (batch, MAX_DOCUMENT_LENGTH) — confirm against the caller.
        y: Targets, forwarded unchanged to the logistic-regression head.

    Returns:
        The result of `skflow.models.logistic_regression` applied to the last
        element of the RNN's second return value and `y`.
    """
    # 256 is the one-hot width — presumably one slot per possible byte value.
    one_hot_bytes = skflow.ops.one_hot_matrix(X, 256)
    # Split along dimension 1 into MAX_DOCUMENT_LENGTH pieces so the RNN is fed
    # a list of per-step inputs.
    step_inputs = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, one_hot_bytes)
    gru = rnn_cell.GRUCell(HIDDEN_SIZE)
    _outputs, states = rnn.rnn(gru, step_inputs, dtype=tf.float32)
    # Only the last entry of the second return value feeds the classifier.
    # NOTE(review): this indexing assumes rnn.rnn returns a per-step sequence
    # there, which depends on the TensorFlow version — confirm.
    final_encoding = states[-1]
    return skflow.models.logistic_regression(final_encoding, y)

In [129]:
# Estimator that wraps char_rnn_model.
# NOTE(review): n_classes=15 presumably leaves room for labels that start at 1
# rather than 0 — confirm against the label column.
classifier = skflow.TensorFlowEstimator(
    model_fn=char_rnn_model,
    n_classes=15,
    steps=100,  # the training log below stops at step #91, printed every 10 steps
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True,
)

In [130]:
# Fit the estimator once on the training split (summaries are written under
# ./model_1), then report accuracy on the test split. Raise the range() bound
# to run further fit/evaluate rounds on the same estimator.
for i in range(1):
    classifier.fit(X_train, y_train, logdir='./model_1')
    # scikit-learn's signature is accuracy_score(y_true, y_pred): ground truth
    # first, predictions second. Accuracy itself is symmetric, so the number is
    # unchanged, but the correct order matters as soon as another metric is used.
    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
    print("Accuracy: %f" % score)

Step #1, avg. loss: 3.30363
Step #11, avg. loss: 2.97571
Step #21, avg. loss: 2.70808
Step #31, avg. loss: 2.69752
Step #41, avg. loss: 2.61704
Step #51, avg. loss: 2.59103
Step #61, avg. loss: 2.58642
Step #71, avg. loss: 2.54714
Step #81, avg. loss: 2.52415
Step #91, avg. loss: 2.44612
Accuracy: 0.227571


In [131]:
# Predict for the first test row only; slicing (rather than indexing) keeps the
# input two-dimensional.
predictions = classifier.predict(X_test[:1])

In [132]:
# Display the prediction computed for the first test row.
predictions

array([11])

In [80]:
# Display the dimensions of the encoded test array.
X_test.shape

(70000, 100)

In [83]:
# Display the encoded form of the first test row (kept 2-D by slicing).
X_test[:1]

array([[ 32,  84,  89,  32,  75,  85,  32,  47, 116,  97, 201, 170, 107,
        117, 203, 144,  47,  32, 105, 115,  32,  97, 110,  32,  65, 109,
        101, 114, 105,  99,  97, 110,  32,  97, 108,  99, 111, 104, 111,
        108, 105,  99,  32,  98, 101, 118, 101, 114,  97, 103, 101,  32,
         99, 111, 109, 112,  97, 110, 121,  32, 116, 104,  97, 116,  32,
        115, 112, 101,  99, 105,  97, 108, 105, 122, 101, 115,  32, 105,
        110,  32, 115,  97, 107, 101,  32,  97, 110, 100,  32, 111, 116,
        104, 101, 114,  32, 115, 112, 105, 114, 105]], dtype=uint8)

In [86]:
# Map the first encoded test row back through the processor and join the
# pieces into a single string for inspection.
''.join(char_processor.reverse(X_test[:1]))

' TY KU /taɪkuː/ is an American alcoholic beverage company that specializes in sake and other spiri'

In [111]:
# Scratch check: apply the same one-hot call char_rnn_model makes, here to the
# first encoded test row.
b = skflow.ops.one_hot_matrix(X_test[:1], 256)