In [1]:
import pandas as pd
import sys  
import numpy as np
from keras import utils
from keras.models import Sequential


from sklearn.model_selection import train_test_split
from keras.preprocessing import text, sequence
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from keras.layers import Dense, Activation, Dropout

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
filepath = 'input/news_data.csv'
df = pd.read_csv(filepath, encoding='latin-1')

df['CATEGORY'] = df.CATEGORY.map({'b': 0, 't': 1, 'e': 2, 'm': 3})
sentences = df['TITLE']
y = df['CATEGORY']

sentences_train, sentences_test, y_train, y_test = train_test_split(    
    sentences, y, test_size=0.25, random_state=1000)

In [3]:
s = pd.concat([sentences_test, y_test], axis=1)
s.to_csv('sample_test.csv', sep=',')

In [4]:
max_words = 1000
tokenize = text.Tokenizer(num_words=max_words, char_level=False)

In [5]:
tokenize.fit_on_texts(sentences_train) 
x_train = tokenize.texts_to_matrix(sentences_train)
x_test = tokenize.texts_to_matrix(sentences_test)

In [6]:
encoder = LabelEncoder()
encoder.fit(y_train)
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)

In [7]:
num_classes = np.max(y_train)+1
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)

In [8]:
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

x_train shape: (749, 1000)
x_test shape: (250, 1000)
y_train shape: (749, 2)
y_test shape: (250, 2)


In [9]:
batch_size = 32
epochs = 2

In [10]:
# Build the model
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [14]:
my_y = model.predict_classes(x_test)
print(my_y)
np.savetxt("test_output.txt", my_y, delimiter=",")

[1 1 1 1 0 1 0 1 1 1 1 0 0 0 0 1 1 0 1 1 0 1 0 1 1 0 0 1 1 0 1 0 0 1 0 0 0
 0 0 0 1 1 1 1 0 1 1 0 1 1 0 0 0 0 0 1 0 0 1 1 1 1 0 0 1 1 0 0 0 0 1 1 1 1
 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 1 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 0 1 0 0 1
 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0 0 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1
 1 0 0 0 1 0 1 0 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0
 0 1 0 0 1 0 1 1 0 1 0 0 0 1 0 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 1 0 0 1 1
 0 0 1 0 0 0 1 0 1 1 1 0 0 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0]


In [12]:
score = model.evaluate(x_test, y_test,
                       batch_size=batch_size, verbose=1)
print('Test accuracy:', score[1])

 32/250 [==>...........................] - ETA: 4s



Test accuracy: 0.47200000238418577
