# Keras Classifier

### Importing data

In [1]:
import keras.preprocessing as prep
from tensorflow import keras
import pickle
from nltk.corpus import stopwords
from string import punctuation
import re


# Load the train/test data and the two sizing values from the pickled mapping.
# The context manager closes the file handle as soon as unpickling finishes
# instead of leaving it open until garbage collection.
# SECURITY: pickle.load can execute arbitrary code; only open trusted files.
# NOTE(review): unpacking .values() relies on the pickled mapping's order
# matching this tuple — confirm against the code that wrote the file.
with open("data/keras-data.pickle", "rb") as data_file:
    x_train, y_train, x_test, y_test, vocab_size, max_length = pickle.load(data_file).values()

Using TensorFlow backend.


### Preprocessing data

In [2]:
# Pad both splits to the same length (max_length) in one pass so the two
# calls cannot drift apart.
x_train, x_test = (
    prep.sequence.pad_sequences(sequences, maxlen=max_length)
    for sequences in (x_train, x_test)
)

### Creating model

In [3]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Embedding, LSTM, Dropout
import os

def build_model(save=False):
    """Return a ``(model, exists)`` pair for the review classifier.

    If the checkpoint file is already on disk it is loaded and returned
    as-is. Otherwise a new Embedding -> LSTM -> Dense(2) -> softmax network
    is created and compiled. The embedding's input dimension is read from the
    notebook-level ``vocab_size``.

    Args:
        save: When true and no checkpoint was found, write the freshly
            compiled model to the checkpoint path before returning.

    Returns:
        tuple: the Keras model, and a bool that is True when the model was
        loaded from disk rather than built here.
    """
    checkpoint = 'LSTM_ReviewClf_24-03-2018.h5'
    found = os.path.isfile(checkpoint)

    # Guard clause: reuse the stored model when one is available.
    if found:
        print("Loading existing model...")
        return load_model(checkpoint), found

    print("Creating new model")
    network = Sequential([
        Embedding(vocab_size, output_dim=256),
        LSTM(256),
        Dense(2),
        Activation("softmax"),
    ])

    # NOTE(review): a 2-unit softmax head is normally paired with
    # 'categorical_crossentropy'; confirm 'binary_crossentropy' is intended.
    network.compile(loss='binary_crossentropy', optimizer="adam", metrics=['mae', 'acc'])

    # NOTE(review): the model has not been fitted at this point, so save=True
    # stores an untrained network that a later call will load as "existing".
    if save:
        network.save(checkpoint)
    return network, found

In [4]:
from keras.utils import to_categorical
# Imported here so the plotting code below works on a fresh kernel; no
# earlier cell brings `plt` into scope.
import matplotlib.pyplot as plt

# Building model
model, exists = build_model()
epochs = 10
# Fitting model (skipped when a stored model was loaded)
if not exists:
    # One-hot encode the labels to match the two-unit softmax output.
    y_train_binary = to_categorical(y_train, num_classes=2)

    print("Fitting model...     ", end='\r')
    train_history = model.fit(x_train, y_train_binary, batch_size=126, epochs=epochs)

    print("Plotting epoch figure...    ", end='\r')
    history = train_history.history
    # (candidate history keys, legend label). fit() above receives no
    # validation data, so the val_* series are normally absent; missing
    # series are skipped instead of raising KeyError. "accuracy" is the key
    # newer Keras versions record for the 'acc' metric.
    curves = [
        (("loss",), "train_loss"),
        (("val_loss",), "val_loss"),
        (("acc", "accuracy"), "train_acc"),
        (("val_acc", "val_accuracy"), "val_acc"),
    ]

    plt.style.use("ggplot")
    plt.figure()
    for candidate_keys, label in curves:
        values = next((history[key] for key in candidate_keys if key in history), None)
        if values is None:
            continue
        # The x axis is the zero-based epoch index of each recorded value.
        plt.plot(range(len(values)), values, label=label)
    plt.title("Loss/Accuracy on Reviews")
    plt.xlabel("epoch")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig('epoch_fig')

Loading existing model...
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


### Predicting on the test set

In [None]:
from sklearn.metrics import accuracy_score
import numpy as np

# Predict class probabilities, then take the arg-max over the class axis.
# This is the computation Sequential.predict_classes performed for a softmax
# output; that method is deprecated and absent from recent Keras releases.
class_probabilities = model.predict(x_test, batch_size=16, verbose=1)
predictions_classes = np.argmax(class_probabilities, axis=-1)

# accuracy_score takes (y_true, y_pred); the ground truth goes first.
print(f"Accuracy: {accuracy_score(y_test, predictions_classes)}")

 25008/130528 [====>.........................] - ETA: 41:08