In [7]:
import os
import time

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import backend as K


def load_training_data(fpath, num_val_samples=250, random_state=None):
    """Load a sentiment CSV, shuffle it, and split off a validation set.

    Parameters
    ----------
    fpath : str or path-like
        CSV file containing at least the columns ``SentimentText`` and
        ``Sentiment``; any other columns are ignored.
    num_val_samples : int, default 250
        Number of rows (taken from the front of the shuffled data) that are
        held out for validation. The remaining rows form the training set.
    random_state : int or None, default None
        Seed forwarded to ``DataFrame.sample``. ``None`` keeps the previous
        behaviour (a different shuffle on every call); pass an integer to
        get the same train/validation split on every run.

    Returns
    -------
    tuple
        ``(train_text, train_labels), (val_text, val_labels)``. The text
        arrays are 1-D object arrays of ASCII ``bytes``; the label arrays are
        2-D integer one-hot arrays with one column per distinct value found
        in the ``Sentiment`` column.
    """
    df = pd.read_csv(fpath, usecols=['SentimentText', 'Sentiment'])
    df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Characters outside ASCII are replaced with '?' so that every sample is
    # a plain byte string.
    text = np.array(
        [str(t).encode('ascii', 'replace') for t in df['SentimentText'].tolist()],
        dtype=object,
    )
    # One-hot encode the labels; text and labels stay row-aligned because
    # both come from the same shuffled frame.
    labels = np.array(pd.get_dummies(df['Sentiment'].tolist()), dtype=int)

    train_text = text[num_val_samples:]
    train_labels = labels[num_val_samples:]
    val_text = text[:num_val_samples]
    val_labels = labels[:num_val_samples]

    return (train_text, train_labels), (val_text, val_labels)

In [8]:
# Load the shuffled train/validation split into notebook globals.
# Note: train() calls load_training_data() itself, so the model is not
# fitted on these particular arrays.
fpath = "dataset.csv"
training_data, val_data = load_training_data(fpath)

In [25]:
def get_model(num_categories=2):
    """Build and compile a small text classifier on a pretrained embedding.

    The network is: TF-Hub ``nnlm-en-dim50`` sentence embedding (string in,
    50-dim vector out) -> Dense(16, relu) -> Dense(num_categories, softmax).
    The hub layer is created without ``trainable=True``.

    Parameters
    ----------
    num_categories : int, default 2
        Number of units in the softmax output layer; it must equal the number
        of one-hot label columns the model is fitted on. Previously this
        argument was accepted but ignored and the output layer was always
        2 units wide, so the default is now 2 to keep ``get_model()``
        building exactly the same network as before.

    Returns
    -------
    tf.keras.Sequential
        The compiled model (categorical cross-entropy loss, RMSProp
        optimizer, accuracy metric). The model summary is printed as a side
        effect.
    """
    # A smaller alternative embedding is
    # https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1
    # (use output_shape=[20] with it).
    hub_layer = hub.KerasLayer(
        "https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1",
        output_shape=[50], input_shape=[], dtype=tf.string)

    model = tf.keras.Sequential()
    model.add(hub_layer)
    model.add(tf.keras.layers.Dense(16, activation='relu'))
    model.add(tf.keras.layers.Dense(num_categories, activation='softmax'))

    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='RMSProp', metrics=['acc'])
    return model


def train(fpath, epochs=2, batch_size=32):
    """Fit a freshly built classifier on the CSV at ``fpath``.

    The data is loaded (and split into training and validation parts) with
    ``load_training_data``; the network comes from ``get_model`` called with
    its defaults.

    Parameters
    ----------
    fpath : str
        Path of the CSV file handed to ``load_training_data``.
    epochs : int, default 2
        Number of passes over the training data.
    batch_size : int, default 32
        Mini-batch size used by ``fit``.

    Returns
    -------
    The fitted model object returned by ``get_model``.
    """
    (train_text, train_labels), held_out = load_training_data(fpath)

    classifier = get_model()
    classifier.fit(
        train_text,
        train_labels,
        validation_data=held_out,
        epochs=epochs,
        batch_size=batch_size,
    )
    return classifier

In [26]:
    # Train on the CSV; the resulting `model` global is what the prediction
    # and export cells further down operate on.
    fpath = "dataset.csv"
    model = train(fpath)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_3 (KerasLayer)   (None, 50)                48190600  
_________________________________________________________________
dense_6 (Dense)              (None, 16)                816       
_________________________________________________________________
dense_7 (Dense)              (None, 2)                 34        
Total params: 48,191,450
Trainable params: 850
Non-trainable params: 48,190,600
_________________________________________________________________
Train on 24750 samples, validate on 250 samples
Epoch 1/2
Epoch 2/2


In [30]:
# Spot-check the trained model on a clearly negative sentence.
model.predict(["Terrible movie."])

array([[0.46619886, 0.53380114]], dtype=float32)

In [29]:
# Spot-check the trained model on a clearly positive sentence.
# NOTE(review): the recorded outputs for this and the "Terrible movie."
# example are nearly identical (about 0.45 / 0.55), so the two sentences are
# barely separated after the 2-epoch run.
model.predict(["Very best movie ever."])

array([[0.4473396 , 0.55266035]], dtype=float32)

In [31]:
def export_model(model, base_path="./exported_models/"):
    """Save ``model`` in SavedModel format under a timestamped directory.

    Parameters
    ----------
    model
        The trained model to export; passed straight to
        ``tf.saved_model.save``.
    base_path : str, default "./exported_models/"
        Parent directory for exports. Each export goes into a sub-directory
        named after the current Unix time in whole seconds.

    Returns
    -------
    str
        The directory the model was written to, so the caller can load or
        serve that exact export.
    """
    # Whole-second timestamps serve as version numbers; two exports within
    # the same second would target the same directory.
    path = os.path.join(base_path, str(int(time.time())))
    tf.saved_model.save(model, path)
    return path

In [33]:
# Write the trained model to a timestamped directory under ./exported_models/.
export_model(model)

INFO:tensorflow:Assets written to: ./exported_models/1571698198/assets


INFO:tensorflow:Assets written to: ./exported_models/1571698198/assets
