<h2 align="center">Amazon Review Dataset!</h2>

In [1]:
import os
import time
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

In [2]:
def load_dataset(file_path, num_samples):
    """Load review titles and one-hot sentiment labels from a CSV file.

    Reads the columns at positions 6 and 9 of the file (used as the star
    rating and the review title, in that order) from the first
    ``num_samples`` data rows.

    Ratings are mapped to three sentiment classes and one-hot encoded with a
    fixed column order, so the label matrix always has three columns even
    when a class does not occur in the loaded rows:

        column 0 -> negative (every rating that is neither >= 4 nor == 3)
        column 1 -> neutral  (rating == 3)
        column 2 -> positive (rating >= 4)

    Args:
        file_path: Path of the CSV file to read.
        num_samples: Maximum number of data rows to read.

    Returns:
        A ``(labels, text)`` tuple. ``labels`` is an int array of shape
        ``(n, 3)``; ``text`` is an object array of shape ``(n,)`` holding the
        titles as ASCII-encoded ``bytes`` (non-ASCII characters become ``?``).
    """
    df = pd.read_csv(file_path, usecols=[6, 9], nrows=num_samples)
    df.columns = ['rating', 'title']

    # str() also stringifies non-string cells (e.g. a missing title) before encoding.
    text = np.array(
        [str(title).encode('ascii', 'replace') for title in df['title']],
        dtype=object,
    )

    sentiment = np.asarray(
        [1 if rating >= 4 else 0 if rating == 3 else -1 for rating in df['rating']],
        dtype=int,
    )
    # Index rows of a 3x3 identity matrix instead of calling pd.get_dummies:
    # get_dummies only emits columns for the classes it actually sees, which
    # would break the 3-unit output layer for samples lacking a class.
    # Shifting by one maps -1/0/1 onto column indices 0/1/2.
    labels = np.eye(3, dtype=int)[sentiment + 1]

    return labels, text

In [3]:
# Smoke test: load the first 100 rows and check that one title per row comes back.
tmp_labels, tmp_text = load_dataset('amazon_review/train.csv', 100)
tmp_text.shape

(100,)

### Task 3: Build the Classification Model using TF Hub

In [4]:
def get_model():
    """Build and compile the three-class review classifier.

    A frozen TF Hub NNLM text embedding (raw strings in, 50-dim vectors out)
    feeds a 16-unit ReLU layer followed by a 3-way softmax output. The model
    is compiled with categorical cross-entropy and the Adam optimizer, its
    summary is printed, and the compiled model is returned.
    """
    embedding = hub.KerasLayer(
        "https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1",
        output_shape=[50],
        input_shape=[],
        dtype=tf.string,
        name='input',
        trainable=False,  # keep the pretrained embedding weights fixed
    )
    hidden = tf.keras.layers.Dense(16, activation='relu')
    output = tf.keras.layers.Dense(3, activation='softmax', name='output')

    classifier = tf.keras.Sequential([embedding, hidden, output])
    classifier.compile(
        optimizer='Adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier

In [5]:
#embed = hub.load("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1")

### Task 4: Define Training Procedure

In [6]:
def train(EPOCHS=5, BATCH_SIZE=32, TRAIN_FILE='amazon_review/train.csv', 
          VAL_FILE='amazon_review/test.csv',
          NUM_TRAIN_SAMPLES=100000, NUM_VAL_SAMPLES=10000):
    """Load the review data, then build and fit the classifier.

    Args:
        EPOCHS: Number of passes over the training data.
        BATCH_SIZE: Mini-batch size passed to ``model.fit``.
        TRAIN_FILE: CSV file the training rows are read from.
        VAL_FILE: CSV file the validation rows are read from.
        NUM_TRAIN_SAMPLES: Maximum number of rows read from ``TRAIN_FILE``.
        NUM_VAL_SAMPLES: Maximum number of rows read from ``VAL_FILE``.

    Returns:
        The fitted model returned by ``get_model()``.
    """
    print("Loading training/validation data ...")
    y_train, x_train = load_dataset(TRAIN_FILE, num_samples=NUM_TRAIN_SAMPLES)
    y_val, x_val = load_dataset(VAL_FILE, num_samples=NUM_VAL_SAMPLES)

    print("Training the model ...")
    model = get_model()
    model.fit(
        x_train, y_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        verbose=1,
        validation_data=(x_val, y_val),
    )
    return model

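### Task 5: Train and Export Model as Protobuf

*Note: the cell below only trains the model; the export step is not implemented in this notebook yet.*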

In [7]:
# Train with the default arguments of train(); the returned model is queried in the test cells below.
model = train()

Loading training/validation data ...
Training the model ...
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (KerasLayer)           (None, 50)                48190600  
_________________________________________________________________
dense (Dense)                (None, 16)                816       
_________________________________________________________________
output (Dense)               (None, 3)                 51        
Total params: 48,191,467
Trainable params: 867
Non-trainable params: 48,190,600
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Task 6: Test Model

#### Negative Review:

In [8]:
# The three probabilities follow the label encoding produced by load_dataset:
# [negative, neutral, positive].
test_sentence = "What a horrible book! It was such a waste of time reading it!"
model.predict([test_sentence])

array([[0.665351  , 0.07995597, 0.25469312]], dtype=float32)

#### Positive Review:

In [9]:
# Same check with a clearly positive sentence; the last probability (positive class) should dominate.
test_sentence = "Awesome product. I love it!"
model.predict([test_sentence])

array([[0.01110581, 0.01236449, 0.9765297 ]], dtype=float32)