<a href="https://colab.research.google.com/github/bryanfree66/tensorflow_notebooks/blob/master/IMDB_Sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sentiment Analysis - IMDB
## Predict the Binary Judgement Given the Text

In [0]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, preprocessing
import tensorflow_datasets as tfds

In [0]:
# --- Training schedule ---
EPOCHS = 20        # passes over the training set in model.fit
BATCH_SIZE = 500   # batch size shared by model.fit and model.evaluate

# --- Text / embedding settings ---
n_words = 10000      # vocabulary cap: passed as num_words to the loader and as the Embedding input size
max_len = 200        # every review is padded to this many tokens
dim_embedding = 256  # width of each learned word vector

## Load the Data

In [0]:
def load_data():
  """Load the Keras IMDB dataset and pad every review to a fixed length.

  Reads the module-level settings `n_words` (vocabulary cap handed to the
  loader) and `max_len` (target sequence length handed to `pad_sequences`).

  Returns:
    `((X_train, y_train), (X_test, y_test))`, where the `X_*` entries are the
    padded review sequences and the `y_*` entries are the labels exactly as
    the loader returned them.
  """
  train_split, test_split = datasets.imdb.load_data(num_words=n_words)

  def _pad(reviews):
    # Same padding settings for both splits so they share one input width.
    return preprocessing.sequence.pad_sequences(reviews, maxlen=max_len)

  train_reviews, train_labels = train_split
  test_reviews, test_labels = test_split
  return (_pad(train_reviews), train_labels), (_pad(test_reviews), test_labels)

## Build the Model

In [0]:
def build_model():
  """Build the (uncompiled) sentiment classifier.

  Layer stack: Embedding -> Dropout(0.3) -> GlobalMaxPooling1D
  -> Dense(128, relu) -> Dropout(0.5) -> Dense(1, sigmoid).

  Reads the module-level settings `max_len`, `n_words` and `dim_embedding`.

  Returns:
    A `Sequential` model. The caller is expected to `compile` it before
    training.
  """
  model = models.Sequential()
  # Declare the input shape explicitly instead of through the Embedding
  # layer's `input_length` argument, which newer Keras releases deprecate.
  model.add(layers.Input(shape=(max_len,)))
  # Embedding layer:
  #   input  - integer matrix of shape (batch, max_len)
  #   output - float tensor of shape (batch, max_len, dim_embedding)
  # Every index in the input must be strictly less than n_words.
  model.add(layers.Embedding(n_words,
                             dim_embedding))
  model.add(layers.Dropout(0.3))
  # Collapse the sequence axis: keep the max of each embedding channel.
  model.add(layers.GlobalMaxPooling1D())
  model.add(layers.Dense(128,
                         activation='relu'))
  model.add(layers.Dropout(0.5))
  # Single sigmoid unit for the binary sentiment output.
  model.add(layers.Dense(1,
                         activation='sigmoid'))
  return model

## Train the Model

In [25]:
# Fetch the padded train/test splits, then unpack each into features and labels.
train_split, test_split = load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Build a fresh model and print its layer-by-layer summary.
model = build_model()
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 200, 256)          2560000   
_________________________________________________________________
dropout_4 (Dropout)          (None, 200, 256)          0         
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               32896     
_________________________________________________________________
dropout_5 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 129       
Total params: 2,593,025
Trainable params: 2,593,025
Non-trainable params: 0
____________________________________________

In [0]:
# Binary cross-entropy pairs with the single sigmoid output unit;
# accuracy is tracked alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [27]:
# `fit` returns a History object (per-epoch metrics), so store it under its
# own name instead of `score`, which a later cell uses for the evaluate() result.
# NOTE(review): the test split doubles as the validation set here, so the
# final evaluate() reports on data that was already monitored during training.
history = model.fit(X_train, y_train,
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    validation_data=(X_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [28]:
# evaluate() returns the loss first, followed by the compiled metrics.
score = model.evaluate(
    X_test,
    y_test,
    batch_size=BATCH_SIZE,
)
test_loss = score[0]
test_accuracy = score[1]
print('Test score: {}\n'.format(test_loss))
print('Test accuracy: {}'.format(test_accuracy))

Test score: 0.5034577250480652

Test accuracy: 0.8484799861907959
