In [34]:
#@title MIT License
#
# Copyright (c) 2017 François Chollet
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

In [1]:
from __future__ import absolute_import, division, print_function,\
    unicode_literals

In [2]:
import tensorflow as tf

In [3]:
# Switch TensorFlow 1.x into eager mode. Later cells iterate datasets with
# plain Python `for` loops and call `.numpy()` on the resulting tensors.
tf.enable_eager_execution()

In [4]:
from tensorflow import keras
import tensorflow_datasets as tfds

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [5]:
# Turn off tensorflow_datasets progress bars for the dataset calls below.
tfds.disable_progress_bar()

In [6]:
import numpy as np

In [7]:
# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

1.14.0


In [8]:
# Fetch the IMDB reviews dataset in its 8k-subword encoding.
# `as_supervised=True` yields (text, label) pairs and `with_info=True` also
# returns the dataset metadata, which carries the text encoder.
# To encode your own data, see the "Loading text" tutorial on the TF website.
(train_data, test_data), info = tfds.load(
    'imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    as_supervised=True,
    with_info=True,
)



In [9]:
# Try out the encoder: take the text encoder attached to the dataset
# metadata for the 'text' feature.
encoder = info.features['text'].encoder

In [10]:
# Report the size of the encoder's vocabulary.
print('Vocabulary Size: {}'.format(encoder.vocab_size))

Vocabulary Size: 8185


In [11]:
# The text encoder reversibly encodes any string; this sample is used for
# an encode/decode round trip in the following cells.
sample_string = 'Hello TensorFlow.'

In [12]:
# Encode the sample into a list of integer ids, then decode the ids back
# into text.
encoded_str = encoder.encode(sample_string)
original_str = encoder.decode(encoded_str)

In [13]:
# Show both representations and verify that the round trip is lossless.
print('Encoded string: {}'.format(encoded_str))
print('Original string: {}'.format(original_str))
assert original_str == sample_string

Encoded string: [4025, 222, 6307, 2327, 4043, 2120, 7975]
Original string: Hello TensorFlow.


In [14]:
# The encoder encodes a string by breaking it into subwords.
# If a word is not in the dictionary, it is broken into characters.
# The more a string resembles the dataset, the shorter the
# encoded representation will be.
# Decode each id on its own to see which piece of text it stands for.
for ts in encoded_str:
    print('{} ---> {}'.format(ts, encoder.decode([ts])))

4025 ---> Hell
222 ---> o 
6307 ---> Ten
2327 ---> sor
4043 ---> Fl
2120 ---> ow
7975 ---> .


In [15]:
# Explore the data, which comes preprocessed.
# Each example is an array of ints representing the words
# of the movie review.
# Each label is an int value of either 0 or 1: negative/positive
# review.
# NOTE: `train_example` stays bound after the loop and is decoded in the
# next cell, so that cell only works once this one has run.
for train_example, train_label in train_data.take(1):
    # Only the first 10 ids of the review are printed.
    print('Encoded text: ', train_example[:10].numpy())
    print('Label: ', train_label.numpy())

Encoded text:  [ 133   67 1011    5 5225 7961 1482 2252  755    6]
Label:  1


In [16]:
# The `info` structure contains the encoder/decoder. Use it to turn the
# example left over from the exploration loop back into readable text.
encoder.decode(train_example)

'A very close and sharp discription of the bubbling and dynamic emotional world of specialy one 18year old guy, that makes his first experiences in his gay love to an other boy, during an vacation with a part of his family.<br /><br />I liked this film because of his extremly clear and surrogated storytelling , with all this "Sound-close-ups" and quiet moments wich had been full of intensive moods.<br /><br />'

In [17]:
# Prep data for training.
# Size of the shuffle buffer passed to `train_data.shuffle` when the
# training batches are built.
BUFFER_SIZE = 100

In [18]:
# (Interactive tip: run `train_data.padded_batch?` in IPython to view the
# docstring. The stray introspection call was removed from the saved notebook
# because it is not valid Python outside IPython.)

In [19]:
# Shuffle the training set, then group it into batches of 32. Reviews differ
# in length, so `padded_batch` pads the examples in a batch to a common length.
# `Dataset.output_shapes` is deprecated in TF 1.14; the shapes are queried via
# `tf.compat.v1.data.get_output_shapes`, as the deprecation notice recommends.
train_batches = (
    train_data
    .shuffle(BUFFER_SIZE)
    .padded_batch(32, tf.compat.v1.data.get_output_shapes(train_data))
)

Instructions for updating:
Use `tf.compat.v1.data.get_output_shapes(dataset)`.


Instructions for updating:
Use `tf.compat.v1.data.get_output_shapes(dataset)`.


In [20]:
# Batch the test set (no shuffling) into padded batches of 32.
# The padded shapes are taken from `test_data` itself rather than from
# `train_data`, and via `tf.compat.v1.data.get_output_shapes` because
# `Dataset.output_shapes` is deprecated in TF 1.14.
test_batches = test_data.padded_batch(
    32, tf.compat.v1.data.get_output_shapes(test_data))

In [21]:
# Peek at two training batches. The second dimension (sequence length) can
# differ between batches because each batch is padded independently.
for example_batch, label_batch in train_batches.take(2):
    print("Batch shape: ", example_batch.shape)
    print("Label shape: ", label_batch.shape)

Batch shape:  (32, 1260)
Label shape:  (32,)
Batch shape:  (32, 1306)
Label shape:  (32,)


In [22]:
# Build a continuous bag-of-words classifier, one layer at a time.
model = keras.Sequential()
# Look up a 16-dimensional embedding vector for every integer-encoded
# word index. Resulting dimensions: (batch, sequence, embedding).
model.add(keras.layers.Embedding(encoder.vocab_size, 16))
# Average over the sequence dimension so that every example, whatever its
# length, is reduced to one fixed-length 16-dimensional vector.
model.add(keras.layers.GlobalAveragePooling1D())
# A single sigmoid unit consumes that vector directly (there is no hidden
# dense layer in between) and emits the model's prediction.
model.add(keras.layers.Dense(1, activation='sigmoid'))

In [23]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 16)          130960    
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 17        
Total params: 130,977
Trainable params: 130,977
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Restore previously trained weights from a local HDF5 file.
# NOTE: the only code that writes 'tf_model_wts.h5' is the `save_weights`
# call in the commented-out training cell, so the file must already exist
# from an earlier run or this cell fails on a fresh checkout.
model.load_weights('tf_model_wts.h5')

In [26]:
# Compile the model: Adam optimizer, binary cross-entropy loss to match the
# single sigmoid output, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [27]:
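# Train model.
# NOTE: training is currently disabled (the model runs on the weights loaded
# from 'tf_model_wts.h5'), so `history` is never created and the plotting
# cells further down fail with NameError. Uncomment the lines below to
# retrain and to regenerate the saved model and weights files.
#history = model.fit(train_batches, epochs=1, \
#                    validation_data=test_batches,\
#                   validation_steps=30)
#
#model.save('tf_model.h5')
#model.save_weights('tf_model_wts.h5')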

In [28]:
# Evaluate the model on the batched test set. `evaluate` returns the loss
# followed by the metric configured in `compile` (accuracy).
loss, accuracy = model.evaluate(test_batches)

    782/Unknown - 266s 340ms/step - loss: 0.6486 - acc: 0.7176

In [29]:
# Report the test-set loss and accuracy returned by `model.evaluate`.
print('Loss: ', loss)
print('Accuracy: ', accuracy)

Loss:  0.64856927222608
Accuracy:  0.7176


In [31]:
# Import `load_model` from the Keras bundled with TensorFlow, which is the
# implementation the rest of this notebook uses (`from tensorflow import
# keras`), instead of the separate standalone `keras` package.
from tensorflow.keras.models import load_model

In [30]:
# Create a graph of accuracy and loss over time.
# NOTE: `history` is only produced by the `model.fit(...)` call in the
# training cell, which is currently commented out, so this line raises
# NameError unless that cell is re-enabled and run first.
history_dict = history.history

NameError: name 'history' is not defined

In [None]:
import matplotlib.pyplot as plt

# Keras records the accuracy metric as 'acc'/'val_acc' in TF 1.x (the
# evaluation log in this notebook prints "acc") and as
# 'accuracy'/'val_accuracy' in newer releases, so use whichever key this
# run actually produced instead of hard-coding one of them.
acc_key = 'accuracy' if 'accuracy' in history_dict else 'acc'
acc = history_dict[acc_key]
val_acc = history_dict['val_' + acc_key]
# NOTE: this rebinds `loss` from the scalar returned by `model.evaluate`
# to the list of per-epoch training losses.
loss = history_dict['loss']
val_loss = history_dict['val_loss']

# One x-axis tick per recorded epoch, starting at 1.
epochs = range(1, len(acc) + 1)

# "bo" is for "blue dot"
plt.plot(epochs, loss, 'bo', label='Training loss')
# b is for "solid blue line"
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Plot training vs. validation accuracy per epoch. Relies on `epochs`,
# `acc` and `val_acc` defined in the loss-plotting cell.
plt.clf()   # clear figure

# Blue dots: training accuracy; solid blue line: validation accuracy.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')

plt.show()