### check GPU

In [34]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

print("Is there a GPU available: "),
print(tf.config.experimental.list_physical_devices("GPU"))

Is there a GPU available: 
[]


In [2]:
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
cpus = tf.config.experimental.list_physical_devices(device_type='CPU')
print(gpus, cpus)

# gpus = tf.config.experimental.list_physical_devices('GPU')
# if gpus:
#   # Restrict TensorFlow to only use the first GPU
#   try:
#     tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
#     tf.config.experimental.set_memory_growth(gpus[0], True)
#   except RuntimeError as e:
#     # Visible devices must be set at program startup
#     print(e)

if cpus:
  # Restrict TensorFlow to only use the first GPU
  try:
    tf.config.experimental.set_visible_devices(cpus[0], 'CPU')
  except RuntimeError as e:
    # Visible devices must be set at program startup
    print(e)


[] [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


### Load text into datasets

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import tensorflow_datasets as tfds
import os

base_path = './data'
raw_data_path = 'rt-polaritydata'

parent_dir = os.path.join(base_path, raw_data_path)
FILE_NAMES = ['rt-polarity-pos.txt', 'rt-polarity-neg.txt']

def labeler(example, index):
  return example, tf.cast(index, tf.int64)

labeled_data_sets = []

for i, file_name in enumerate(FILE_NAMES):
    text_dir = os.path.join(parent_dir, file_name)
    print(text_dir)
    
    lines_dataset = tf.data.TextLineDataset(text_dir)
    labeled_dataset = lines_dataset.map(lambda ex: labeler(ex, i))
    labeled_data_sets.append(labeled_dataset)

labeled_data_sets

./data/rt-polaritydata/rt-polarity-pos.txt
./data/rt-polaritydata/rt-polarity-neg.txt


[<MapDataset shapes: ((), ()), types: (tf.string, tf.int64)>,
 <MapDataset shapes: ((), ()), types: (tf.string, tf.int64)>]

In [4]:
BUFFER_SIZE = 50000
BATCH_SIZE = 64
TAKE_SIZE = 5000

all_labeled_data = labeled_data_sets[0]
for labeled_dataset in labeled_data_sets[1:]:
  all_labeled_data = all_labeled_data.concatenate(labeled_dataset)

all_labeled_data = all_labeled_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False)

for ex in all_labeled_data.take(5):
  print(ex)

(<tf.Tensor: id=53, shape=(), dtype=string, numpy=b'compelling revenge thriller , though somewhat weakened by a miscast leading lady . '>, <tf.Tensor: id=54, shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: id=55, shape=(), dtype=string, numpy=b'a pointed , often tender , examination of the pros and cons of unconditional love and familial duties . '>, <tf.Tensor: id=56, shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: id=57, shape=(), dtype=string, numpy=b"it's tommy's job to clean the peep booths surrounding her , and after viewing this one , you'll feel like mopping up , too . ">, <tf.Tensor: id=58, shape=(), dtype=int64, numpy=1>)
(<tf.Tensor: id=59, shape=(), dtype=string, numpy=b'the movie quickly drags on becoming boring and predictable . i tried to read the time on my watch . '>, <tf.Tensor: id=60, shape=(), dtype=int64, numpy=1>)
(<tf.Tensor: id=61, shape=(), dtype=string, numpy=b'light , silly , photographed with colour and depth , and rather a good time . '>, <tf.Tensor: id=62, sha

### Build vocabulary

In [5]:
import tensorflow_datasets as tfds

tokenizer = tfds.features.text.Tokenizer()

vocabulary_set = set()
for text_tensor, _ in all_labeled_data:
  some_tokens = tokenizer.tokenize(text_tensor.numpy())
  vocabulary_set.update(some_tokens)

vocab_size = len(vocabulary_set)
vocab_size

18369

### Encode examples

In [6]:
encoder = tfds.features.text.TokenTextEncoder(vocabulary_set)

In [7]:
example_text = next(iter(all_labeled_data))[0].numpy()
print(example_text)

b'compelling revenge thriller , though somewhat weakened by a miscast leading lady . '


In [8]:
encoded_example = encoder.encode(example_text)
print(encoded_example)

[2595, 6924, 5196, 8991, 2712, 11275, 17487, 13589, 16725, 5316, 12438]


In [9]:
def encode(text_tensor, label):
  encoded_text = encoder.encode(text_tensor.numpy())
  return encoded_text, label

def encode_map_fn(text, label):
  return tf.py_function(encode, inp=[text, label], Tout=(tf.int64, tf.int64))

all_encoded_data = all_labeled_data.map(encode_map_fn)

### Split the dataset into test and train batches

In [10]:
train_data = all_encoded_data.skip(TAKE_SIZE).shuffle(BUFFER_SIZE)
train_data = train_data.padded_batch(BATCH_SIZE, padded_shapes=([-1],[]))

test_data = all_encoded_data.take(TAKE_SIZE)
test_data = test_data.padded_batch(BATCH_SIZE, padded_shapes=([-1],[]))

train_data

<PaddedBatchDataset shapes: ((None, None), (None,)), types: (tf.int64, tf.int64)>

In [11]:
sample_text, sample_labels = next(iter(test_data))

sample_text[0], sample_labels[0]

(<tf.Tensor: id=21634, shape=(42,), dtype=int64, numpy=
 array([ 2595,  6924,  5196,  8991,  2712, 11275, 17487, 13589, 16725,
         5316, 12438,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0])>,
 <tf.Tensor: id=21638, shape=(), dtype=int64, numpy=0>)

In [12]:
vocab_size += 1

### Build the model

In [27]:
model = tf.keras.Sequential()

In [28]:
model.add(tf.keras.layers.Embedding(vocab_size, 64))

In [29]:
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))

In [30]:
# One or more dense layers.
# Edit the list in the `for` line to experiment with layer sizes.
for units in [64, 64]:
  model.add(tf.keras.layers.Dense(units, activation='relu'))

# Output layer. The first argument is the number of labels.
model.add(tf.keras.layers.Dense(3, activation='softmax'))

In [31]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'],
              experimental_run_tf_function=False,)

In [32]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 64)          1175680   
_________________________________________________________________
bidirectional_1 (Bidirection (None, 128)               66048     
_________________________________________________________________
dense_3 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_4 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 195       
Total params: 1,254,339
Trainable params: 1,254,339
Non-trainable params: 0
_________________________________________________________________


### Train the model

In [33]:
history = model.fit(train_data, epochs=3, validation_data=test_data)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [23]:
eval_loss, eval_acc = model.evaluate(test_data)

print('\nEval loss: {:.3f}, Eval accuracy: {:.3f}'.format(eval_loss, eval_acc))

     79/Unknown - 2s 22ms/step - loss: 0.5491 - accuracy: 0.7486
Eval loss: 0.549, Eval accuracy: 0.749


In [24]:
from hbconfig import Config

In [25]:
Config("rt-polarity")

In [26]:
Config

Read config file name: ./config/rt-polarity
{
    "data": {
        "type": "rt-polarity",
        "base_path": "data/",
        "raw_data_path": "rt-polaritydata/",
        "processed_path": "rt-polarity_processed_data",
        "testset_size": 2000,
        "num_classes": 2,
        "PAD_ID": 0
    },
    "model": {
        "batch_size": 64,
        "embed_type": "rand",
        "pretrained_embed": "",
        "embed_dim": 300,
        "num_filters": 256,
        "filter_sizes": [
            2,
            3,
            4,
            5
        ],
        "dropout": 0.5
    },
    "train": {
        "learning_rate": 1e-05,
        "train_steps": 20000,
        "model_dir": "logs/rt-polarity",
        "save_checkpoints_steps": 100,
        "loss_hook_n_iter": 100,
        "check_hook_n_iter": 100,
        "min_eval_frequency": 100,
        "print_verbose": true,
        "debug": false
    },
    "slack": {
        "webhook_url": ""
    }
}