In [2]:
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np

In [23]:
# Show the installed TensorFlow version so this run can be tied to a release.
tf.__version__

'2.13.0-rc0'

### Fetch the data

In [3]:
# Load the IMDB reviews dataset through TensorFlow Datasets.
# with_info=True is why two values come back (the splits and `info`);
# as_supervised=True is requested so each example is a (text, label) pair.
data, info = tfds.load(
    'imdb_reviews',
    as_supervised=True,
    with_info=True,
)

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2023-07-01 12:19:37.069886: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-07-01 12:19:37.070512: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [4]:
# Inspect what tfds.load returned: a dict mapping split names to datasets.
print(data)

{'train': <_PrefetchDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>, 'test': <_PrefetchDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>, 'unsupervised': <_PrefetchDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>}


### Split the data into training and testing sets

In [5]:
# Keep only the two labelled splits that the rest of the notebook uses.
train_data = data['train']
test_data = data['test']

print(train_data)

<_PrefetchDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>


In [6]:
def parse_review(dataset):
    """Unpack an iterable of (review, label) pairs into two parallel lists.

    For every pair, ``.numpy()`` is called on both members. The review value
    is decoded from bytes as UTF-8 text; the label value is kept exactly as
    ``.numpy()`` returns it.

    Args:
        dataset: Iterable yielding ``(review, label)`` pairs whose members
            expose ``.numpy()``; the review's value must be ``bytes``.

    Returns:
        A ``(reviews, labels)`` tuple of lists, both in iteration order.
    """
    decoded = [
        (text.numpy().decode('utf8'), target.numpy())
        for text, target in dataset
    ]
    return [text for text, _ in decoded], [target for _, target in decoded]

In [7]:
# Eagerly iterate both splits and turn them into Python lists of decoded
# review strings and label values (see parse_review).
train_review, train_label = parse_review(train_data)
test_review, test_label = parse_review(test_data)

2023-07-01 12:19:39.336136: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int64 and shape [1]
	 [[{{node Placeholder/_4}}]]
2023-07-01 12:19:39.336488: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [1]
	 [[{{node Placeholder/_2}}]]
2023-07-01 12:19:40.608334: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [1]
	 [[{{no

In [8]:
# Sanity check: each split should have exactly as many labels as reviews.
print(len(train_review), len(train_label))
print(len(test_review), len(test_label))

25000 25000
25000 25000


### Tokenize words and pad them

In [9]:
# Vocabulary-size setting passed to the tokenizer as num_words.
numwords = 20000

# Short alias for the Keras padding helper.
pad = tf.keras.preprocessing.sequence.pad_sequences

tokenizer = tf.keras.preprocessing.text.Tokenizer(
    oov_token='<OOV>',
    num_words=numwords,
)

# The vocabulary is fitted on the training reviews only; the test reviews
# are then encoded with that same fitted tokenizer.
tokenizer.fit_on_texts(train_review)

train_seq, test_seq = (
    tokenizer.texts_to_sequences(train_review),
    tokenizer.texts_to_sequences(test_review),
)

In [13]:
# Token count of the longest encoded training review (0 if there are none).
# Using max() directly avoids leaving a stray loop variable `i` in the
# notebook namespace, which the hand-written loop did.
mx_len = max(map(len, train_seq), default=0)

In [14]:



# Both splits are padded/truncated with the same settings so they end up
# with the same fixed width (maxlen=120, padding and truncation both 'post').
pad_options = dict(maxlen=120, padding='post', truncating='post')

train_pad = pad(train_seq, **pad_options)
test_pad = pad(test_seq, **pad_options)

# One shape per line: training split first, then test split.
print(train_pad.shape, test_pad.shape, sep='\n')

(25000, 120)
(25000, 120)


In [15]:
# Shape the test labels take once converted to an array.
np.shape(test_label)

(25000,)

### Define Model

In [20]:
# Loss and optimizer are created up front and handed to compile() below.
loss = tf.keras.losses.BinaryCrossentropy()
optim = tf.keras.optimizers.Adam(1e-4)

# Binary classifier: embedding -> bidirectional LSTM -> small dense head with
# a single sigmoid output. input_length=120 matches the maxlen used when the
# sequences were padded.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Embedding(input_dim=numwords+1, output_dim=10, input_length=120)
)
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(8)))
model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(optimizer=optim, loss=loss, metrics=['acc'])




In [21]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the cell output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 120, 10)           200010    
                                                                 
 bidirectional_1 (Bidirecti  (None, 16)                1216      
 onal)                                                           
                                                                 
 dense_2 (Dense)             (None, 6)                 102       
                                                                 
 dense_3 (Dense)             (None, 1)                 7         
                                                                 
Total params: 201335 (786.46 KB)
Trainable params: 201335 (786.46 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


### Train Model

In [22]:
# The labels collected earlier are Python lists; convert them to float32
# arrays before handing them to Keras.
train_label_mdl, test_label_mdl = (
    np.array(train_label, dtype=np.float32),
    np.array(test_label, dtype=np.float32),
)

# NOTE(review): validation_data is the test split, so the per-epoch
# validation metrics are computed on the same data that would otherwise serve
# as the final hold-out set — confirm this is intended.
history = model.fit(
    x=train_pad,
    y=train_label_mdl,
    validation_data=(test_pad, test_label_mdl),
    batch_size=250,
    epochs=15,
)

Epoch 1/15


2023-07-01 12:21:08.361618: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-07-01 12:21:08.586596: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-07-01 12:21:08.603984: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-07-01 12:21:08.829975: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-07-01 12:21:08.854500: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-07-01 12:21:14.327851: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-07-01 12:21:14.424194: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-07-01 12:21:14.437070: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
