In [1]:
import matplotlib.pyplot as plt
import os
import re
import shutil
import string
import tensorflow as tf
import keras

from tensorflow.keras import layers
from tensorflow.keras import losses

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

dataset = tf.keras.utils.get_file("aclImdb_v1", url,
                                    untar=True, cache_dir='.',
                                    cache_subdir='')

dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')


In [15]:
train_dir = os.path.join(dataset_dir, 'train')
os.listdir(train_dir)


NameError: name 'dataset_dir' is not defined

In [6]:
remove_dir = os.path.join(train_dir, 'unsup')
shutil.rmtree(remove_dir)


In [7]:
batch_size = 32
seed = 42

raw_train_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train', 
    batch_size=batch_size, 
    validation_split=0.2, 
    subset='training', 
    seed=seed)


Found 25000 files belonging to 2 classes.
Using 20000 files for training.


In [8]:
for text_batch, label_batch in raw_train_ds.take(1):
  for i in range(3):
    print("Review", text_batch.numpy()[i])
    print("Label", label_batch.numpy()[i])


Review b'"Pandemonium" is a horror movie spoof that comes off more stupid than funny. Believe me when I tell you, I love comedies. Especially comedy spoofs. "Airplane", "The Naked Gun" trilogy, "Blazing Saddles", "High Anxiety", and "Spaceballs" are some of my favorite comedies that spoof a particular genre. "Pandemonium" is not up there with those films. Most of the scenes in this movie had me sitting there in stunned silence because the movie wasn\'t all that funny. There are a few laughs in the film, but when you watch a comedy, you expect to laugh a lot more than a few times and that\'s all this film has going for it. Geez, "Scream" had more laughs than this film and that was more of a horror film. How bizarre is that?<br /><br />*1/2 (out of four)'
Label 0
Review b"David Mamet is a very interesting and a very un-equal director. His first movie 'House of Games' was the one I liked best, and it set a series of films with characters whose perspective of life changes as they get into 

In [9]:
raw_val_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train', 
    batch_size=batch_size, 
    validation_split=0.2, 
    subset='validation', 
    seed=seed)


Found 25000 files belonging to 2 classes.
Using 5000 files for validation.


In [10]:
raw_test_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/test', 
    batch_size=batch_size)


Found 25000 files belonging to 2 classes.


In [7]:
def custom_standardization(input_data):
  lowercase = tf.strings.lower(input_data)
  stripped_html = tf.strings.regex_replace(lowercase, '<br />', ' ')
  return tf.strings.regex_replace(stripped_html,
                                  '[%s]' % re.escape(string.punctuation),
                                  '')


In [8]:
max_features = 10000
sequence_length = 250

vectorize_layer = layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=max_features,
    output_mode='int',
    output_sequence_length=sequence_length)


In [13]:
# Make a text-only dataset (without labels), then call adapt
train_text = raw_train_ds.map(lambda x, y: x)
vectorize_layer.adapt(train_text)


In [14]:
def vectorize_text(text, label):
  text = tf.expand_dims(text, -1)
  return vectorize_layer(text), label

In [15]:
# retrieve a batch (of 32 reviews and labels) from the dataset
text_batch, label_batch = next(iter(raw_train_ds))
first_review, first_label = text_batch[0], label_batch[0]
print("Review", first_review)
print("Label", raw_train_ds.class_names[first_label])
print("Vectorized review", vectorize_text(first_review, first_label))

Review tf.Tensor(b'Great movie - especially the music - Etta James - "At Last". This speaks volumes when you have finally found that special someone.', shape=(), dtype=string)
Label neg
Vectorized review (<tf.Tensor: shape=(1, 250), dtype=int64, numpy=
array([[  86,   17,  260,    2,  222,    1,  571,   31,  229,   11, 2418,
           1,   51,   22,   25,  404,  251,   12,  306,  282,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
       

In [16]:
train_ds = raw_train_ds.map(vectorize_text)
val_ds = raw_val_ds.map(vectorize_text)
test_ds = raw_test_ds.map(vectorize_text)


In [17]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)


In [18]:
embedding_dim = 16


In [19]:
model = tf.keras.Sequential([
  layers.Embedding(max_features + 1, embedding_dim),
  layers.Dropout(0.2),
  layers.GlobalAveragePooling1D(),
  layers.Dropout(0.2),
  layers.Dense(1)])

model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 16)          160016    
                                                                 
 dropout (Dropout)           (None, None, 16)          0         
                                                                 
 global_average_pooling1d (  (None, 16)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense (Dense)               (None, 1)                 17        
                                                                 
Total params: 160033 (625.13 KB)
Trainable params: 160033 (625.13 KB)
Non-trainable params: 0 (0.00 Byte)
________________

In [20]:
model.compile(loss=losses.BinaryCrossentropy(from_logits=True),
              optimizer='adam',
              metrics=tf.metrics.BinaryAccuracy(threshold=0.0))


In [21]:
epochs = 10
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
loss, accuracy = model.evaluate(test_ds)

print("Loss: ", loss)
print("Accuracy: ", accuracy)


Loss:  0.3097550868988037
Accuracy:  0.8741199970245361


In [25]:
export_model = tf.keras.Sequential([
  vectorize_layer,
  model,
  layers.Activation('sigmoid')
])

export_model.compile(
    loss=losses.BinaryCrossentropy(from_logits=False), optimizer="adam", metrics=['accuracy']
)


In [26]:
# Test it with `raw_test_ds`, which yields raw strings
loss, accuracy = export_model.evaluate(raw_test_ds)
print(accuracy)


0.8741199970245361


In [27]:
export_model.predict(["I ll recommend this movie to all my friends"])



array([[0.6004984]], dtype=float32)

In [29]:
examples = [
  "The movie was great!",
  "The movie was okay.",
  "The movie was terrible...",
  "I ll recommend this movie to all my friends"    
]

export_model.predict(examples)




array([[0.6143436 ],
       [0.43581507],
       [0.35280746],
       [0.6004984 ]], dtype=float32)

In [37]:
model.save("simple_imdb_model.dat")

INFO:tensorflow:Assets written to: simple_imdb_model.dat/assets


INFO:tensorflow:Assets written to: simple_imdb_model.dat/assets


In [5]:
model2=keras.saving.load_model("simple_imdb_model.dat")

In [12]:
export_model2 = tf.keras.Sequential([
  vectorize_layer,
  model2,
  layers.Activation('sigmoid')
])

export_model2.compile(
    loss=losses.BinaryCrossentropy(from_logits=False), optimizer="adam", metrics=['accuracy']
)

In [10]:
export_model2.predict(examples)

NameError: name 'examples' is not defined

In [1]:
examples2 = ["What an amazing movie!", "A total waste of my time."]

In [13]:
export_model2.predict(examples2)

2023-12-14 17:55:16.887540: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at lookup_table_op.cc:929 : FAILED_PRECONDITION: Table not initialized.


FailedPreconditionError: Graph execution error:

Detected at node sequential_1/text_vectorization/string_lookup/None_Lookup/LookupTableFindV2 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/alex/ai_venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/home/alex/ai_venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1077, in launch_instance

  File "/home/alex/ai_venv/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/alex/ai_venv/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 607, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 80, in _run

  File "/home/alex/ai_venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 529, in dispatch_queue

  File "/home/alex/ai_venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 518, in process_one

  File "/home/alex/ai_venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 424, in dispatch_shell

  File "/home/alex/ai_venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 766, in execute_request

  File "/home/alex/ai_venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/home/alex/ai_venv/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/alex/ai_venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3048, in run_cell

  File "/home/alex/ai_venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3103, in _run_cell

  File "/home/alex/ai_venv/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/home/alex/ai_venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3308, in run_cell_async

  File "/home/alex/ai_venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3490, in run_ast_nodes

  File "/home/alex/ai_venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code

  File "/tmp/ipykernel_69011/1119723558.py", line 1, in <module>

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 2655, in predict

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 2440, in predict_function

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 2425, in step_function

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 2413, in run_step

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 2381, in predict_step

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 590, in __call__

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/sequential.py", line 398, in call

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/functional.py", line 515, in call

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/functional.py", line 672, in _run_internal_graph

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/layers/preprocessing/text_vectorization.py", line 632, in call

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1149, in __call__

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/layers/preprocessing/index_lookup.py", line 754, in call

  File "/home/alex/ai_venv/lib/python3.11/site-packages/keras/src/layers/preprocessing/index_lookup.py", line 792, in _lookup_dense

Table not initialized.
	 [[{{node sequential_1/text_vectorization/string_lookup/None_Lookup/LookupTableFindV2}}]] [Op:__inference_predict_function_1038]