In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import tensorflow_probability as tfp

In [2]:
(train_dataset,test_dataset),ds_info = tfds.load('imdb_reviews',
                                                 with_info = True,
                                         split = ['train','test'],
                                                  as_supervised = True,
                                         shuffle_files = True,
                                         batch_size = 128

                                                 )

Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete5EO6JY/imdb_reviews-train.tfrecord…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete5EO6JY/imdb_reviews-test.tfrecord*…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete5EO6JY/imdb_reviews-unsupervised.t…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [3]:
def text_vectorization(dataset,vectorizer):
  """
  Uses TextVectorization Layer outside of the model.
  The purpose is to make the NN model suitable to .h5 format.

  Parameters
  ----------
  dataset: Tensorflow dataset containing data and labels.
  vectorizer: A TextVectorization layer adapted to train_dataset
  """
  #conduct all the process in CPU to CPU-GPU conflict
  with tf.device("/cpu:0"):
    outputs = []
    label_list = []

    #tokenize and pad the data and store it
    for x, y in dataset:
        output = vectorizer(x)
        outputs.append(output)
        label_list.append(y)

    #concatenate the labels and data
    X_vectorized = tf.concat(outputs, axis=0)
    Y_labels = tf.concat(label_list, axis=0)

    vectorized = tf.data.Dataset.from_tensor_slices((X_vectorized, Y_labels)).batch(32).prefetch(tf.data.AUTOTUNE)
    return vectorized

def define_callbacks(model):
  es = tf.keras.callbacks.EarlyStopping(patience = 5,verbose = 1, restore_best_weights = True)
  mc = tf.keras.callbacks.ModelCheckpoint(filepath = f"./ModelCheckpoints/{model.name}.ckpt",
                                         save_best_only = True,
                                         save_weights_only = True)
  tb = tf.keras.callbacks.TensorBoard(log_dir = f"./TensorboardLogs/{model.name}")

  return es,mc,tb

In [4]:
#get the vocab length
vectorizer = layers.TextVectorization(max_tokens = None)
vectorizer.adapt(train_dataset.map(lambda x,y: x))
total_words = vectorizer.vocabulary_size()

In [5]:
#save the length of the sequences to find the output sequence length
text_lengths = []
train_dataset_unbatched = train_dataset.unbatch()
for text,label in train_dataset_unbatched: #unbatch the dataset
  text_lengths.append(len(text.numpy().split())) #append the sentence length

In [6]:
MAX_TOKENS = 20000
OUTPUT_SEQUENCE_LENGTH = int(tfp.stats.percentile(text_lengths,95).numpy())
EMBEDDING_DIM = 128
MODEL_NAME = "model1"

#define the actual vectorizer
vectorizer = tf.keras.layers.TextVectorization(max_tokens = MAX_TOKENS,
                                               output_sequence_length = OUTPUT_SEQUENCE_LENGTH)

vectorizer.adapt(train_dataset.map(lambda x,y: x))
#preprocess the data
train_dataset_vectorized = text_vectorization(train_dataset,vectorizer)
test_dataset_vectorized = text_vectorization(test_dataset,vectorizer)


inputs = tf.keras.layers.Input(shape = (598,))

x = layers.Embedding(input_dim = MAX_TOKENS,
                      output_dim = EMBEDDING_DIM)(inputs)
x = layers.GlobalAveragePooling1D()(x)
outputs = layers.Dense(1,activation = 'sigmoid')(x)

#create the base model
model = tf.keras.Model(inputs,outputs,name = MODEL_NAME)

#create the callbacks
es,mc,tb = define_callbacks(model)

#compile the model
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

#fit the model
history = model.fit(train_dataset_vectorized,validation_data = test_dataset_vectorized,epochs = 20,
                    callbacks = [es,mc,tb])



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping


In [8]:
MAX_TOKENS = 20000
OUTPUT_SEQUENCE_LENGTH = int(tfp.stats.percentile(text_lengths,95).numpy())
EMBEDDING_DIM = 128
MODEL_NAME = "model2"

#define the actual vectorizer
vectorizer = tf.keras.layers.TextVectorization(max_tokens = MAX_TOKENS,
                                               output_sequence_length = OUTPUT_SEQUENCE_LENGTH)
vectorizer.adapt(train_dataset.map(lambda x,y: x))
#preprocess the data
train_dataset_vectorized = text_vectorization(train_dataset,vectorizer)
test_dataset_vectorized = text_vectorization(test_dataset,vectorizer)


inputs = tf.keras.layers.Input(shape = (598,))

x = layers.Embedding(input_dim = MAX_TOKENS,
                      output_dim = EMBEDDING_DIM)(inputs)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dense(64,activation = 'relu')(x)
outputs = layers.Dense(1,activation = 'sigmoid')(x)

#create the base model
model2 = tf.keras.Model(inputs,outputs,name = MODEL_NAME)

#create the callbacks
es,mc,tb = define_callbacks(model2)

#compile the model
model2.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

#fit the model
history2 = model2.fit(train_dataset_vectorized,validation_data = test_dataset_vectorized,epochs = 20,
                    callbacks = [es,mc,tb])


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping


In [21]:
MAX_TOKENS = 10000
OUTPUT_SEQUENCE_LENGTH = int(tfp.stats.percentile(text_lengths,95).numpy())
EMBEDDING_DIM = 50
MODEL_NAME = 'model3'

#define the actual vectorizer
vectorizer = tf.keras.layers.TextVectorization(max_tokens = MAX_TOKENS,
                                               output_sequence_length = OUTPUT_SEQUENCE_LENGTH)
vectorizer.adapt(train_dataset.map(lambda x,y: x))

#preprocess the data
train_dataset_vectorized = text_vectorization(train_dataset,vectorizer)
test_dataset_vectorized = text_vectorization(test_dataset,vectorizer)

inputs = tf.keras.layers.Input(shape = (598,))
x = layers.Embedding(input_dim = MAX_TOKENS,
                      output_dim = EMBEDDING_DIM)(inputs)
x = layers.GlobalAveragePooling1D()(x)
#x = layers.Dense(64,activation = 'relu')(x)
outputs = layers.Dense(1,activation = 'sigmoid')(x)

#create the base model
model3 = tf.keras.Model(inputs,outputs,name = MODEL_NAME)

#create the callbacks
es,mc,tb = define_callbacks(model3)

#compile the model
model3.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

#fit the model
history3 = model3.fit(train_dataset_vectorized,validation_data = test_dataset_vectorized,epochs = 20,
                    callbacks = [es,mc,tb])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 14: early stopping


In [22]:
MAX_TOKENS = 10000
OUTPUT_SEQUENCE_LENGTH = int(tfp.stats.percentile(text_lengths,95).numpy())
EMBEDDING_DIM = 50
MODEL_NAME = 'model4'


inputs = tf.keras.layers.Input(shape = (598,))
x = layers.Embedding(input_dim = MAX_TOKENS,
                      output_dim = EMBEDDING_DIM)(inputs)
x = layers.Flatten()(x)
outputs = layers.Dense(1,activation = 'sigmoid')(x)

#create the base model
model4 = tf.keras.Model(inputs,outputs,name = MODEL_NAME)

#create the callbacks
es,mc,tb = define_callbacks(model4)

#compile the model
model4.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

#fit the model
history4 = model4.fit(train_dataset_vectorized,validation_data = test_dataset_vectorized,epochs = 20,
                    callbacks = [es,mc,tb])



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping


In [23]:
!tensorboard dev upload --logdir /content/TensorboardLogs


***** TensorBoard Uploader *****

This will upload your TensorBoard logs to https://tensorboard.dev/ from
the following directory:

/content/TensorboardLogs

This TensorBoard will be visible to everyone. Do not upload sensitive
data.

Your use of this service is subject to Google's Terms of Service
<https://policies.google.com/terms> and Privacy Policy
<https://policies.google.com/privacy>, and TensorBoard.dev's Terms of Service
<https://tensorboard.dev/policy/terms/>.

This notice will not be shown again while you are logged into the uploader.
To log out, run `tensorboard dev auth revoke`.

Continue? (yes/NO) yes

To sign in with the TensorBoard uploader:

1. On your computer or phone, visit:

   https://www.google.com/device

2. Sign in with your Google account, then enter:

   NHCS-QFSL


Upload started and will continue reading any new data as it's added to the logdir.

To stop uploading, press Ctrl-C.

New experiment created. View your TensorBoard at: https://tensorboard.dev/expe

Model1 is the best one

**ÇOK ÖNEMLİ** TEXT VECTORIZATION H5'E DAHİL EDİLEMEZ.

In [25]:
#clone the model
final_model = tf.keras.models.clone_model(model)

#load the best weights
final_model.load_weights("/content/ModelCheckpoints/model1.ckpt")

#compile the model
final_model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

#evaluate the test dataset
final_model.evaluate(test_dataset_vectorized)



[0.28552237153053284, 0.8859999775886536]

In [26]:
!tensorboard dev list

https://tensorboard.dev/experiment/yGLy3KODTamIar5z7XTjkg/
	Name                 [No Name]
	Description          [No Description]
	Id                   yGLy3KODTamIar5z7XTjkg
	Created              2023-10-01 06:50:01 (5 minutes ago)
	Updated              2023-10-01 06:50:05 (5 minutes ago)
	Runs                 8
	Tags                 5
	Scalars              228
	Tensor bytes         0
	Binary object bytes  231713
https://tensorboard.dev/experiment/gyy8IjoPTyOm7cx9EyE2rw/
	Name                 [No Name]
	Description          [No Description]
	Id                   gyy8IjoPTyOm7cx9EyE2rw
	Created              2023-09-29 08:49:11
	Updated              2023-09-29 08:49:16
	Runs                 8
	Tags                 5
	Scalars              228
	Tensor bytes         0
	Binary object bytes  279251
https://tensorboard.dev/experiment/TXGkSngLSkSRdi6qqoKIvQ/
	Name                 [No Name]
	Description          [No Description]
	Id                   TXGkSngLSkSRdi6qqoKIvQ
	Created             

In [29]:
!tensorboard dev delete --experiment_id TXGkSngLSkSRdi6qqoKIvQ

Deleted experiment TXGkSngLSkSRdi6qqoKIvQ.
