In [None]:
!pip -q install tensorflow-text==2.8.1
!pip -q install tf-models-official==2.8.0

[K     |████████████████████████████████| 4.9 MB 5.5 MB/s 
[?25h

In [None]:
!pip install tensorflow-datasets==4.5.2

Collecting tensorflow-datasets==4.5.2
  Downloading tensorflow_datasets-4.5.2-py3-none-any.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 5.2 MB/s 
Installing collected packages: tensorflow-datasets
  Attempting uninstall: tensorflow-datasets
    Found existing installation: tensorflow-datasets 4.0.1
    Uninstalling tensorflow-datasets-4.0.1:
      Successfully uninstalled tensorflow-datasets-4.0.1
Successfully installed tensorflow-datasets-4.5.2


In [None]:
import os
import shutil

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization

import matplotlib.pyplot as plt
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
import numpy as np
import tensorflow_datasets as tfds

# Only show TensorFlow log messages of severity ERROR or higher.
tf.get_logger().setLevel('ERROR')

In [None]:
# Choose the tf.distribute strategy: a TPUStrategy when a TPU cluster can be
# resolved, otherwise TensorFlow's default strategy.
try:
    # TPU detection. No parameters necessary if TPU_NAME environment variable is
    # set: this is always the case on Kaggle.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # Treated as "no TPU available"; fall back to the default strategy below.
    tpu = None

if tpu is not None:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

# Use the stable name; tf.data.experimental.AUTOTUNE is the deprecated alias of
# the same constant, and the rest of the notebook already uses tf.data.AUTOTUNE.
AUTOTUNE = tf.data.AUTOTUNE
print("REPLICAS: ", strategy.num_replicas_in_sync)

Running on TPU  grpc://10.58.5.114:8470
REPLICAS:  8


In [None]:
# Global batch size: 32 examples per replica, scaled by the replica count.
BATCH_SIZE = 32 * strategy.num_replicas_in_sync
AUTOTUNE = tf.data.AUTOTUNE
# Passed as random_state to the train/validation/test splits below.
seed = 42

In [None]:
# Kaggle-only helper: look up the GCS path of the attached 'sentiment140'
# dataset (presumably needed so TPU workers can read it - confirm).
from kaggle_datasets import KaggleDatasets
GCS_DS_PATH = KaggleDatasets().get_gcs_path('sentiment140')
GCS_DS_PATH

In [None]:
# Use the first CSV file found directly under the dataset's GCS path.
data = tf.io.gfile.glob(GCS_DS_PATH + "/*.csv")[0]
data

In [None]:
# The CSV is read without a header row, so column names are supplied here.
column_names = ['label', 'id', 'date', 'flag', 'user', 'text']
df = pd.read_csv(
    data,
    header=None,
    names=column_names,
    encoding='latin-1',
)

In [None]:
# Remap the raw labels to a binary target (0 stays 0, 4 becomes 1) and store
# them as int32. The result is assigned back to the column explicitly:
# `df.label.replace(..., inplace=True)` mutates an intermediate Series and is
# not guaranteed to update `df` (it is a no-op under pandas copy-on-write).
df['label'] = df['label'].replace({0: 0, 4: 1}).astype('int32')

In [None]:
# https://www.kaggle.com/code/prashant268/sentiment-analysis-lstm/notebook

# English stop words with 'not' taken out of the set (presumably so negation
# would survive filtering). NOTE: stop-word filtering is currently disabled,
# so `stop_words` is not applied to the text below.
stop_words = set(stopwords.words('english'))
stop_words.remove('not')

# Noise to blank out: @mentions, URLs, and any run of non-alphanumeric
# characters. Written as a raw string so `\S` reaches the regex engine
# unchanged instead of being an invalid Python string escape.
noise_pattern = r'@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+'

# Vectorised clean-up over the whole column (no per-row Python loop and no
# dependence on the index labels being 0..n-1):
#   1. replace noise with a space, 2. lower-case,
#   3. split on whitespace and re-join with single spaces.
df['text'] = (
    df['text']
    .str.replace(noise_pattern, ' ', regex=True)
    .str.lower()
    .str.split()
    .str.join(' ')
)
df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of all rows for testing, then 25% of the remaining 80% for
# validation: a 60/20/20 train/validation/test split overall.
train_df,test_df = train_test_split(df, test_size=0.20,random_state=seed)
train_df,val_df = train_test_split(train_df, test_size=0.25,random_state=seed)

In [None]:
# Keep only the columns used for modelling and renumber the rows from 0.
# reset_index returns a new frame, so its result must be assigned back;
# calling it without assignment leaves the shuffled index in place.
train_df = train_df.loc[:, ['label', 'text']].reset_index(drop=True)
val_df = val_df.loc[:, ['label', 'text']].reset_index(drop=True)
test_df = test_df.loc[:, ['label', 'text']].reset_index(drop=True)

test_df.head()

In [None]:
def _to_batched_dataset(frame):
    """Turn a frame's (text, label) columns into a batched, cached, prefetched dataset."""
    pairs = tf.data.Dataset.from_tensor_slices((frame.text, frame.label))
    return pairs.batch(BATCH_SIZE).cache().prefetch(buffer_size=AUTOTUNE)


train_ds = _to_batched_dataset(train_df)
val_ds = _to_batched_dataset(val_df)
test_ds = _to_batched_dataset(test_df)

In [None]:
# Print one batch as a sanity check; take(1) already limits the loop to a
# single iteration.
for item in train_ds.take(1):
    print(item)

In [None]:
# Per the TF Hub documentation, "UNCOMPRESSED" makes hub read model files
# directly from remote storage instead of downloading them to a local cache
# (presumably required here because TPU workers cannot see local files - confirm).
os.environ["TFHUB_MODEL_LOAD_FORMAT"]="UNCOMPRESSED"

In [None]:
# Name of the encoder to fine-tune; must be a key of both tables below.
bert_model_name = 'small_bert/bert_en_uncased_L-4_H-512_A-8'

# The 24 small_bert variants share one naming scheme: every combination of
# L in {2..12} and H in {128, 256, 512, 768}, with A always equal to H // 64.
_small_bert_names = [
    f'small_bert/bert_en_uncased_L-{layers}_H-{hidden}_A-{hidden // 64}'
    for layers in (2, 4, 6, 8, 10, 12)
    for hidden in (128, 256, 512, 768)
]

# Preprocessing model shared by every uncased English encoder in the tables.
_uncased_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'

# Encoder name -> TF Hub handle of the encoder.
map_name_to_handle = {
    'bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3',
    'bert_en_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/3',
    'bert_multi_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/3',
    **{name: f'https://tfhub.dev/tensorflow/{name}/1'
       for name in _small_bert_names},
    'albert_en_base':
        'https://tfhub.dev/tensorflow/albert_en_base/2',
    'electra_small':
        'https://tfhub.dev/google/electra_small/2',
    'electra_base':
        'https://tfhub.dev/google/electra_base/2',
    'experts_pubmed':
        'https://tfhub.dev/google/experts/bert/pubmed/2',
    'experts_wiki_books':
        'https://tfhub.dev/google/experts/bert/wiki_books/2',
    'talking-heads_base':
        'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1',
}

# Encoder name -> TF Hub handle of the matching preprocessing model.
map_model_to_preprocess = {
    'bert_en_uncased_L-12_H-768_A-12': _uncased_preprocess,
    'bert_en_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3',
    **dict.fromkeys(_small_bert_names, _uncased_preprocess),
    'bert_multi_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3',
    'albert_en_base':
        'https://tfhub.dev/tensorflow/albert_en_preprocess/3',
    'electra_small': _uncased_preprocess,
    'electra_base': _uncased_preprocess,
    'experts_pubmed': _uncased_preprocess,
    'experts_wiki_books': _uncased_preprocess,
    'talking-heads_base': _uncased_preprocess,
}

# Resolve both handles for the selected encoder.
tfhub_handle_encoder = map_name_to_handle[bert_model_name]
tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]

print(f'BERT model selected           : {tfhub_handle_encoder}')
print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

BERT model selected           : https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1
Preprocess model auto-selected: https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3


In [None]:
def make_bert_preprocess_model(sentence_features, seq_length=128):
    """Build a Keras model that converts raw string features into BERT inputs.

    The preprocessing SavedModel is loaded from the module-level
    `tfhub_handle_preprocess` handle.

    Args:
        sentence_features: list of names of the string-valued features; one
            model input is created per name, in this order.
        seq_length: integer sequence length passed to the packing step.

    Returns:
        A tf.keras.Model whose inputs are the string features (as a list or
        dict keyed by the names in `sentence_features`) and whose output is
        whatever the preprocessor's `bert_pack_inputs` returns.
    """
    preprocessor = hub.load(tfhub_handle_preprocess)

    # One scalar string input per feature, named after the feature.
    text_inputs = []
    for feature_name in sentence_features:
        text_inputs.append(
            tf.keras.layers.Input(shape=(), dtype=tf.string, name=feature_name))

    # Tokenize every input with the same tokenizer layer.
    tokenize = hub.KerasLayer(preprocessor.tokenize, name='tokenizer')
    tokenized = [tokenize(text_input) for text_input in text_inputs]

    # No explicit trimming: the tokenized segments go to the packer as-is and
    # it is left to apply its default truncation to seq_length.
    pack_inputs = hub.KerasLayer(
        preprocessor.bert_pack_inputs,
        arguments=dict(seq_length=seq_length),
        name='packer')

    return tf.keras.Model(text_inputs, pack_inputs(tokenized))

In [None]:
tfds_name = 'sentiment140'
tfds_info = tfds.builder(tfds_name).info

# Start from every feature of the dataset and strip the ones that are not
# fed to the model as text.
non_text_features = ('polarity', 'date', 'query', 'user')
sentence_features = list(tfds_info.features.keys())
for feature_name in non_text_features:
    sentence_features.remove(feature_name)

available_splits = list(tfds_info.splits.keys())
train_split = 'train'
validation_split = 'validation'
test_split = 'test'

# A single output unit (binary target), not the dataset's own class count.
num_classes = 1 #tfds_info.features['polarity'].num_classes
num_examples = tfds_info.splits.total_num_examples

print(
    f'Using {tfds_name} from TFDS',
    f'This dataset has {num_examples} examples',
    f'Number of classes: {num_classes}',
    f'Features {sentence_features}',
    f'Splits {available_splits}',
    sep='\n',
)

with tf.device('/job:localhost'):
  # batch_size=-1 is a way to load the dataset into memory
  in_memory_ds = tfds.load(tfds_name, batch_size=-1, shuffle_files=True)

# The code below is just to show some samples from the selected dataset
# print(f'Here are some sample rows from {tfds_name} dataset')
# sample_dataset = tf.data.Dataset.from_tensor_slices(in_memory_ds[train_split])

# labels_names = tfds_info.features['label'].names
# print(labels_names)
# print()

# sample_i = 1
# for sample_row in sample_dataset.take(5):
#     samples = [sample_row[feature] for feature in sentence_features]
#     print(f'sample row {sample_i}')
#     for sample in samples:
#         print(sample.numpy())
#     sample_label = sample_row['label']

#     print(f'label: {sample_label} ({labels_names[sample_label]})')
#     print()
#     sample_i += 1

Using sentiment140 from TFDS
This dataset has 1600498 examples
Number of classes: 1
Features ['text']
Splits ['test', 'train']
[1mDownloading and preparing dataset 77.59 MiB (download: 77.59 MiB, generated: 305.13 MiB, total: 382.73 MiB) to /root/tensorflow_datasets/sentiment140/1.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/1600000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/sentiment140/1.0.0.incomplete0D3183/sentiment140-train.tfrecord*...:   0%|…

Generating test examples...:   0%|          | 0/498 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/sentiment140/1.0.0.incomplete0D3183/sentiment140-test.tfrecord*...:   0%| …

[1mDataset sentiment140 downloaded and prepared to /root/tensorflow_datasets/sentiment140/1.0.0. Subsequent calls will reuse this data.[0m


In [None]:
def load_dataset_from_tfds(in_memory_ds, info, split, batch_size,
                           bert_preprocess_model):
    """Build the batched, BERT-preprocessed input pipeline for one split.

    Args:
        in_memory_ds: mapping indexed by split name; `in_memory_ds[split]`
            is sliced into individual examples.
        info: dataset info; `info.splits[split].num_examples` is read.
        split: split name. Splits whose name starts with 'train' are
            shuffled (buffer = whole split) and repeated indefinitely.
        batch_size: number of examples per batch.
        bert_preprocess_model: callable applied to each batch of raw
            features to produce the model inputs.

    Returns:
        A `(dataset, num_examples)` tuple; the dataset yields
        `(model_inputs, label)` batches with labels from `process_label`.
    """
    dataset = tf.data.Dataset.from_tensor_slices(in_memory_ds[split])
    num_examples = info.splits[split].num_examples

    # Only the training stream is shuffled and repeated; everything after
    # this point is the same for every split.
    if split.startswith('train'):
        dataset = dataset.shuffle(num_examples).repeat()

    # NOTE(review): for training splits cache() comes after repeat(), so the
    # cached stream is unbounded - confirm memory use is acceptable.
    dataset = (
        dataset
        .batch(batch_size)
        .map(lambda ex: (bert_preprocess_model(ex), ex['polarity']))
        .map(lambda ex, label: (ex, process_label(label)))
        .cache()
        .prefetch(buffer_size=AUTOTUNE)
    )
    return dataset, num_examples

In [None]:
def process_label(label):
    """Binarize labels element-wise: 1 where `label` equals 4, otherwise 0."""
    is_four = tf.equal(label, 4)
    return tf.where(is_four, 1, 0)

In [None]:
def build_classifier_model(num_classes):
    """Create the fine-tunable classifier on top of the selected TF Hub encoder.

    Args:
        num_classes: number of units in the final dense layer.

    Returns:
        A tf.keras.Model named "prediction" that maps preprocessed text to
        the raw outputs of the dense layer (no activation is applied).
    """

    class Classifier(tf.keras.Model):
        """Encoder -> dropout(0.5) -> dense head."""

        def __init__(self, num_classes):
            super().__init__(name="prediction")
            # The encoder is loaded from the module-level handle and trained
            # together with the head.
            self.encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True)
            self.dropout = tf.keras.layers.Dropout(0.5)
            self.dense = tf.keras.layers.Dense(num_classes)

        def call(self, preprocessed_text):
            pooled = self.encoder(preprocessed_text)["pooled_output"]
            return self.dense(self.dropout(pooled))

    return Classifier(num_classes)

In [None]:
# with strategy.scope():
#     classifier_model = build_classifier_model()

#     loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
#     metrics = tf.metrics.BinaryAccuracy()

#     epochs = 5
#     steps_per_epoch = tf.data.experimental.cardinality(train_ds).numpy()
#     num_train_steps = steps_per_epoch * epochs
#     num_warmup_steps = int(0.1*num_train_steps)

#     init_lr = 3e-5
#     optimizer = optimization.create_optimizer(init_lr=init_lr,
#                                               num_train_steps=num_train_steps,
#                                               num_warmup_steps=num_warmup_steps,
#                                               optimizer_type='adamw')

#     classifier_model.compile(optimizer=optimizer,
#                              loss=loss,
#                              metrics=metrics)

In [None]:
# train_dataset, train_data_size = load_dataset_from_tfds(
#       in_memory_ds, tfds_info, train_split, batch_size, bert_preprocess_model)

In [None]:
# for item in train_dataset.take(1):
#     print(item)
#     break

In [None]:
# Training hyper-parameters.
epochs = 1
BATCH_SIZE = 128 * 8
init_lr = 2e-5
# Batch size used for the (small) test split during validation.
eval_batch_size = 8

print(f'Fine tuning {tfhub_handle_encoder} model')
bert_preprocess_model = make_bert_preprocess_model(sentence_features)

with strategy.scope():

    # metric have to be created inside the strategy scope
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    metrics = tf.metrics.BinaryAccuracy()

    train_dataset, train_data_size = load_dataset_from_tfds(
        in_memory_ds, tfds_info, train_split, BATCH_SIZE, bert_preprocess_model)
    # Use BATCH_SIZE here: a lower-case `batch_size` is not defined anywhere
    # in the notebook and would raise NameError on a fresh kernel.
    steps_per_epoch = train_data_size // BATCH_SIZE
    num_train_steps = steps_per_epoch * epochs
    # Warm up the learning rate over the first 10% of the training steps.
    num_warmup_steps = num_train_steps // 10

    # All arguments are passed positionally: a positional argument after
    # `batch_size=8` is a SyntaxError.
    test_dataset, test_data_size = load_dataset_from_tfds(
        in_memory_ds, tfds_info, test_split, eval_batch_size,
        bert_preprocess_model)
    test_steps = test_data_size // eval_batch_size

    classifier_model = build_classifier_model(num_classes)

    optimizer = optimization.create_optimizer(
        init_lr=init_lr,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        optimizer_type='adamw')

    classifier_model.compile(optimizer=optimizer, loss=loss, metrics=[metrics])

Fine tuning https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1 model


  inputs = self._flatten_to_reference_inputs(inputs)


In [None]:
# The training dataset repeats indefinitely, so steps_per_epoch defines the
# length of an epoch; validation_steps limits evaluation to test_steps batches.
classifier_model.fit(
      x=train_dataset,
      validation_data=test_dataset,
      steps_per_epoch=steps_per_epoch,
      epochs=epochs,
      validation_steps=test_steps)

Epoch 1/10


  "shape. This may consume a large amount of memory." % value)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f98f4f83f50>

In [None]:
# Export directory: <main_save_path>/<dataset name>_<encoder name>, where the
# encoder name is the second-to-last path component of the TF Hub handle.
main_save_path = '/content'
bert_type = tfhub_handle_encoder.split('/')[-2]
saved_model_name = f'{tfds_name.replace("/", "_")}_{bert_type}'

saved_model_path = os.path.join(main_save_path, saved_model_name)

# Chain preprocessing and the fine-tuned classifier into a single model so the
# exported SavedModel takes raw strings as input.
preprocess_inputs = bert_preprocess_model.inputs
bert_encoder_inputs = bert_preprocess_model(preprocess_inputs)
bert_outputs = classifier_model(bert_encoder_inputs)
model_for_export = tf.keras.Model(preprocess_inputs, bert_outputs)

print('Saving', saved_model_path)

# Save everything on the Colab host (even the variables from TPU memory)
save_options = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
model_for_export.save(saved_model_path, include_optimizer=False,
                      options=save_options)

Saving /content/sentiment140_bert_en_uncased_L-4_H-512_A-8




In [None]:
# Load the exported SavedModel back on the local host to check the export.
with tf.device('/job:localhost'):
  reloaded_model = tf.saved_model.load(saved_model_path)
  # reloaded_model = tf.keras.models.load_model('/content/sentiment140_bert_en_uncased_L-4_H-512_A-8')

In [None]:
def prepare(record):
    """Return one single-element list per sentence feature of `record`.

    The features are taken in the order of the module-level
    `sentence_features` list.
    """
    return [[record[feature_name]] for feature_name in sentence_features]

# Run the reloaded model on five shuffled test examples and print its raw outputs.
with tf.device('/job:localhost'):
  test_dataset = tf.data.Dataset.from_tensor_slices(in_memory_ds[test_split])
  sample_rows = test_dataset.shuffle(1000).map(prepare).take(5)
  for test_row in sample_rows:
      print(test_row)
      # test_row holds one entry per sentence feature; the first is passed to the model.
      result = reloaded_model(test_row[0])
      print(result)

(<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b'Found NOTHING at Nike Factory :/ Off to Banana Republic Outlet! http://myloc.me/2zic'],
      dtype=object)>,)
tf.Tensor([[-2.8902736]], shape=(1, 1), dtype=float32)
(<tf.Tensor: shape=(1,), dtype=string, numpy=array([b'lebron and zydrunas are such an awesome duo'], dtype=object)>,)
tf.Tensor([[4.532045]], shape=(1, 1), dtype=float32)
(<tf.Tensor: shape=(1,), dtype=string, numpy=array([b'My Kindle2 came and I LOVE it! :)'], dtype=object)>,)
tf.Tensor([[4.2519464]], shape=(1, 1), dtype=float32)
(<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b"I'm really loving the new search site Wolfram/Alpha. Makes Google seem so ... quaint. http://www72.wolframalpha.com/"],
      dtype=object)>,)
tf.Tensor([[3.9383612]], shape=(1, 1), dtype=float32)
(<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b"@KarrisFoxy If you're being harassed by calls about your car warranty, changing your number won't fix that. They call every number. #d-bags

In [None]:
# Probe the model with an empty string: print the raw output and the same
# value passed through a sigmoid.
with tf.device('/job:localhost'):
  sentence = tf.constant([""])
  # Run the model once and reuse the result for both prints.
  logits = reloaded_model(sentence)
  print(logits)
  print(tf.sigmoid(logits))

tf.Tensor([[-0.4316727]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.393727]], shape=(1, 1), dtype=float32)


In [None]:
!zip -r /content/model.zip /content/sentiment140_bert_en_uncased_L-4_H-512_A-8

  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/ (stored 0%)
  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/saved_model.pb (deflated 92%)
  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/keras_metadata.pb (deflated 83%)
  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/assets/ (stored 0%)
  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/assets/vocab.txt (deflated 53%)
  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/variables/ (stored 0%)
  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/variables/variables.index (deflated 80%)
  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/variables/variables.data-00000-of-00001 (deflated 12%)
  adding: content/sentiment140_bert_en_uncased_L-4_H-512_A-8/model.zip (stored 0%)


In [None]:
# Colab-only helper: download the zipped model archive from /content.
from google.colab import files
files.download("/content/model.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>