In [21]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TPU node sandbox



## Environment Setup

In [22]:
import tensorflow as tf
import numpy as np
import pandas as pd

import os
import tensorflow_datasets as tfds

import tensorflow_hub as hub
import tensorflow_text as text

from cloud_tpu_client import Client

## Configure GCP settings

In [23]:
# GCP project, location and staging bucket used by this sandbox.
PROJECT = 'jk-mlops-dev'
REGION = 'us-central1'
ZONE = 'us-central1-a'
GCS_BUCKET = 'gs://jk-tpu-staging'

# TPU node to attach to, and a client handle for querying/configuring it.
TPU_NODE_NAME = 'jk-tpu-node'
c = Client(tpu=TPU_NODE_NAME, zone=ZONE)

# Print accelerator type, node name, state and runtime version, one per line.
for probe in (c.accelerator_type, c.name, c.state, c.runtime_version):
    print(probe())

v3-8
jk-tpu-node
READY
2.5.0


In [24]:
# Ask the TPU node to run the runtime matching the locally installed
# TensorFlow version. restart_type='ifNeeded' presumably restarts the node
# only when the version actually changes — confirm against cloud_tpu_client.
c.configure_tpu_version(tf.__version__, restart_type='ifNeeded')
# Wait for the client to report the TPU as healthy before moving on.
c.wait_for_healthy()



Note: The TPU initialization code has to be at the beginning of your program.

In [25]:
# Resolve the TPU node by name and connect this runtime to its cluster.
# `resolver` is reused later to construct the TPUStrategy.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=TPU_NODE_NAME)
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
# Sanity check: list the logical TPU devices visible after initialization.
print("All devices: ", tf.config.list_logical_devices('TPU'))

2021-09-11 15:49:14.685126: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job worker -> {0 -> 10.122.28.50:8470}
2021-09-11 15:49:14.685216: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job localhost -> {0 -> localhost:41332}
2021-09-11 15:49:14.710853: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job worker -> {0 -> 10.122.28.50:8470}
2021-09-11 15:49:14.710916: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job localhost -> {0 -> localhost:41332}






INFO:tensorflow:Initializing the TPU system: jk-tpu-node


INFO:tensorflow:Initializing the TPU system: jk-tpu-node


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


All devices:  [LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:7', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:6', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:5', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:4', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:3', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:0', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:1', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:2', device_type='TPU')]


In [26]:
# GCS paths of the uncompressed TF Hub modules used below:
# - MODEL_URI: cased English BERT encoder (L-24, H-1024, A-16 per the path).
# - PREPROCESSOR_URI: the cased English BERT preprocessing module.
MODEL_URI = 'gs://tfhub-modules/tensorflow/bert_en_cased_L-24_H-1024_A-16/3/uncompressed'
PREPROCESSOR_URI = 'gs://tfhub-modules/tensorflow/bert_en_cased_preprocess/3/uncompressed'

In [27]:
def get_bert_model(seq_len=128):
    """Build a binary classifier on top of the TF Hub BERT encoder.

    The encoder at ``MODEL_URI`` is loaded as a trainable ``hub.KerasLayer``.
    Its ``pooled_output`` feeds a single-unit dense layer followed by a
    sigmoid, so the model emits one probability per example. This matches
    ``tf.keras.losses.BinaryCrossentropy`` with its default
    ``from_logits=False``.

    Args:
        seq_len: Length of the token sequences fed to the encoder.

    Returns:
        An uncompiled ``tf.keras.Model`` whose input is a dict with keys
        ``input_word_ids``, ``input_mask`` and ``input_type_ids`` (int32,
        shape ``(batch, seq_len)``) and whose output has shape ``(batch, 1)``.
    """
    bert_layer = hub.KerasLayer(
        handle=MODEL_URI,
        trainable=True,
        name='encoder'
    )

    # Dict keys are the names the encoder is called with. The Input layer for
    # ``input_type_ids`` is named "segment_ids"; that name is kept unchanged
    # so existing layer names in summaries/checkpoints stay the same.
    encoder_inputs = dict(
        input_word_ids=tf.keras.layers.Input(shape=(seq_len,), dtype=tf.int32, name="input_word_ids"),
        input_mask=tf.keras.layers.Input(shape=(seq_len,), dtype=tf.int32, name="input_mask"),
        input_type_ids=tf.keras.layers.Input(shape=(seq_len,), dtype=tf.int32, name="segment_ids"),
    )
    encoder_outputs = bert_layer(encoder_inputs)
    embedding = encoder_outputs["pooled_output"]
    logits = tf.keras.layers.Dense(1)(embedding)

    # A softmax over a single unit is identically 1.0 (and has zero gradient
    # with respect to the logit), so the classifier could never learn. For a
    # one-logit binary head the correct squashing function is the sigmoid.
    probability = tf.keras.layers.Activation('sigmoid')(logits)

    model = tf.keras.models.Model(inputs=encoder_inputs,
                                  outputs=probability)

    return model

In [28]:
def create_bert_preprocess_model(seq_length=128):
    """Wrap the TF Hub BERT preprocessor as a Keras model for raw strings.

    The module at ``PREPROCESSOR_URI`` is loaded once; its ``tokenize`` and
    ``bert_pack_inputs`` functions are exposed as Keras layers and chained.

    Args:
        seq_length: Sequence length passed to ``bert_pack_inputs``.

    Returns:
        A ``tf.keras.Model`` mapping a batch of scalar strings to the packer's
        output (presumably the encoder input dict — verify against the
        preprocessing module's documentation).
    """
    raw_text = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text_input')

    preprocess_module = hub.load(PREPROCESSOR_URI)
    tokenize = hub.KerasLayer(preprocess_module.tokenize, name='tokenizer')
    pack_inputs = hub.KerasLayer(
        preprocess_module.bert_pack_inputs,
        arguments=dict(seq_length=seq_length),
        name='packer',
    )

    # Only one text segment is used, so the packer receives a one-element list.
    packed = pack_inputs([tokenize(raw_text)])

    return tf.keras.Model(inputs=raw_text, outputs=packed)

In [29]:
# Toy split used for fitting below: four short strings with 0/1 labels.
tx = tf.constant(
    ['abc def', 'xyz abc apple', 'abc xyz', 'appel pen'],
    dtype=tf.string,
)
ty = tf.constant([1, 1, 0, 0], dtype=tf.int64)

# Second toy split (presumably validation; not used by the cells shown here).
vx = tf.constant(
    ['apple peer', 'abc apple', 'xyz', 'vvvv'],
    dtype=tf.string,
)
vy = tf.constant([1, 0, 0, 1], dtype=tf.int64)

In [30]:
# Build the string -> BERT-input preprocessing model with the default
# seq_length (128).
preprocessor = create_bert_preprocess_model()

In [31]:
# Preprocess the training strings up front; the result is passed as `x` to
# the first model.fit call.
tx_inputs = preprocessor(tx)

In [32]:
# Distribute over the TPU cores discovered by `resolver`.
# (To run without a TPU, swap in tf.distribute.get_strategy() instead.)
strategy = tf.distribute.TPUStrategy(resolver)

# Model and optimizer are created inside the strategy scope.
with strategy.scope():
    model = get_bert_model()
    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=["accuracy"],
    )
    model.summary()

INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_mask (InputLayer)         [(None, 128)]        0                                            
__________________________________________________________________________________________________
segment_ids (InputLayer)        [(None, 128)]        0                                            
__________________________________________________________________________________________________
input_word_ids (InputLayer)     [(None, 128)]        0                                            
__________________________________________________________________________________________________
encoder (KerasLayer)            {'pooled_output': (N 333579265   input_mask[0][0]                 
                                                                 segment_ids[0][0]          

In [33]:
# Fit directly on the pre-tokenized tensors and their labels for 10 epochs.
model.fit(x=tx_inputs, y=ty, epochs=10)

Epoch 1/10


  "shape. This may consume a large amount of memory." % value)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fbf243b8e10>

In [34]:
# Input pipeline over the raw strings: batch first, then tokenize/pack each
# batch inside the pipeline. (Slicing the already-preprocessed `tx_inputs`
# with the labels would be the alternative.)
dataset = (
    tf.data.Dataset.from_tensor_slices((tx, ty))
    .batch(2)
    .map(lambda texts, labels: (preprocessor(texts), labels))
)


In [35]:
# Train again for 10 epochs, this time from the tf.data pipeline; labels
# come from the dataset elements.
model.fit(
    x=dataset,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fc03c458450>