In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load IMDB dataset
(train_data, test_data), info = tfds.load(
    'imdb_reviews',
    split=[tfds.Split.TRAIN, tfds.Split.TEST],
    as_supervised=True,
    with_info=True
)

# Convert dataset to Python lists
train_sentences, train_labels = zip(*[(sent.numpy().decode("utf8"), label.numpy()) for sent, label in train_data])
test_sentences, test_labels = zip(*[(sent.numpy().decode("utf8"), label.numpy()) for sent, label in test_data])

# Tokenize and sequence the data
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(train_sentences)

train_sequences = tokenizer.texts_to_sequences(train_sentences)
test_sequences = tokenizer.texts_to_sequences(test_sentences)

# Pad sequences
max_length = 120
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding='post', truncating='post')
test_padded = pad_sequences(test_sequences, maxlen=max_length, padding='post', truncating='post')


Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.PBQG0H_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.PBQG0H_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.PBQG0H_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [2]:
def serialize_example(feature, label):
    feature = tf.train.Feature(int64_list=tf.train.Int64List(value=feature))
    label = tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
    features = tf.train.Features(feature={'feature': feature, 'label': label})
    example = tf.train.Example(features=features)
    return example.SerializeToString()

def write_tfrecord(filename, sequences, labels):
    with tf.io.TFRecordWriter(filename) as writer:
        for seq, lbl in zip(sequences, labels):
            example = serialize_example(seq, lbl)
            writer.write(example)

# Write TFRecord files
write_tfrecord('train.tfrecord', train_padded, train_labels)
write_tfrecord('test.tfrecord', test_padded, test_labels)


In [3]:
def parse_tfrecord(serialized_example):
    feature_description = {
        'feature': tf.io.FixedLenFeature([max_length], tf.int64),
        'label': tf.io.FixedLenFeature([], tf.int64)
    }
    example = tf.io.parse_single_example(serialized_example, feature_description)
    feature = example['feature']
    label = example['label']
    return feature, label

def load_dataset(file_pattern, batch_size):
    dataset = tf.data.TFRecordDataset(file_pattern)
    dataset = dataset.map(parse_tfrecord)
    dataset = dataset.shuffle(10000).batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

train_dataset = load_dataset('train.tfrecord', batch_size=64)
test_dataset = load_dataset('test.tfrecord', batch_size=64)


In [4]:
import tensorflow as tf

try:
    # Automatically detect and connect to TPU
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu="")  # Leave TPU name empty for Colab
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    print("TPU initialized successfully.")
except Exception as e:
    print("Failed to initialize TPU. Check your Colab runtime:", e)
    strategy = tf.distribute.get_strategy()  # Default to CPU/GPU strategy if TPU is unavailable


Failed to initialize TPU. Check your Colab runtime: Please provide a TPU Name to connect to.


In [5]:
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)

# Define a simple model
with strategy.scope():
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(10000, 128, input_length=max_length),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_dataset, validation_data=test_dataset, epochs=5)


ValueError: Please provide a TPU Name to connect to.