<a href="https://colab.research.google.com/github/YoramGilboa/python-mini-projects/blob/master/A_Simple_TF_2_1_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Implementation using some sort of DevOps best practice:
Code = in GitHub,
Data = in GCS in the team-brisket-flowers-tpu bucket
Compute = in Colab

First need to get the flowers_data file if I don't have it already

In [6]:
from google.colab import files
files.upload()  # Upload kaggle.json

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"atlantaactuary","key":"6ffd663c92d9b81a32c0721edf46ff50"}'}

In [7]:
import os
os.makedirs('/root/.kaggle', exist_ok=True)
!mv kaggle.json /root/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json


In [8]:
!pip install -q kaggle

In [9]:
!kaggle competitions download -c flower-classification-with-tpus


In [10]:
!ls


flower-classification-with-tpus.zip  sample_data


In [11]:
!unzip -q flower-classification-with-tpus.zip -d flowers_data


In [12]:
!gsutil -m cp -r flowers_data gs://team-brisket-flowers-tpu/flowers_data


Copying file://flowers_data/sample_submission.csv [Content-Type=text/csv]...
/ [0/193 files][    0.0 B/  4.8 GiB]   0% Done                                  Copying file://flowers_data/tfrecords-jpeg-512x512/val/00-512x512-232.tfrec [Content-Type=application/octet-stream]...
/ [0/193 files][    0.0 B/  4.8 GiB]   0% Done                                  Copying file://flowers_data/tfrecords-jpeg-512x512/val/07-512x512-232.tfrec [Content-Type=application/octet-stream]...
/ [0/193 files][    0.0 B/  4.8 GiB]   0% Done                                  Copying file://flowers_data/tfrecords-jpeg-512x512/val/10-512x512-232.tfrec [Content-Type=application/octet-stream]...
/ [0/193 files][    0.0 B/  4.8 GiB]   0% Done                                  Copying file://flowers_data/tfrecords-jpeg-512x512/val/14-512x512-232.tfrec [Content-Type=application/octet-stream]...
/ [0/193 files][    0.0 B/  4.8 GiB]   0% Done                                  Copying file://flowers_data/tfrecords-jpeg

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
import kagglehub
kagglehub.login()


VBox(children=(HTML(value='<center> <img\nsrc=https://www.kaggle.com/static/images/site-logo.png\nalt=\'Kaggle…

Kaggle credentials set.
Kaggle credentials successfully validated.


# A Simple TF 2.1 notebook

This is based entirely off of Martin Gorner's excellent starter notebook, and is intended solely as a simple, shorter introduction to the operations being performed there.

In [1]:
!pip install -U tensorflow



In [2]:
import tensorflow as tf
#from kaggle_datasets import KaggleDatasets
import numpy as np

print("Tensorflow version " + tf.__version__)

Tensorflow version 2.19.0


# Detect my accelerator

In [4]:
import os

# Check if the environment variable exists before accessing it
if 'COLAB_TPU_ADDR' in os.environ:
    tpu_address = os.environ['COLAB_TPU_ADDR']
    print(f"TPU address: {tpu_address}")  # should return a TPU address like 10.240.x.x:8470
else:
    print("TPU address not found. Check your Colab environment.")

TPU address not found. Check your Colab environment.


In [24]:
# Detect hardware, return appropriate distribution strategy
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection. No parameters necessary if TPU_NAME environment variable is set. On Kaggle this is always the case.
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy() # default distribution strategy in Tensorflow. Works on CPU and single GPU.

print("REPLICAS: ", strategy.num_replicas_in_sync)

REPLICAS:  1


# Get my data path

In [16]:
#GCS_DS_PATH = KaggleDatasets().get_gcs_path() # you can list the bucket with "!gsutil ls $GCS_DS_PATH"
GCS_DS_PATH = 'gs://team-brisket-flowers-tpu/flowers_data'

In [17]:
!gsutil ls $GCS_DS_PATH

gs://team-brisket-flowers-tpu/flowers_data/sample_submission.csv
gs://team-brisket-flowers-tpu/flowers_data/tfrecords-jpeg-192x192/
gs://team-brisket-flowers-tpu/flowers_data/tfrecords-jpeg-224x224/
gs://team-brisket-flowers-tpu/flowers_data/tfrecords-jpeg-331x331/
gs://team-brisket-flowers-tpu/flowers_data/tfrecords-jpeg-512x512/


# Set some parameters

In [18]:
IMAGE_SIZE = [192, 192] # at this size, a GPU will run out of memory. Use the TPU
EPOCHS = 5
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

NUM_TRAINING_IMAGES = 12753
NUM_TEST_IMAGES = 7382
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE
AUTO = tf.data.experimental.AUTOTUNE

# Load my data

This data is loaded from Kaggle and automatically sharded to maximize parallelization.

In [19]:
def decode_image(image_data):
    image = tf.image.decode_jpeg(image_data, channels=3)
    image = tf.cast(image, tf.float32) / 255.0  # convert image to floats in [0, 1] range
    image = tf.reshape(image, [*IMAGE_SIZE, 3]) # explicit size needed for TPU
    return image

def read_labeled_tfrecord(example):
    LABELED_TFREC_FORMAT = {
        "image": tf.io.FixedLenFeature([], tf.string), # tf.string means bytestring
        "class": tf.io.FixedLenFeature([], tf.int64),  # shape [] means single element
    }
    example = tf.io.parse_single_example(example, LABELED_TFREC_FORMAT)
    image = decode_image(example['image'])
    label = tf.cast(example['class'], tf.int32)
    return image, label # returns a dataset of (image, label) pairs

def read_unlabeled_tfrecord(example):
    UNLABELED_TFREC_FORMAT = {
        "image": tf.io.FixedLenFeature([], tf.string), # tf.string means bytestring
        "id": tf.io.FixedLenFeature([], tf.string),  # shape [] means single element
        # class is missing, this competitions's challenge is to predict flower classes for the test dataset
    }
    example = tf.io.parse_single_example(example, UNLABELED_TFREC_FORMAT)
    image = decode_image(example['image'])
    idnum = example['id']
    return image, idnum # returns a dataset of image(s)

def load_dataset(filenames, labeled=True, ordered=False):
    # Read from TFRecords. For optimal performance, reading from multiple files at once and
    # disregarding data order. Order does not matter since we will be shuffling the data anyway.

    ignore_order = tf.data.Options()
    if not ordered:
        ignore_order.experimental_deterministic = False # disable order, increase speed

    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO) # automatically interleaves reads from multiple files
    dataset = dataset.with_options(ignore_order) # uses data as soon as it streams in, rather than in its original order
    dataset = dataset.map(read_labeled_tfrecord if labeled else read_unlabeled_tfrecord, num_parallel_calls=AUTO)
    # returns a dataset of (image, label) pairs if labeled=True or (image, id) pairs if labeled=False
    return dataset

def get_training_dataset():
    dataset = load_dataset(tf.io.gfile.glob(GCS_DS_PATH + '/tfrecords-jpeg-192x192/train/*.tfrec'), labeled=True)
    dataset = dataset.repeat() # the training dataset must repeat for several epochs
    dataset = dataset.shuffle(2048)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTO) # prefetch next batch while training (autotune prefetch buffer size)
    return dataset

def get_validation_dataset():
    dataset = load_dataset(tf.io.gfile.glob(GCS_DS_PATH + '/tfrecords-jpeg-192x192/val/*.tfrec'), labeled=True, ordered=False)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.cache()
    dataset = dataset.prefetch(AUTO) # prefetch next batch while training (autotune prefetch buffer size)
    return dataset

def get_test_dataset(ordered=False):
    dataset = load_dataset(tf.io.gfile.glob(GCS_DS_PATH + '/tfrecords-jpeg-192x192/test/*.tfrec'), labeled=False, ordered=ordered)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTO) # prefetch next batch while training (autotune prefetch buffer size)
    return dataset

training_dataset = get_training_dataset()
validation_dataset = get_validation_dataset()

# Build a model on TPU (or GPU, or CPU...) with Tensorflow 2.1!

In [21]:
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()  # detect
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    print("✅ TPU connected.")
except:
    strategy = tf.distribute.get_strategy()
    print("⚠️ Using default strategy (CPU/GPU).")


⚠️ Using default strategy (CPU/GPU).


In [20]:
with strategy.scope():
    pretrained_model = tf.keras.applications.VGG16(weights='imagenet', include_top=False ,input_shape=[*IMAGE_SIZE, 3])
    pretrained_model.trainable = False # tramsfer learning

    model = tf.keras.Sequential([
        pretrained_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(104, activation='softmax')
    ])

model.compile(
    optimizer='adam',
    loss = 'sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy']
)

historical = model.fit(training_dataset,
          steps_per_epoch=STEPS_PER_EPOCH,
          epochs=EPOCHS,
          validation_data=validation_dataset)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/5
[1m 15/797[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m43:40[0m 3s/step - loss: 4.6426 - sparse_categorical_accuracy: 0.0228

KeyboardInterrupt: 

# Compute your predictions on the test set!

This will create a file that can be submitted to the competition.

In [None]:
test_ds = get_test_dataset(ordered=True) # since we are splitting the dataset and iterating separately on images and ids, order matters.

print('Computing predictions...')
test_images_ds = test_ds.map(lambda image, idnum: image)
probabilities = model.predict(test_images_ds)
predictions = np.argmax(probabilities, axis=-1)
print(predictions)

print('Generating submission.csv file...')
test_ids_ds = test_ds.map(lambda image, idnum: idnum).unbatch()
test_ids = next(iter(test_ids_ds.batch(NUM_TEST_IMAGES))).numpy().astype('U') # all in one batch
np.savetxt('submission.csv', np.rec.fromarrays([test_ids, predictions]), fmt=['%s', '%d'], delimiter=',', header='id,label', comments='')