# In [1]:
"""
One-cell benchmark
──────────────────
• Builds a tiny CNN
• Trains it once on CPU, once on GPU FP32, once on GPU mixed-FP16
• Shows a live tqdm bar that also prints *samples / second*,
  running loss, and running accuracy.
"""

# ─────────────────────────  Imports  ──────────────────────────
import os, time, platform, tensorflow as tf, numpy as np
from tensorflow.keras import layers, mixed_precision
from tqdm.autonotebook import tqdm

# ─────────────────────  Environment report  ───────────────────
# Gather the runtime facts first, then print them in one aligned table.
python_version = platform.python_version()
tf_version = tf.__version__
gpu_devices = tf.config.list_physical_devices("GPU")

print(f"Python        {python_version}")
print(f"TensorFlow    {tf_version}")
print(f"Visible GPUs  {gpu_devices}")
print()

# ───────────────────────  Synthetic data  ─────────────────────
BATCH = 512          # samples per training batch
N_SAMPLES = 60_000   # total synthetic samples per epoch

# Draw the arrays directly in their final dtypes.  The previous
# `np.random.rand(...).astype("float32")` first materialised a float64
# array (twice the size) and then copied it down to float32.
_rng = np.random.default_rng()
x = _rng.random((N_SAMPLES, 32, 32, 3), dtype=np.float32)  # uniform in [0, 1)
y = _rng.integers(10, size=N_SAMPLES, dtype=np.int32)      # labels in [0, 10)

# Shuffle within a 4096-element buffer, batch, and let tf.data overlap
# input preparation with training.
ds = (tf.data.Dataset
        .from_tensor_slices((x, y))
        .shuffle(4_096)
        .batch(BATCH)
        .prefetch(tf.data.AUTOTUNE))

# ───────────────────────  Build model  ────────────────────────
def build_model(fp16=False):
    """Build and compile the small benchmark CNN.

    As a side effect this sets the global Keras dtype policy:
    ``"mixed_float16"`` when *fp16* is true, ``"float32"`` otherwise.

    Args:
        fp16: Build the model under the mixed-FP16 policy when true.

    Returns:
        A compiled ``tf.keras.Model`` taking ``(32, 32, 3)`` inputs and
        producing 10 logits, using Adam, sparse categorical cross-entropy
        on logits, and an accuracy metric.
    """
    mixed_precision.set_global_policy("mixed_float16" if fp16 else "float32")

    inp = layers.Input((32, 32, 3))
    h = layers.Conv2D(64, 3, activation="relu")(inp)
    h = layers.Conv2D(128, 3, activation="relu")(h)
    h = layers.GlobalAveragePooling2D()(h)
    # Keep the logits in float32 even under the mixed policy so the loss is
    # computed on full-precision outputs (a no-op under the float32 policy).
    out = layers.Dense(10, dtype="float32")(h)

    model = tf.keras.Model(inp, out)
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"])
    return model


# ───────────────  Callback: tqdm + throughput  ────────────────
class TqdmSpeed(tf.keras.callbacks.Callback):
    """Keras callback showing a tqdm bar with loss, accuracy and throughput.

    The bar advances once per training batch.  Throughput (samples per
    second) is measured from the start of training and computed from the
    batch index Keras reports, so it is meaningful for a single-epoch
    ``fit`` call, which is how this file uses it.

    Args:
        total_steps: Number of batches the bar should expect.
        batch_size: Samples per batch used for the throughput estimate.
            Defaults to the module-level ``BATCH``.  A final, smaller batch
            is still counted as a full one, so the last reading slightly
            overestimates.
    """

    def __init__(self, total_steps, *, batch_size=None):
        super().__init__()
        self.total_steps = total_steps
        self.batch_size = BATCH if batch_size is None else batch_size
        self.t0 = None
        self.bar = None

    def on_train_begin(self, logs=None):
        """Start the clock and open the progress bar."""
        # perf_counter is monotonic, unlike time.time(), so the rate cannot
        # be distorted by wall-clock adjustments.
        self.t0 = time.perf_counter()
        self.bar = tqdm(total=self.total_steps, unit="batch",
                        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} "
                                   "[{elapsed}<{remaining}, "
                                   "{rate_fmt}] – {postfix}")

    def on_train_batch_end(self, batch, logs=None):
        """Advance the bar and refresh the running statistics."""
        logs = logs or {}
        nan = float("nan")

        self.bar.update(1)
        seen = (batch + 1) * self.batch_size
        elapsed = time.perf_counter() - self.t0
        samples_per_sec = seen / elapsed if elapsed > 0 else 0.0
        # Missing metrics are shown as "nan" instead of raising KeyError.
        self.bar.set_postfix(
            loss=f"{logs.get('loss', nan):.3f}",
            acc=f"{logs.get('accuracy', nan):.3f}",
            sps=f"{samples_per_sec:,.0f}"
        )

    def on_train_end(self, logs=None):
        """Close the progress bar if one was opened."""
        if self.bar is not None:
            self.bar.close()
            self.bar = None


# ──────────────────────  Timing helper  ───────────────────────
def time_one_epoch(device, *, fp16=False, label=""):
    """Train a fresh model for one epoch on *device* and print the wall time.

    The measured interval wraps the whole ``model.fit`` call.

    Args:
        device: TensorFlow device string, e.g. ``"/CPU:0"`` or ``"/GPU:0"``.
        fp16: Build the model under the mixed-FP16 policy when true.
        label: Name printed in front of the timing.

    Returns:
        None.  The result is printed; a run that asks for a GPU while none
        is visible is reported as skipped instead of being timed.
    """
    # Without a GPU the run would either fail or be timed on another device
    # under a GPU label, so report it as skipped instead.
    if "GPU" in device.upper() and not tf.config.list_physical_devices("GPU"):
        print(f"{label:<15} → skipped (no GPU visible)\n")
        return

    tf.keras.backend.clear_session()

    # build_model() changes the global dtype policy; remember the current
    # one so this helper does not leak that change to later code.
    previous_policy = mixed_precision.global_policy()
    try:
        with tf.device(device):
            model = build_model(fp16)
            steps = int(np.ceil(N_SAMPLES / BATCH))
            cb    = TqdmSpeed(steps)

            start = time.perf_counter()
            model.fit(ds, epochs=1, verbose=0, callbacks=[cb])
            secs  = time.perf_counter() - start
    finally:
        mixed_precision.set_global_policy(previous_policy)

    print(f"{label:<15} → {secs:6.2f} s\n")


# ─────────────────────────  Runs  ─────────────────────────────
# Each entry is (device string, mixed-FP16 flag, printed label); the
# benchmark runs them in this order.
for _device, _use_fp16, _label in (
    ("/CPU:0", False, "CPU"),
    ("/GPU:0", False, "GPU FP32"),
    ("/GPU:0", True, "GPU mixed-FP16"),
):
    time_one_epoch(_device, fp16=_use_fp16, label=_label)


# Cell output: ModuleNotFoundError: No module named 'tensorflow'