# Notebook Order for Ch.3

## Core Concepts:
1.	03a_transfer_learning.ipynb → frozen feature extractor (MobileNetV2)
2.	03b_finetune_MOBILENETV2_flowers5.ipynb → lightweight fine-tuning
3.	03b_finetune_experiment_lr_decay_xception_flowers104.ipynb → Xception fine-tuning
________________________________________
## Extended Concepts:
4.	03c_fromzero_ALEXNET_flowers104.ipynb → small from-scratch experiment
5.	03g_finetune_RESNET50_flowers104.ipynb → pick one classic large net for fine-tuning
6.	03z_ensemble_finetune_flowers104.ipynb → simplified ensemble (no retraining huge models)

***NOTE: The notebooks in this section use matplotlib for inline graphs because each of their confusion matrices is too large to inspect comfortably as a CSV file in Excel.***
________________________________________
## Optional / trace-only
7.	03m_transformer_flowers104.ipynb → conceptual, no heavy training


In [1]:
# ============================================================
# MobileNet Transfer Learning (TFDS, Local-Friendly Version)
# ============================================================
#
# This notebook demonstrates transfer learning using a
# pre-trained MobileNetV2 model and the TFDS `tf_flowers`
# dataset. It is designed to run reliably on local machines
# (Windows/macOS/Linux) without relying on cloud storage,
# GCS paths, or matplotlib.
#
# Goals:
# - Understand transfer learning with CNN feature extractors
# - Learn how pre-trained models accelerate training
# - Practice working with tf.data pipelines
# - Export results for inspection outside Python
#
# ============================================================


# ----------------------------
# Imports
# ----------------------------
import os
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd


print("TensorFlow version:", tf.__version__)


# ----------------------------
# Configuration
# ----------------------------
# Input geometry expected by the MobileNetV2 backbone used in this notebook.
IMG_HEIGHT = 224
IMG_WIDTH = 224
IMG_CHANNELS = 3

BATCH_SIZE = 32
EPOCHS = 5

# CLASS_NAMES[i] must be the name of integer label i, because the exported
# predictions look names up by label index. TFDS does NOT number the
# tf_flowers classes alphabetically; this order mirrors the dataset's own
# label vocabulary (ds_info.features["label"].names).
CLASS_NAMES = ['dandelion', 'daisy', 'tulips', 'sunflowers', 'roses']

# All CSV artifacts produced by this notebook are written here.
OUTPUT_DIR = "03a_mobilenet_outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)


# ============================================================
# Load Dataset (TFDS)
# ============================================================
#
# We use TensorFlow Datasets instead of CSV files or cloud
# storage paths. This ensures:
# - No manual downloads
# - No filesystem configuration
# - Reproducible experiments
#
# The tf_flowers dataset contains 5 flower classes.
#
# ============================================================

# 80/20 train/validation split carved out of the single "train" split that
# tf_flowers ships with; as_supervised=True yields (image, label) tuples.
(ds_train, ds_val), ds_info = tfds.load(
    "tf_flowers",
    split=["train[:80%]", "train[80%:]"],
    as_supervised=True,
    with_info=True
)

# Take the class names from the dataset metadata so that CLASS_NAMES[i] is
# guaranteed to be the name of integer label i. TFDS does not number the
# tf_flowers classes alphabetically, so a hand-written list can silently
# mislabel the rows of the exported predictions.
CLASS_NAMES = list(ds_info.features["label"].names)

print("Dataset loaded:")
print(ds_info)


# ============================================================
# Preprocessing Pipeline
# ============================================================
#
# MobileNetV2 expects:
# - Images resized to 224x224
# - Float32 inputs
# - MobileNet-specific preprocessing
#
# ============================================================

def preprocess(image, label):
    """Prepare one (image, label) pair for MobileNetV2.

    The image is resized to IMG_HEIGHT x IMG_WIDTH and then passed through
    MobileNetV2's own `preprocess_input`; the label is returned unchanged.
    """
    target_size = (IMG_HEIGHT, IMG_WIDTH)
    resized = tf.image.resize(image, target_size)
    scaled = tf.keras.applications.mobilenet_v2.preprocess_input(resized)
    return scaled, label


# Training pipeline: preprocess -> shuffle -> batch -> prefetch.
train_ds = ds_train.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)

# Validation pipeline: same steps without shuffling, so the element order
# is whatever order ds_val yields.
val_ds = ds_val.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.batch(BATCH_SIZE)
val_ds = val_ds.prefetch(tf.data.AUTOTUNE)

print("Training and validation datasets ready.")


# ============================================================
# Build MobileNet Transfer Learning Model
# ============================================================
#
# We use MobileNetV2 pretrained on ImageNet as a fixed
# feature extractor and train a small classification head.
#
# ============================================================

def build_model(num_hidden=16, learning_rate=0.001):
    """Assemble and compile a classifier on top of a frozen MobileNetV2.

    Args:
        num_hidden: Number of units in the ReLU hidden layer of the head.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled `tf.keras.Sequential` model with one softmax output per
        entry of CLASS_NAMES.
    """
    backbone = tf.keras.applications.MobileNetV2(
        input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS),
        include_top=False,
        weights="imagenet",
    )
    # Only the small head below is trained; the pretrained weights stay fixed.
    backbone.trainable = False

    pooling = tf.keras.layers.GlobalAveragePooling2D()
    hidden = tf.keras.layers.Dense(
        num_hidden, activation="relu", name="dense_hidden"
    )
    output = tf.keras.layers.Dense(
        len(CLASS_NAMES), activation="softmax", name="flower_prob"
    )

    classifier = tf.keras.Sequential([backbone, pooling, hidden, output])

    # Labels are integer class ids, hence the sparse cross-entropy loss.
    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    classifier.compile(
        optimizer=adam,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return classifier


# Build the classifier with its default head size and learning rate, then
# print the layer table.
model = build_model()
model.summary()


# ============================================================
# Train Model
# ============================================================

# `history.history` holds the per-epoch metrics that are exported below.
history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)


# ============================================================
# Save Training History (CSV)
# ============================================================
#
# Instead of plotting with matplotlib, we export metrics
# so students can:
# - Open them in Excel
# - Create their own plots
# - Submit them as artifacts
#
# ============================================================

def save_training_history(history, filename):
    """Export the per-epoch metrics of a training run as a CSV file.

    Args:
        history: Object whose `.history` attribute can be turned into a
            DataFrame (here, the return value of `model.fit`).
        filename: Name of the CSV file to create inside OUTPUT_DIR.
    """
    destination = os.path.join(OUTPUT_DIR, filename)
    metrics = pd.DataFrame(history.history)
    metrics.to_csv(destination, index=False)
    print(f"Saved training history to {destination}")


save_training_history(history, "mobilenet_training_history.csv")


# ============================================================
# Run Predictions and Save Results
# ============================================================
#
# This replaces image grids with a simple, inspectable CSV:
# - True label
# - Predicted label
# - Model confidence
#
# ============================================================

def predict_and_save(model, dataset, filename, n_samples=20):
    """Score a few examples and write label, prediction and confidence to CSV.

    Args:
        model: Trained Keras model producing one probability per entry of
            CLASS_NAMES for each input image.
        dataset: Batched `tf.data.Dataset` of (image, label) pairs, already
            preprocessed for the model.
        filename: Name of the CSV file to create inside OUTPUT_DIR.
        n_samples: Number of examples, taken from the start of `dataset`,
            to include in the CSV.
    """
    results = []

    # Select individual examples, then re-batch them so the model is run once
    # per batch. Calling model.predict() separately for every image pays its
    # full setup cost each time and is needlessly slow.
    samples = dataset.unbatch().take(n_samples).batch(BATCH_SIZE)

    for images, labels in samples:
        batch_probs = model.predict(images, verbose=0)
        batch_preds = batch_probs.argmax(axis=1)

        for true_index, pred_index, probs in zip(
            labels.numpy(), batch_preds, batch_probs
        ):
            results.append([
                CLASS_NAMES[int(true_index)],
                CLASS_NAMES[int(pred_index)],
                # Confidence is the probability assigned to the predicted class.
                float(probs[pred_index]),
            ])

    df = pd.DataFrame(
        results,
        columns=["true_label", "predicted_label", "confidence"]
    )

    path = os.path.join(OUTPUT_DIR, filename)
    df.to_csv(path, index=False)
    print(f"Saved predictions to {path}")


predict_and_save(model, val_ds, "mobilenet_predictions.csv")


# ============================================================
# Summary
# ============================================================
#
# - We reused a CNN pretrained on ImageNet
# - We froze its weights and trained a new classifier
# - TFDS handled data loading and labeling
# - Results were exported for analysis outside Python
#
# This same workflow generalizes to larger datasets
# and more advanced fine-tuning experiments.
#
# ============================================================

TensorFlow version: 2.9.1
Dataset loaded:
tfds.core.DatasetInfo(
    name='tf_flowers',
    full_name='tf_flowers/3.0.1',
    description="""
    A large set of images of flowers
    """,
    homepage='https://www.tensorflow.org/tutorials/load_data/images',
    data_dir='C:\\Users\\Jason Eckert\\tensorflow_datasets\\tf_flowers\\3.0.1',
    file_format=tfrecord,
    download_size=218.21 MiB,
    dataset_size=221.83 MiB,
    features=FeaturesDict({
        'image': Image(shape=(None, None, 3), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=5),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    nondeterministic_order=False,
    splits={
        'train': <SplitInfo num_examples=3670, num_shards=2>,
    },
    citation="""@ONLINE {tfflowers,
    author = "The TensorFlow Team",
    title = "Flowers",
    month = "jan",
    year = "2019",
    url = "http://download.tensorflow.org/example_images/flower_photos.tgz" }""",
)
Training and