<a href="https://colab.research.google.com/github/alexander-toschev/ml-cs-intro/blob/main/home-work/HW_END_TO_END.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TensorFlow & tf.keras — Homework

This notebook contains **auto-graded tasks** about core TensorFlow and the `tf.keras` high-level API.

Fill in the cells marked with `# TODO` and run the **TEST CELL** below each task.

- Language: English (code + comments)
- Topic focus: **TensorFlow basics** + **end-to-end training with tf.keras**
- Total: **100 points**

After each test cell you will see:
- Points for this task
- Cumulative **TOTAL POINTS**


In [None]:
# @title 1) Student Info & Config
# All code comments are in English.


# === REQUIRED: FILL IN ===
full_name = "Doe John"     # e.g.: "Toschev Alexander"
student_group = "11-111"      # e.g.: "208"
assignment_id = "HW_TENSORFLOW_INTRO"
# Fail fast if the fields still equal the literals below (presumably the
# template placeholders, "Last-name First-name" and "Group"). The assertion
# messages ask the student to fill in the corresponding field.
assert full_name != "Фамилия Имя", "Заполните full_name"
assert student_group != "Группа", "Заполните student_group"
print("✔ Student Info OK")

# Typical human accuracy (benchmark) for MNIST may be ~97-99%.

print("Student:", full_name)

In [None]:
# Global score storage (do not modify)
SCORES = {}


def _set_score(task_name, points, max_points):
    """Store the score for one task and print the running total.

    Args:
        task_name: key under which the score is stored in SCORES.
        points: points earned by the test cell.
        max_points: upper bound for this task; the stored value never exceeds it.
    """
    awarded = min(points, max_points)
    SCORES[task_name] = awarded

    # Re-running a test cell overwrites the task's entry, so the total
    # always reflects the latest result of every task.
    running_total = sum(SCORES.values())

    print(f"Task {task_name}: {awarded} / {max_points} points")
    print(f"TOTAL POINTS: {running_total} / 100\n")


## 0. Imports and data loading

In this homework we will use the classic **MNIST** dataset of handwritten digits (28×28, grayscale).


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

print("TensorFlow version:", tf.__version__)

# Seed NumPy and TensorFlow so repeated runs behave the same
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Fetch the MNIST train / test splits
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale pixels into [0, 1] and append a trailing channel axis: (N, 28, 28, 1)
x_train = (x_train.astype("float32") / 255.0)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255.0)[..., np.newaxis]

num_classes = 10
input_shape = x_train.shape[1:]  # per-sample shape, batch axis dropped

print("Train shape:", x_train.shape, "Test shape:", x_test.shape)


## Task 1 (20 points): Basic TensorFlow tensors and operations

Implement the function `tensor_stats(x)`.

**Requirements:**

- Input: 1D `tf.Tensor` of shape `(n,)` with `dtype=tf.float32`.
- Return: a Python `dict` with keys:
  - `"mean"` — mean value
  - `"std"` — standard deviation
  - `"min"` — minimum
  - `"max"` — maximum
- All values in the dict must be **Python floats** (not tensors).
- Use **TensorFlow ops only**:
  - `tf.reduce_mean`, `tf.math.reduce_std`, `tf.reduce_min`, `tf.reduce_max`
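- Do **not** convert the input `x` to NumPy inside the function; compute the statistics with TensorFlow ops and only convert the resulting scalars to Python floats.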


In [None]:
# TODO: implement tensor_stats
def tensor_stats(x: tf.Tensor) -> dict:
    """Compute summary statistics of a 1D float32 tensor with TensorFlow ops.

    Args:
        x: tf.Tensor of shape (n,) and dtype float32.

    Returns:
        dict mapping 'mean', 'std', 'min' and 'max' to Python floats.
    """
    # Statistic name -> TensorFlow reduction that produces it
    reducers = {
        "mean": tf.reduce_mean,
        "std": tf.math.reduce_std,
        "min": tf.reduce_min,
        "max": tf.reduce_max,
    }

    # Each reduction yields a scalar tensor; unwrap it into a plain float
    return {name: float(reduce(x).numpy()) for name, reduce in reducers.items()}


In [None]:
# TEST CELL: Task 1 (20 points)
# Scoring: 5 (returns a dict) + 4 * 3 (each key is a Python float) + 3 (values correct).
max_points = 20
points = 0

try:
    x = tf.constant([1.0, 2.0, 3.0, 4.0], dtype=tf.float32)
    res = tensor_stats(x)
except Exception as e:
    print("Error when calling tensor_stats:", e)
    _set_score("1", points, max_points)
else:
    # Reference values for [1, 2, 3, 4]; std is the population standard
    # deviation, which is what tf.math.reduce_std computes.
    expected = {"mean": 2.5, "std": 1.25 ** 0.5, "min": 1.0, "max": 4.0}

    if isinstance(res, dict):
        points += 5

        # Type checks: every statistic must be present as a Python float
        for key in expected:
            if isinstance(res.get(key), float):
                points += 3  # 4 keys * 3 = 12

        # Numerical checks; a missing key or non-numeric value simply earns
        # no points instead of aborting the cell before the score is stored.
        try:
            values_ok = all(
                abs(res[key] - target) < 1e-5 for key, target in expected.items()
            )
        except (KeyError, TypeError):
            values_ok = False
        if values_ok:
            points += 3
    else:
        print("tensor_stats should return dict")

    _set_score("1", points, max_points)


## Task 2 (40 points): Dense neural network for MNIST with `tf.keras`

Implement the function `build_dense_mnist_model(input_shape, num_classes)`.

**Requirements:**

- Use **`tf.keras` / `keras` only**.
- Input: images of shape `input_shape` (e.g. `(28, 28, 1)`).
- Output: probabilities over `num_classes` classes (use `softmax`).
- Architecture:
  - Flatten layer
  - At least **one Dense hidden layer** with ≥ 128 units and **ReLU** activation
  - Final Dense layer with `num_classes` units and **softmax** activation
- The model must be **compiled** with:
  - Optimizer: `Adam` with learning rate `1e-3`
  - Loss: `sparse_categorical_crossentropy`
  - Metric: `accuracy`


In [None]:
# TODO: implement build_dense_mnist_model
def build_dense_mnist_model(input_shape, num_classes):
    """Create a compiled fully-connected classifier for MNIST-like images.

    Args:
        input_shape: tuple, e.g. (28, 28, 1)
        num_classes: int, number of classes (10 for MNIST)

    Returns:
        Compiled tf.keras.Model instance that outputs class probabilities.
    """
    image_in = keras.Input(shape=input_shape)

    # Layers in application order: flatten -> ReLU hidden layer -> softmax head
    stack = (
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dense(num_classes, activation="softmax"),
    )
    features = image_in
    for layer in stack:
        features = layer(features)

    net = keras.Model(image_in, features)
    net.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net


In [None]:
# TEST CELL: Task 2 (40 points)
# Scoring: 5 (type) + 5 (output shape) + 5 (softmax) + 5 (compiled) + up to 20 (accuracy).
max_points = 40
points = 0

try:
    model_t2 = build_dense_mnist_model(input_shape, num_classes)
except Exception as e:
    print("Error when calling build_dense_mnist_model:", e)
    _set_score("2", points, max_points)
else:
    # Type check
    if isinstance(model_t2, keras.Model):
        points += 5

    # Forward pass plus the checks that depend on its output. They share one
    # try block so a failing forward pass cannot leave y_pred undefined for
    # the softmax check.
    try:
        y_pred = model_t2(x_train[:32])
        if y_pred.shape == (32, num_classes):
            points += 5

        # Check softmax (probabilities close to 1 when summed)
        s = tf.reduce_sum(y_pred[0]).numpy()
        if abs(s - 1.0) < 1e-3:
            points += 5
    except Exception as e:
        print("Error in forward pass:", e)

    # Check that model is compiled (has optimizer and loss); getattr keeps the
    # check from raising on objects that lack these attributes.
    if (
        getattr(model_t2, "optimizer", None) is not None
        and getattr(model_t2, "loss", None) is not None
    ):
        points += 5

    # Train shortly on a small subset to see non-trivial accuracy. A failure
    # here (e.g. an uncompiled model) is reported and scored as zero accuracy
    # so the points earned above are still recorded.
    val_acc = 0.0
    try:
        history = model_t2.fit(
            x_train[:2000], y_train[:2000],
            validation_data=(x_test[:1000], y_test[:1000]),
            epochs=3,
            batch_size=128,
            verbose=0,
        )
        val_acc = history.history.get("val_accuracy", [0])[-1]
        print("Validation accuracy after 3 epochs on subset:", val_acc)
    except Exception as e:
        print("Error during training:", e)

    # Reward reasonable accuracy
    if val_acc > 0.85:
        points += 20
    elif val_acc > 0.75:
        points += 10
    elif val_acc > 0.65:
        points += 5

    _set_score("2", points, max_points)


## Task 3 (25 points): `tf.data` pipeline for MNIST

Implement the function `make_mnist_dataset(x, y, batch_size)`.

**Requirements:**

- Create a `tf.data.Dataset` from NumPy arrays `x` and `y`.
- Shuffle the dataset with a buffer size ≥ `len(x) // 4`.
- Batch with the given `batch_size`.
- Prefetch with `tf.data.AUTOTUNE`.
- Return the prepared dataset.


In [None]:
# TODO: implement make_mnist_dataset
def make_mnist_dataset(x, y, batch_size: int) -> tf.data.Dataset:
    """Build a shuffled, batched and prefetched input pipeline.

    Args:
        x: NumPy array of images.
        y: NumPy array of labels.
        batch_size: int, batch size.

    Returns:
        tf.data.Dataset yielding (batch_x, batch_y).
    """
    samples = tf.data.Dataset.from_tensor_slices((x, y))

    # Shuffle buffer covers a quarter of the data, but never less than one element
    quarter = len(x) // 4
    shuffle_buffer = quarter if quarter > 1 else 1

    return (
        samples
        .shuffle(buffer_size=shuffle_buffer, seed=SEED)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )


In [None]:
# TEST CELL: Task 3 (25 points)
# Scoring: 5 (type) + 5 (image batch shape) + 5 (label batch size)
#          + 5 (element spec is a tuple) + 5 (model trains on the dataset).
max_points = 25
points = 0

try:
    ds = make_mnist_dataset(x_train, y_train, batch_size=64)
except Exception as e:
    print("Error when calling make_mnist_dataset:", e)
    _set_score("3", points, max_points)
else:
    if isinstance(ds, tf.data.Dataset):
        points += 5

    # Inspect one batch. Guarded so that a malformed return value is reported
    # instead of aborting the cell before the score is recorded.
    try:
        for batch_x, batch_y in ds.take(1):
            if batch_x.shape[0] <= 64 and batch_x.shape[1:] == input_shape:
                points += 5
            if batch_y.shape[0] == batch_x.shape[0]:
                points += 5
    except Exception as e:
        print("Error when inspecting a batch:", e)

    # The dataset must yield (x, y) pairs
    spec = getattr(ds, "element_spec", None)
    if isinstance(spec, tuple):
        points += 5

    # Try using in a small training loop; failures here only cost the
    # training points, not the ones already earned above.
    try:
        model_tmp = build_dense_mnist_model(input_shape, num_classes)
        history = model_tmp.fit(ds.take(50), epochs=1, verbose=0)
        train_acc = history.history.get("accuracy", [0])[-1]
        print("Train accuracy on 50 batches:", train_acc)
        if train_acc > 0.5:
            points += 5
    except Exception as e:
        print("Error when training on the dataset:", e)

    _set_score("3", points, max_points)


## Task 4 (15 points): Custom training step with `tf.GradientTape`

Implement the function `train_one_epoch(model, dataset, optimizer, loss_fn)` that performs **one epoch** of training
with a custom loop.

**Requirements:**

- Iterate over `(x_batch, y_batch)` from `dataset`.
- Use `tf.GradientTape()` to compute gradients of the loss w.r.t. **trainable variables**.
- Apply gradients via `optimizer.apply_gradients(...)`.
- Accumulate the per-batch loss values and return their average over the epoch as a Python float.


In [None]:
# TODO: implement train_one_epoch
def train_one_epoch(model, dataset, optimizer, loss_fn) -> float:
    """Train the model for a single pass over the dataset with a manual loop.

    Args:
        model: tf.keras.Model
        dataset: tf.data.Dataset yielding (x_batch, y_batch)
        optimizer: tf.keras.optimizers.Optimizer
        loss_fn: callable loss function

    Returns:
        Mean of the per-batch losses (Python float); 0.0 if the dataset is empty.
    """
    batch_losses = []

    for x_batch, y_batch in dataset:
        # Record the forward pass so the loss can be differentiated
        with tf.GradientTape() as tape:
            predictions = model(x_batch, training=True)
            batch_loss = loss_fn(y_batch, predictions)

        weights = model.trainable_variables
        gradients = tape.gradient(batch_loss, weights)
        optimizer.apply_gradients(zip(gradients, weights))

        batch_losses.append(float(batch_loss.numpy()))

    if not batch_losses:
        return 0.0
    return sum(batch_losses, 0.0) / len(batch_losses)


In [None]:
# TEST CELL: Task 4 (15 points)
# Scoring: 5 (returned average loss is a Python float)
#          + 10 (loss on a fixed reference batch did not increase noticeably).
max_points = 15
points = 0

# The whole check runs inside a single try block; any exception is printed and
# the points collected up to that moment are still recorded after the block.
try:
    # Small model & dataset
    model_small = build_dense_mnist_model(input_shape, num_classes)
    ds_small = make_mnist_dataset(x_train[:2000], y_train[:2000], batch_size=128)
    # from_logits=False: the Task 2 model is required to end in softmax, so the
    # values named "*_logits" below are expected to be probabilities.
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = keras.optimizers.Adam(learning_rate=1e-3)

    # Compute initial loss on one batch; (x0, y0) is kept as a fixed reference
    # batch for the before/after comparison.
    x0, y0 = next(iter(ds_small))
    initial_logits = model_small(x0, training=False)
    initial_loss = loss_fn(y0, initial_logits).numpy()

    # Train on at most 20 batches of the small dataset
    avg_loss_epoch = train_one_epoch(model_small, ds_small.take(20), optimizer, loss_fn)
    print("Average loss over custom epoch:", avg_loss_epoch)

    # Loss should be a float
    if isinstance(avg_loss_epoch, float):
        points += 5

    # After one epoch, loss on the same batch should not increase a lot (ideally decrease)
    new_logits = model_small(x0, training=False)
    new_loss = loss_fn(y0, new_logits).numpy()
    print("Initial loss:", initial_loss, "New loss:", new_loss)

    # 0.05 is the tolerated increase
    if new_loss <= initial_loss + 0.05:
        points += 10

except Exception as e:
    print("Error in Task 4 test:", e)

_set_score("4", points, max_points)


In [None]:
# Summary: show the per-task scores stored in SCORES by the test cells.
print("Final SCORES dict:", SCORES)
print("If TOTAL POINTS above is 100, you have full score :)")