- Implement the CNN from page 496 (Chapter 14) for the Fashion MNIST dataset (Chapter 10, page 318)

In [1]:
# NVTOP
import numpy as np

In [None]:
import tensorflow as tf

# Sanity check: confirm TensorFlow is installed correctly and list the devices it can see
print("TensorFlow version:", tf.__version__)
for device_type in ("CPU", "GPU"):
    print(f"Available {device_type}s:", tf.config.list_physical_devices(device_type))

In [3]:
# Work around a GPU out-of-memory crash.
# Without this, the compiled model reserved nearly all GPU memory (7 of 8 GB) and crashed.
# With memory growth enabled, memory is only reserved when it is actually needed (dynamic),
# which brought usage down to about 20 %.
gpus = tf.config.list_physical_devices('GPU')
try:
    # Iterating an empty list is a no-op, so CPU-only machines simply skip this
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Report the problem but keep the notebook running
    print(err)

In [4]:
# Fashion MNIST comes already shuffled and split into a training set (60k images)
# and a test set (10k images)
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist
# Keep the last 5k training images aside as a validation set; train on the rest
X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]
y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]

In [None]:
# Keras delivers each image as a 28x28 array, rather than the flat 1-D vector
# of 784 values used by the scikit-learn version of the data
X_train.shape

In [None]:
# Keras stores pixel intensities as integers (0-255), rather than the
# floats (0.0-255.0) used by the scikit-learn version of the data
X_train.dtype

In [7]:
# For simplicity, scale pixel intensities down to the 0-1 range; dividing by the
# float 255.0 also converts the integer arrays to floating point.
# NOTE: this cell rebinds the same names, so re-running it would scale the data again.
X_train = X_train / 255.
X_valid = X_valid / 255.
X_test = X_test / 255.

In [8]:
# Append a trailing greyscale channel axis so the images fit the model's expected input.
# Alternatives: np.reshape, np.expand_dims, or a Reshape layer inside the CNN.
X_train = X_train[..., np.newaxis]
X_valid = X_valid[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [None]:
# Each image is now [28, 28, 1] (height, width, greyscale channel);
# the displayed shape additionally has the sample axis in front
X_train.shape

In [10]:
# Human-readable names for the integer class labels 0-9, in label order, as defined in
# "https://keras.io/api/datasets/fashion_mnist/" (label 1 is "Trouser" there)
class_names = [
    "T-shirt/top",  # 0
    "Trouser",      # 1
    "Pullover",     # 2
    "Dress",        # 3
    "Coat",         # 4
    "Sandal",       # 5
    "Shirt",        # 6
    "Sneaker",      # 7
    "Bag",          # 8
    "Ankle boot",   # 9
]

In [None]:
# y_train contains integer class labels (e.g. 4, 9, 0, 2) that index into class_names;
# look up the readable name of the first training image's label
class_names[y_train[0]]

## Build the CNN
- Page 496

In [None]:
# functools.partial was introduced in Chapter 11
from functools import partial

# Behaves like Conv2D but with different default arguments (each can be overridden per layer):
#   - small kernel size of 3
#   - "same" padding
#   - ReLU activation and its corresponding He initializer
DefaultConv2D = partial(
    tf.keras.layers.Conv2D,
    kernel_size=3,
    padding="same",
    activation="relu",
    kernel_initializer="he_normal",
)

# Layers are added one by one, in the same order as they appear in the network
model = tf.keras.Sequential()

# --- Input stage ---
# Start with a large 7x7 filter; the default stride of 1 is kept because the images are small.
# The input shape matches the dataset: 28x28 with a single (greyscale) channel
# (a Reshape layer would be an alternative).
model.add(DefaultConv2D(filters=64, kernel_size=7, input_shape=[28, 28, 1]))
# Max pooling with its default stride of 2 divides each spatial dimension by 2
model.add(tf.keras.layers.MaxPool2D())

# --- Repeated twice: 2x convolution followed by max pooling ---
# For larger images this pattern could be repeated further.
# Notes:
#   - The filter count doubles after each pooling layer (64, 128, 256). Because pooling
#     halves the spatial size, this does not blow up parameters, memory usage or compute.
#   - The number of low-level features (small circles, horizontal lines) is usually small,
#     but there are many ways to combine them into higher-level features (e.g. a face).
# 1.
model.add(DefaultConv2D(filters=128))
model.add(DefaultConv2D(filters=128))
model.add(tf.keras.layers.MaxPool2D())
# 2.
model.add(DefaultConv2D(filters=256))
model.add(DefaultConv2D(filters=256))
model.add(tf.keras.layers.MaxPool2D())

# --- Fully connected classifier ---
# Flatten turns the feature maps into a 1-D vector per image
model.add(tf.keras.layers.Flatten())
# Dense: every unit is connected to every output of the preceding layer
model.add(tf.keras.layers.Dense(units=128, activation="relu", kernel_initializer="he_normal"))
# Dropout: at each training iteration a random subset (here 50 %) of the values is set to 0
model.add(tf.keras.layers.Dropout(0.5))
# Fewer units towards the output layer
model.add(tf.keras.layers.Dense(units=64, activation="relu", kernel_initializer="he_normal"))
# Still 50 % dropout
model.add(tf.keras.layers.Dropout(0.5))
# Softmax converts the 10 outputs into class probabilities
model.add(tf.keras.layers.Dense(units=10, activation="softmax"))

In [20]:
model.compile(
    # "sgd" selects Keras' stochastic gradient descent optimizer with its default settings
    optimizer="sgd",
    # Sparse labels: each instance has just one target class index from 0 to 9.
    # One-hot encoded targets would use "categorical_crossentropy" instead.
    # Binary or multilabel binary classification would use a sigmoid output
    # (instead of softmax) together with "binary_crossentropy".
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [21]:
# Tracking: every run logs below logs/<run_num>
run_num = "1"
# Name and target directory for the exported graph/profiler trace
graph_name = f"trace_{run_num}"
graph_dir = f"logs/{run_num}/trace"

# TensorBoard callback: logs histograms every epoch and writes the model graph
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=f"logs/{run_num}",
    write_graph=True,
    histogram_freq=1,
)

# Retrieve GPU Usage after each epoch
import subprocess
class GPUMonitorCallback(tf.keras.callbacks.Callback):
    """Keras callback that prints GPU memory usage after every epoch.

    The numbers come from ``nvidia-smi`` (used and total memory, CSV without
    header or units). Monitoring is best-effort: if ``nvidia-smi`` is missing,
    fails, or does not answer in time, a short notice is printed instead and
    training continues.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Query ``nvidia-smi`` and print the result for the finished epoch.

        Args:
            epoch: Zero-based epoch index supplied by Keras; printed one-based.
            logs: Metric dictionary supplied by Keras; unused here.
        """
        try:
            result = subprocess.run(
                ["nvidia-smi", "--query-gpu=memory.used,memory.total", "--format=csv,noheader,nounits"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,   # a non-zero exit status is treated as "no data"
                timeout=10,   # never let a hanging tool stall training
            )
        except (OSError, subprocess.SubprocessError) as err:
            # OSError: nvidia-smi not installed / not executable (e.g. CPU-only machine)
            # SubprocessError: non-zero exit status or timeout
            print(f"Epoch {epoch + 1} GPU usage: unavailable ({err})")
            return
        gpu_info = result.stdout.decode('utf-8', errors='replace').strip()
        print(f"Epoch {epoch + 1} GPU usage: {gpu_info}")

In [None]:
# Training set: the actual data the model trains on.
# Test set:
#   ...is independent of the training set but has a somewhat similar probability distribution of classes
#   and is used as a benchmark to evaluate the model, only after training is complete.
# Validation set:
#   ...is used to fine-tune the hyperparameters of the model and is considered part of training.
# Default batch size is 32.

# Start recording the graph and profiler trace
tf.summary.trace_on(graph=True, profiler=True, profiler_outdir=graph_dir)
# Run the training. Validate on the held-out validation split (X_valid, y_valid),
# not on the test set, so the test set stays untouched for the final evaluation.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=2,
    validation_data=(X_valid, y_valid),
    callbacks=[tensorboard_callback, GPUMonitorCallback()],
)
# Afterwards, export the recorded trace so TensorBoard can display it
with tf.summary.create_file_writer(graph_dir).as_default():
    tf.summary.trace_export(name=graph_name, step=0)

In [None]:
%load_ext tensorboard
%reload_ext tensorboard
%tensorboard --logdir ./logs/1/train