- Implement the CNN from page 496 (Chapter 14) for the Fashion MNIST dataset (Chapter 10, page 318)

In [1]:
import numpy as np

In [2]:
import tensorflow as tf

# Sanity check: confirm that TensorFlow imports correctly and show which CPU devices it can see
print(f"TensorFlow version: {tf.__version__}")
print(f"Available CPUs: {tf.config.list_physical_devices('CPU')}")

2025-02-18 21:35:59.387801: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-18 21:35:59.394192: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-18 21:35:59.416844: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739910959.457769    6692 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739910959.468288    6692 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-18 21:35:59.505765: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

TensorFlow version: 2.18.0
Available CPUs: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


2025-02-18 21:36:03.386759: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [3]:
# Fetch the Fashion MNIST dataset through Keras
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
# The data arrives pre-shuffled and pre-split: 60k training images and 10k test images
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist
# Reserve the final 5k training samples as a validation set; everything before them is used for training
X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]
y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]

In [4]:
# Keras provides each image as a 28x28 array rather than a flat 784-element vector (as scikit-learn does)
X_train.shape

(55000, 28, 28)

In [5]:
# Keras stores pixel intensities as integers 0-255, rather than floats 0.0-255.0 (as scikit-learn does)
X_train.dtype

dtype('uint8')

In [6]:
# Keep it simple: dividing by 255.0 rescales the intensities to the 0-1 range and converts them to float
X_train, X_valid, X_test = (images / 255.0 for images in (X_train, X_valid, X_test))

In [None]:
# Append a trailing greyscale channel axis so each image matches the model's [28, 28, 1] input.
# Alternatives: np.reshape, np.expand_dims, or a Reshape layer inside the CNN.
# Only 3-D arrays are expanded, so re-running this cell does not keep stacking extra axes.
X_train, X_valid, X_test = (
    images[..., np.newaxis] if images.ndim == 3 else images
    for images in (X_train, X_valid, X_test)
)

In [None]:
# After adding the channel axis, each image should have shape [28, 28, 1] (plus the leading sample axis)
X_train.shape

In [7]:
# Human-readable names for the label ids, ordered by class index
# (classes as listed at https://keras.io/api/datasets/fashion_mnist/)
class_names = [
    "T-shirt/top",
    "Trousers",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

In [8]:
# y_train holds integer class labels (e.g. 4, 9, 0, 2); look up the name of the first training label
class_names[y_train[0]]

'Ankle boot'

## Build the CNN
- Page 496

In [13]:
# partial (introduced in Chapter 11) pre-binds keyword arguments of a callable
from functools import partial

# Behaves like Conv2D but with different default arguments:
#   - small kernel size of 3
#   - "same" padding
#   - ReLU activation function and the corresponding He initializer
DefaultConv2D = partial(tf.keras.layers.Conv2D, kernel_size=3, padding="same", activation="relu", kernel_initializer="he_normal")

model = tf.keras.Sequential([
    # Declare the input explicitly: 28x28 images with a single (greyscale) channel.
    # Passing input_shape= to the first layer instead makes Keras emit a UserWarning;
    # an Input object as the first entry of a Sequential model is the recommended form.
    tf.keras.layers.Input(shape=[28, 28, 1]),
    # Start with a large filter (7x7)
    # Default stride of 1, because the images are small
    DefaultConv2D(filters=64, kernel_size=7),
    # MaxPooling layer with default stride of 2, therefore each spatial dimension is divided by 2
    tf.keras.layers.MaxPool2D(),
    ### Repeat 2x: two convolutional layers followed by MaxPooling
    # For larger images this could be repeated further
    # Note:
    #   The filter count doubles after each MaxPooling: because the spatial size is halved, this can be
    #   done without fear of exploding the number of parameters, memory usage, or computational load
    #   Filter counts therefore grow towards the output layer: 64, 128, 256
    #   The number of low-level features is usually fairly low (small circles, horizontal lines),
    #   but there are many ways to combine them into higher-level features (e.g. a face)
    # 1.
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    tf.keras.layers.MaxPool2D(),
    # 2.
    DefaultConv2D(filters=256),
    DefaultConv2D(filters=256),
    tf.keras.layers.MaxPool2D(),
    # Fully connected part of the network
    # Flatten: turns the multi-dimensional feature maps into a 1-D array per sample
    tf.keras.layers.Flatten(),
    # Dense: every unit is connected to every output of the previous layer
    tf.keras.layers.Dense(units=128, activation="relu", kernel_initializer="he_normal"),
    # At each training iteration a random subset of the neurons in a layer (never the output layer)
    # is dropped out, i.e. outputs 0; the rate here is 50 %
    tf.keras.layers.Dropout(0.5),
    # Fewer units towards the output layer
    tf.keras.layers.Dense(units=64, activation="relu", kernel_initializer="he_normal"),
    # Still 50 % dropout
    tf.keras.layers.Dropout(0.5),
    # Softmax converts the 10 outputs into class probabilities
    tf.keras.layers.Dense(units=10, activation="softmax"),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.compile(
    # Plain stochastic gradient descent, selected by name
    optimizer="sgd",
    # Sparse labels: every instance has a single target class index from 0 to 9.
    # One-hot encoded targets would use categorical_crossentropy instead.
    # Binary or multilabel binary classification would use a sigmoid output (instead of softmax)
    # together with binary_crossentropy.
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [16]:
# Training set: the data the model actually learns from.
# Validation set:
#   ...is used to monitor the model during training and to tune hyperparameters,
#   so it is considered part of the training process.
# Test set:
#   ...is independent of the training set but follows a similar class distribution;
#   it serves as the final benchmark and is only used after training is complete.
# Validate on the held-out validation split, not the test set: monitoring on the test set
# would leak it into model selection and leave no unbiased data for the final evaluation.
history = model.fit(X_train, y_train, epochs=2, validation_data=(X_valid, y_valid))

Epoch 1/2


2025-02-18 22:03:43.201739: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 172480000 exceeds 10% of free system memory.


[1m  43/1719[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9:30[0m 340ms/step - accuracy: 0.1623 - loss: 2.4059

KeyboardInterrupt: 