# Project 8: Apply key techniques employed in building deep learning architectures

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold, ParameterGrid, train_test_split
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Business Understanding: the task is to recognise handwritten digits (0-9)

# Data Understanding: fetch MNIST as (images, integer labels) train/test pairs
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Scale pixel intensities from 0..255 down to the 0..1 range
X_train, X_test = (
    images.astype('float32') / 255.0 for images in (X_train, X_test)
)

# One-hot encode the digit labels (10 classes)
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

# Data Preparation: hold out 10% of the training images for validation
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)

# Append a trailing channel axis so every image array is (N, 28, 28, 1)
X_train = X_train[..., np.newaxis]
X_val = X_val[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Data Augmentation: small random rotations, zooms and shifts of the
# training images
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
datagen.fit(X_train)

# Transfer Learning: Load the pre-trained EfficientNetB0 model
# The ImageNet weights can only be loaded for 3-channel inputs of at least
# 32x32 pixels, so the backbone is built for (32, 32, 3) and the 28x28x1 MNIST
# images are adapted inside the model instead of changing the data arrays.
IMG_SIZE = 32
base_model = EfficientNetB0(weights='imagenet', include_top=False,
                            input_shape=(IMG_SIZE, IMG_SIZE, 3))

# Freeze the pre-trained layers
base_model.trainable = False

# Input adapter: grayscale 28x28 in [0, 1] -> RGB-like 32x32 in [0, 255]
inputs = tf.keras.Input(shape=(28, 28, 1))
x = tf.keras.layers.Resizing(IMG_SIZE, IMG_SIZE)(inputs)
# Repeat the single grayscale channel three times to mimic an RGB image
x = tf.keras.layers.Concatenate(axis=-1)([x, x, x])
# Keras EfficientNet normalises its input internally and expects raw
# [0, 255] pixel values, while the arrays fed to the model are scaled to [0, 1]
x = tf.keras.layers.Rescaling(255.0)(x)

# training=False keeps the backbone's BatchNormalization layers in inference
# mode, which stays correct even if the backbone is unfrozen for fine-tuning
x = base_model(x, training=False)

# Add custom layers for digit classification
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
outputs = Dense(10, activation='softmax')(x)
model = tf.keras.Model(inputs, outputs)

# Model Compilation
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Model Training
# steps_per_epoch must be a whole number; round up so the final partial batch
# of each epoch is still used.
batch_size = 32
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
history = model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
                    steps_per_epoch=steps_per_epoch, epochs=10,
                    validation_data=(X_val, y_val))

# Evaluation: measure loss and accuracy on the held-out test set
loss, accuracy = model.evaluate(X_test, y_test)
for metric_name, metric_value in (('loss', loss), ('accuracy', accuracy)):
    print(f'Test {metric_name}: {metric_value:.4f}')

# Model Deployment: predict on unseen data and summarise per-class metrics
predictions = model.predict(X_test)
# Collapse the probability vectors and one-hot labels into class indices
predicted_labels = predictions.argmax(axis=1)
true_labels = y_test.argmax(axis=1)
print(classification_report(true_labels, predicted_labels))

# Hyperparameter Tuning: Grid Search
# Each configuration trains a re-initialised classification head with an
# optimizer built from BOTH grid values, and is ranked on the validation split.
# The test set is deliberately not used for model selection.
param_grid = {'optimizer': ['adam', 'sgd'], 'learning_rate': [0.001, 0.01]}
optimizer_classes = {'adam': tf.keras.optimizers.Adam, 'sgd': tf.keras.optimizers.SGD}
# steps_per_epoch must be a whole number; round up to keep the last batch
grid_steps = int(np.ceil(len(X_train) / 32))
best_result = None  # (validation accuracy, params, trained weights)
for params in ParameterGrid(param_grid):
    # Re-draw the trainable Dense weights so a configuration does not inherit
    # the training done by earlier ones; the frozen backbone is left untouched.
    for layer in model.layers:
        if layer.trainable and hasattr(layer, 'kernel_initializer'):
            layer.kernel.assign(
                layer.kernel_initializer(layer.kernel.shape, dtype=layer.kernel.dtype))
            if layer.bias is not None:
                layer.bias.assign(
                    layer.bias_initializer(layer.bias.shape, dtype=layer.bias.dtype))

    grid_optimizer = optimizer_classes[params['optimizer']](
        learning_rate=params['learning_rate'])
    model.compile(optimizer=grid_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(datagen.flow(X_train, y_train, batch_size=32),
              steps_per_epoch=grid_steps, epochs=10,
              validation_data=(X_val, y_val))
    loss, accuracy = model.evaluate(X_val, y_val)
    print(f'Parameters: {params}')
    print(f'Validation loss: {loss:.4f}')
    print(f'Validation accuracy: {accuracy:.4f}')
    print()
    if best_result is None or accuracy > best_result[0]:
        best_result = (accuracy, params, model.get_weights())

# Leave `model` compiled and trained with the best configuration found
best_accuracy, best_params, best_weights = best_result
model.compile(
    optimizer=optimizer_classes[best_params['optimizer']](
        learning_rate=best_params['learning_rate']),
    loss='categorical_crossentropy', metrics=['accuracy'])
model.set_weights(best_weights)
print(f'Best parameters: {best_params} (validation accuracy: {best_accuracy:.4f})')

# Model Evaluation: Cross-Validation
# Each fold trains a re-initialised classification head and is scored on its
# own held-out fold, so no fold is evaluated by a model that already trained
# on it. The weights the model had before cross-validation are restored
# afterwards, so this step only measures performance.
k = 5
X = np.concatenate((X_train, X_val), axis=0)
y = np.concatenate((y_train, y_val), axis=0)
folds = KFold(n_splits=k, shuffle=True, random_state=42)

weights_before_cv = model.get_weights()
# Remember the current optimizer settings so every fold gets a fresh optimizer
# (no carried-over optimizer state) with the same hyperparameters.
optimizer_class = type(model.optimizer)
optimizer_config = model.optimizer.get_config()

fold_accuracies = []
for fold, (train_indices, val_indices) in enumerate(folds.split(X)):
    X_train_fold = X[train_indices]
    y_train_fold = y[train_indices]
    X_val_fold = X[val_indices]
    y_val_fold = y[val_indices]

    # Re-draw the trainable Dense weights; the frozen backbone is left untouched
    for layer in model.layers:
        if layer.trainable and hasattr(layer, 'kernel_initializer'):
            layer.kernel.assign(
                layer.kernel_initializer(layer.kernel.shape, dtype=layer.kernel.dtype))
            if layer.bias is not None:
                layer.bias.assign(
                    layer.bias_initializer(layer.bias.shape, dtype=layer.bias.dtype))
    model.compile(optimizer=optimizer_class.from_config(optimizer_config),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(datagen.flow(X_train_fold, y_train_fold, batch_size=32),
              # steps_per_epoch must be a whole number; round up
              steps_per_epoch=int(np.ceil(len(X_train_fold) / 32)), epochs=10,
              validation_data=(X_val_fold, y_val_fold))
    loss, accuracy = model.evaluate(X_val_fold, y_val_fold)
    fold_accuracies.append(accuracy)
    print(f'Fold {fold+1}:')
    print(f'Validation loss: {loss:.4f}')
    print(f'Validation accuracy: {accuracy:.4f}')
    print()

print(f'Cross-validation accuracy: {np.mean(fold_accuracies):.4f} '
      f'+/- {np.std(fold_accuracies):.4f}')

# Put back the weights the model had before cross-validation
model.set_weights(weights_before_cv)

# Model Deployment: persist the trained model to disk so it can be reloaded later
model_path = 'mnist_classification_model.h5'
model.save(model_path)


In this code:

The 'relu' activation function is used in the hidden custom Dense layer to introduce non-linearity, while the output layer uses 'softmax' to produce a probability for each of the 10 digit classes.

Convolutional operations are performed inside the EfficientNetB0 model. Because its layers are frozen here, they act as a fixed feature extractor that applies the features learned during ImageNet pre-training to the input images.

The code focuses on image classification, so recurrent connections are not relevant in this specific context. Recurrent connections are typically used for sequential data such as text or time series.

Regularization techniques, such as dropout and batch normalization, are not explicitly applied in this code snippet. However, you can incorporate them by adding Dropout and BatchNormalization layers to the custom layers of the model.

The code utilizes the Adam optimization algorithm with a learning rate of 0.001. Hyperparameter tuning can be performed by exploring different optimizers and learning rates using techniques like grid search, as shown in the example.

Transfer learning is applied by using the pre-trained EfficientNetB0 model, which has been trained on the ImageNet dataset. The pre-trained weights are frozen, and only the custom layers on top are trained for the specific task of digit classification.

Model evaluation is performed by calculating the test loss, accuracy, and generating a classification report to assess the performance of the model on the test set.

Model deployment is demonstrated by saving the trained model to a file for future use.

Note that the code snippets provided are for illustrative purposes and may require further modifications based on your specific use case or dataset.