# Optimization Project

## Comparison of SGD and SFW with different Learning Rates

In [None]:
!pip install ucimlrepo
!pip install --upgrade certifi

In [None]:
import itertools
import ssl
import math
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo

In [None]:
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
print("Tensorflow version: ", tf.__version__)

In [None]:
print("List GPU devices: ", tf.config.list_physical_devices('GPU'))

## A Simple DNN with 2 Hidden Layers on a Multivariate Dataset with 7 Classes

# Dry Bean Dataset Description

## Dataset Overview
- **Number of Instances:** 13,611
- **Number of Features:** 16
- **Feature Types:** Integer, Real
- **Subject Area:** Biology
- **Associated Tasks:** Classification
- **Data Type:** Multivariate

## Dataset Information
This dataset consists of images capturing 13,611 grains of seven different registered dry beans, taken with a high-resolution camera. The primary purpose of the dataset is to support classification tasks, specifically to distinguish between seven varieties of dry beans based on various features related to form, shape, type, and structure.

## Features
1. **Area (A):** The area of a bean zone and the number of pixels within its boundaries.
2. **Perimeter (P):** Bean circumference, defined as the length of its border.
3. **Major Axis Length (L):** Distance between the ends of the longest line that can be drawn from a bean.
4. **Minor Axis Length (l):** The longest line that can be drawn from the bean while standing perpendicular to the main axis.
5. **Aspect Ratio (K):** Defines the relationship between L and l.
6. **Eccentricity (Ec):** Eccentricity of the ellipse having the same moments as the region.
7. **Convex Area (C):** Number of pixels in the smallest convex polygon that can contain the area of a bean seed.
8. **Equivalent Diameter (Ed):** The diameter of a circle having the same area as a bean seed area.
9. **Extent (Ex):** The ratio of the pixels in the bounding box to the bean area.
10. **Solidity (S):** Also known as convexity, the ratio of the pixels in the convex shell to those found in beans.
11. **Roundness (R):** Calculated with the formula: (4πA)/(P^2).
12. **Compactness (CO):** Measures the roundness of an object: Ed/L.
13. **ShapeFactor1 (SF1):** Feature.
14. **ShapeFactor2 (SF2):** Feature.
15. **ShapeFactor3 (SF3):** Feature.
16. **ShapeFactor4 (SF4):** Feature.

## Target Variable
- **Class:** Categorical variable indicating the type of dry bean. Possible classes are Seker, Barbunya, Bombay, Cali, Dermason, Horoz, and Sira.

## Units and Missing Values
- Units are specified for relevant features.
- No missing values are reported in the provided information.

## Dataset Creation and Purpose
This dataset was curated to develop a classification model capable of distinguishing between different varieties of dry beans based on high-resolution images and extracted features. The features include both geometrical dimensions and shape forms, providing a comprehensive set for robust classification.



In [None]:
# fetch dataset 
dry_bean_dataset = fetch_ucirepo(id=602)

# data (as pandas dataframes) 
X = dry_bean_dataset.data.features
y = dry_bean_dataset.data.targets

In [None]:
# Display names for the confusion-matrix ticks. They must follow the column order of
# the one-hot encoding applied to `y` below; OneHotEncoder orders its categories by
# sorting the labels, so the names are derived from the sorted unique targets rather
# than hard-coded in an arbitrary order (which would mislabel the classes).
class_names = [str(label).title() for label in np.unique(np.asarray(y).ravel())]

In [None]:
X = X.values
y = y.values

In [None]:
def normalize_features(input_data):
    """Fit a new StandardScaler on `input_data` and return the transformed data."""
    scaler = StandardScaler()
    return scaler.fit_transform(input_data)


def one_hot_label_encoder(input_data):
    """One-hot encode `input_data` and return the result as a dense array.

    The input is reshaped to a single column, a new OneHotEncoder is fitted on
    it, and the encoder's output is densified with `.toarray()`.
    """
    label_column = input_data.reshape(-1, 1)
    encoded = OneHotEncoder().fit_transform(label_column)
    return encoded.toarray()

In [None]:
# NOTE(review): the scaler is fitted on the full feature matrix here, before the
# train/validation/test split performed in the next cell, so the normalization
# statistics include the held-out samples.
X = normalize_features(X)
# Targets are one-hot encoded once, on the full label vector, so every split
# created afterwards shares the same column layout.
y = one_hot_label_encoder(y)

In [None]:
# Hold out 20% as the test set, then 20% of the remainder as the validation set
# (64% / 16% / 20% overall); both splits use random_state=42.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
def plot_loss_curve(history):
    """Plot training and validation loss per epoch from a Keras History object."""
    curves = history.history
    for series in ('loss', 'val_loss'):
        plt.plot(curves[series])
    plt.title('Model Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper right')
    plt.show()


def plot_accuracy_curve(history):
    """Plot training and validation accuracy per epoch from a Keras History object."""
    curves = history.history
    for series in ('accuracy', 'val_accuracy'):
        plt.plot(curves[series])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='lower right')
    plt.show()


def plot_precision_curve(history):
    """Plot training and validation precision per epoch from a Keras History object."""
    curves = history.history
    for series in ('precision', 'val_precision'):
        plt.plot(curves[series])
    plt.title('Model Precision')
    plt.ylabel('Precision')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='lower right')
    plt.show()


def plot_recall_curve(history):
    """Plot training and validation recall per epoch from a Keras History object."""
    curves = history.history
    for series in ('recall', 'val_recall'):
        plt.plot(curves[series])
    plt.title('Model Recall')
    plt.ylabel('Recall')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='lower right')
    plt.show()


def plot_confusion_matrix(dnn_model, features, targets, classes, dataset):
    """Predict on `features` and draw the confusion matrix against `targets`.

    Args:
        dnn_model: Model exposing `predict`; its output is arg-maxed over axis 1.
        features: Inputs passed to `dnn_model.predict`.
        targets: One-hot targets; arg-maxed over axis 1 to obtain class indices.
        classes: Display names used as tick labels, in class-index order.
        dataset: Either 'cifar10' (10 classes) or 'dry_bean' (7 classes).

    Raises:
        ValueError: If `dataset` is not one of the two supported names.
    """
    num_labels_by_dataset = {'cifar10': 10, 'dry_bean': 7}
    if dataset not in num_labels_by_dataset:
        raise ValueError('dataset should be one of cifar10 or dry_bean.')
    # CIFAR-10 has ten classes (indices 0-9); listing only 0-8 silently dropped
    # every sample of the last class and produced a 9x9 matrix for 10 tick labels.
    labels = list(range(num_labels_by_dataset[dataset]))

    # Predict over the whole array; no dependency on a global step count that may
    # belong to a different dataset.
    y_test_preds = dnn_model.predict(features, verbose='auto')
    y_test_preds = np.argmax(y_test_preds, axis=1)
    y_test = np.argmax(targets, axis=1)
    cm = confusion_matrix(y_test, y_test_preds, labels=labels)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Oranges)
    plt.title('Confusion Matrix')
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    plt.colorbar()
    # Cell text switches to white on the darker half of the colour scale.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], 'd'), horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

In [None]:
batch_size = 32

# Number of batches per pass over each split. np.ceil returns a float, so the
# counts are cast to int (as the CIFAR-10 section does) before being used as
# step counts.
train_steps = int(np.ceil(X_train.shape[0] / batch_size))
test_steps = int(np.ceil(X_test.shape[0] / batch_size))
val_steps = int(np.ceil(X_val.shape[0] / batch_size))

In [None]:
def get_f1_score(p, r):
    """Return the F1 score computed from a precision metric and a recall metric.

    Args:
        p: Metric object whose `result().numpy()` yields the precision.
        r: Metric object whose `result().numpy()` yields the recall.

    Returns:
        The harmonic mean 2 * P * R / (P + R), or 0.0 when P + R is zero
        (no positive predictions and no recalled positives), instead of
        dividing by zero.
    """
    precision = p.result().numpy()
    recall = r.result().numpy()
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return 2 * (precision * recall) / denominator

In [None]:
def create_dnn_model(input_shape, num_of_hidden, units, activations, num_of_classes):
    """Assemble a Sequential stack of Dense layers.

    Args:
        input_shape: Number of input features; used as `shape=(input_shape,)`.
        num_of_hidden: How many hidden Dense layers to add.
        units: Per-layer unit counts, indexed 0..num_of_hidden-1.
        activations: Per-layer activations, indexed 0..num_of_hidden-1.
        num_of_classes: Width of the output layer. The output activation is
            'linear' for 1, 'sigmoid' for 2 and 'softmax' otherwise.

    Returns:
        The (uncompiled) Sequential model.
    """
    if num_of_classes == 1:
        output_activation = 'linear'
    elif num_of_classes == 2:
        output_activation = 'sigmoid'
    else:
        output_activation = 'softmax'

    network = tf.keras.models.Sequential()
    network.add(tf.keras.layers.Input(shape=(input_shape,)))
    for layer_idx in range(num_of_hidden):
        hidden = tf.keras.layers.Dense(units[layer_idx], activations[layer_idx])
        network.add(hidden)
    network.add(tf.keras.layers.Dense(num_of_classes, activation=output_activation))
    return network

In [None]:
def reset_model_parameters(model):
    """Re-initialize the weights of every layer of `model` in place.

    For each layer that owns weights, the first weight array is redrawn with
    GlorotUniform and every remaining array (e.g. the bias) is set to zeros.
    Layers without weights are skipped, and layers with a single weight array
    (no bias) are handled, instead of failing on a missing index.

    Args:
        model: Object exposing `layers`, each with `get_weights`/`set_weights`.
    """
    for layer in model.layers:
        current = layer.get_weights()
        if not current:
            # Nothing to reset (e.g. input, activation or pooling layers).
            continue
        fresh = [tf.keras.initializers.GlorotUniform()(shape=current[0].shape)]
        fresh.extend(tf.zeros_like(weight) for weight in current[1:])
        layer.set_weights(fresh)
    print('All model parameters have been reinitilized.')

In [None]:
# Shared metric instances: these same two objects are passed to every compile()
# call in this notebook and are read back by get_f1_score() after evaluate().
precision_metric = tf.keras.metrics.Precision()
recall_metric = tf.keras.metrics.Recall()

In [None]:
# learning_rates = []

In [None]:
class PrintLearningRateCallback(tf.keras.callbacks.Callback):
    """Keras callback that prints the optimizer's learning rate after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # Read the canonical `learning_rate` property rather than the `lr`
        # backward-compatibility alias, which is not available on every
        # optimizer implementation.
        learning_rate = self.model.optimizer.learning_rate.numpy()
        print(f"\nLearning Rate at the end of epoch {epoch + 1}: {learning_rate}\n")

#         learning_rates.append(learning_rate)


In [None]:
# Early stopping on validation recall: stop after 5 epochs without a new maximum
# of 'val_recall' and restore the weights of the best epoch. In this notebook it
# is passed only to the DenseNet fits; the dry-bean fits do not use it.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_recall',
    mode='max',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

### SGD as optimizer

In [None]:
model = create_dnn_model(input_shape=X_train.shape[1], num_of_hidden=2, units=[16, 8], activations=['relu', 'relu'],
                         num_of_classes=len(class_names))

In [None]:
model.summary()

In [None]:
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', precision_metric, recall_metric])

In [None]:
model_history = model.fit(X_train, y_train, batch_size=batch_size, epochs=20, shuffle=True,
                          validation_data=(X_val, y_val),
                          steps_per_epoch=train_steps, validation_steps=val_steps,
                          callbacks=[PrintLearningRateCallback()], verbose='auto')

In [None]:
plot_loss_curve(model_history)
plot_accuracy_curve(model_history)
plot_precision_curve(model_history)
plot_recall_curve(model_history)

plot_confusion_matrix(model, X_test, y_test, classes=class_names, dataset='dry_bean')

model_score = model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
print("DNN model classifier evaluation results with SGD optimizer:\n")
print('Test set Loss = {:.5f}'.format(model_score[0]))
print('Test set Accuracy = {:.2f}'.format(model_score[1]))
print('Test set Precision = {:.2f}'.format(model_score[2]))
print('Test set Recall = {:.2f}'.format(model_score[3]))
print('Test set F1 Score = {:.2f}'.format(get_f1_score(precision_metric, recall_metric)))

In [None]:
reset_model_parameters(model)

### Stochastic Frank-Wolfe with L1 ball as feasible region as optimizer

In [None]:
class MyLearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule selected by name and evaluated per optimizer step.

    With ``t`` the step cast to float32 and ``k0`` the initial learning rate:

    * FIX:    ``k0``
    * STEP:   ``k0 * gamma ** floor(t / l)``
    * EXP:    ``k0 * gamma ** t``
    * INV:    ``k0 / (1 + gamma * t) ** p``
    * POLY:   ``k0 * (1 - t / l) ** p`` (not clamped for ``t > l``)
    * TRI / SIN:       ``k0 + (k1 - k0) * w(t)`` where ``w`` is a triangular /
      absolute-sine wave in [0, 1] with half-period ``2 * l``
    * TRI2 / SIN2:     as above, amplitude halved every ``2 * l`` steps
    * TRIEXP / SINEXP: as TRI / SIN, whole value multiplied by ``gamma ** t``
    * COS:    ``k0 + (k1 - k0) * 0.5 * (1 + cos(2 * pi * t / l))``

    Args:
        initial_learning_rate: Base rate ``k0``.
        method: One of the method names listed above.
        params: Dict of method-specific hyper-parameters, read with ``.get``
            under the keys ``gamma``, ``l``, ``p`` and ``k1``.

    Raises:
        ValueError: From ``__call__`` when ``method`` is not a known name.
    """

    def __init__(self, initial_learning_rate, method, params):
        self.initial_learning_rate = initial_learning_rate
        self.method = method
        self.params = params

    def __call__(self, step):
        k0 = self.initial_learning_rate
        if self.method == "FIX":
            return k0

        t = tf.cast(step, tf.float32)
        gamma = self.params.get("gamma")
        period = self.params.get("l")
        p = self.params.get("p")
        k1 = self.params.get("k1")

        if self.method == "STEP":
            return k0 * tf.pow(gamma, tf.math.floor(t / period))

        if self.method == "EXP":
            return k0 * tf.pow(gamma, t)

        if self.method == "INV":
            return k0 / tf.pow((1 + (gamma * t)), p)

        if self.method == "POLY":
            return k0 * tf.pow((1 - (t / period)), p)

        if self.method in ("TRI", "TRI2", "TRIEXP", "SIN", "SIN2", "SINEXP"):
            phase = math.pi * t / (2 * period)
            if self.method.startswith("TRI"):
                # |asin(sin(x))| ranges over [0, pi/2]; dividing by pi/2 maps it
                # to [0, 1] so the rate peaks exactly at k1. TRI and TRI2
                # previously divided by 2/pi, unlike TRIEXP, and overshot k1.
                wave = tf.abs(tf.math.asin(tf.math.sin(phase))) / (math.pi / 2)
            else:
                wave = tf.abs(tf.math.sin(phase))

            if self.method.endswith("2"):
                # Halve the amplitude after every completed 2*l steps.
                halving = tf.pow(2.0, tf.math.floor(t / (2 * period)))
                return k0 + (k1 - k0) * wave / halving
            if self.method.endswith("EXP"):
                return (k0 + (k1 - k0) * wave) * tf.pow(gamma, t)
            return k0 + (k1 - k0) * wave

        if self.method == "COS":
            return k0 + (k1 - k0) * 0.5 * (1 + tf.math.cos(math.pi * 2 * t / period))

        raise ValueError("The given method does not exist.")

    def get_config(self):
        # 'params' must carry the hyper-parameter dict (not the method name) so
        # that from_config(get_config()) rebuilds an equivalent schedule.
        return {
            'initial_learning_rate': self.initial_learning_rate,
            'method': self.method,
            'params': self.params
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)

In [None]:
class StochasticFrankWolfe(tf.keras.optimizers.Optimizer):
    """Momentum-based stochastic Frank-Wolfe style optimizer.

    Per variable and step ``t`` (1-based):

    1. ``g = grad / ||grad||_1`` (zero when the gradient is entirely zero)
    2. ``m <- (1 - momentum) * g + momentum * m``
    3. ``v = sign(-m)``
    4. ``x <- x + (lr / t) * (v - x)``

    Args:
        learning_rate: Float or LearningRateSchedule; divided by the step
            count to form the Frank-Wolfe step size.
        momentum: Averaging coefficient for the gradient estimate, in [0, 1].
        nesterov: Accepted for signature compatibility; not used.
        Remaining arguments are forwarded unchanged to the base optimizer.

    Raises:
        ValueError: If a numeric `momentum` lies outside [0, 1].
    """

    def __init__(
            self,
            learning_rate,
            momentum=0.9,
            nesterov=False,
            weight_decay=None,
            clipnorm=None,
            clipvalue=None,
            global_clipnorm=None,
            use_ema=False,
            ema_momentum=0.99,
            ema_overwrite_frequency=None,
            jit_compile=True,
            name="StochasticFrankWolfe",
            **kwargs
    ):
        super().__init__(
            name=name,
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            use_ema=use_ema,
            ema_momentum=ema_momentum,
            ema_overwrite_frequency=ema_overwrite_frequency,
            jit_compile=jit_compile,
            **kwargs
        )
        if isinstance(momentum, (int, float)) and (momentum < 0 or momentum > 1):
            raise ValueError("`momentum` must be between [0, 1].")
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.momentum = momentum

    def build(self, var_list):
        """Create one momentum slot ("m") per model variable, once."""
        super().build(var_list)
        if hasattr(self, "_built") and self._built:
            return
        self._built = True
        self.momentums = []
        for var in var_list:
            self.momentums.append(
                self.add_variable_from_reference(
                    model_variable=var, variable_name="m"
                )
            )

    def update_step(self, gradient, variable):
        """Apply one Frank-Wolfe update to `variable` given its `gradient`."""
        # The update touches every entry of the variable (the momentum decays and
        # the iterate is pulled towards v everywhere), so a sparse gradient is
        # densified up front rather than scattered into selected rows.
        if isinstance(gradient, tf.IndexedSlices):
            gradient = tf.convert_to_tensor(gradient)

        lr = tf.cast(self.learning_rate, variable.dtype)
        local_step = tf.cast(self.iterations + 1, variable.dtype)

        # Step size: scheduled learning rate divided by the 1-based step count.
        step_size = lr / local_step

        momentum = tf.cast(self.momentum, variable.dtype)

        # Momentum slot belonging to this variable.
        m = self.momentums[self._index_dict[self._var_key(variable)]]

        # Normalize the gradient by its L1 norm (sum of absolute values over all
        # entries). divide_no_nan yields 0 for an all-zero gradient instead of
        # 0/0 = NaN, which would otherwise contaminate the momentum permanently.
        scaled_gradient = tf.math.divide_no_nan(gradient, tf.norm(gradient, ord=1))

        # Exponential moving average of the normalized gradients.
        m.assign((1. - momentum) * scaled_gradient + momentum * m)

        # Linear minimization step: sign(-m) minimizes <m, v> over the unit
        # L-infinity ball (a hypercube vertex).
        # NOTE(review): the section heading calls the feasible region an L1 ball;
        # the L1-ball minimizer would be a signed one-hot vector at argmax |m|.
        v = tf.sign(-m)

        # Convex-combination update x <- x + step_size * (v - x).
        variable.assign_add(step_size * (v - variable))

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "learning_rate": self._serialize_hyperparameter(
                    self._learning_rate
                ),
                "momentum": self.momentum,
            }
        )
        return config

In [None]:
#FIX
model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=MyLearningRateSchedule(initial_learning_rate=0.1, method='FIX', params={}), momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', precision_metric, recall_metric])

In [None]:
model_history = model.fit(X_train, y_train, batch_size=batch_size, epochs=20, shuffle=True,
                          validation_data=(X_val, y_val),
                          steps_per_epoch=train_steps, validation_steps=val_steps,
                          callbacks=[PrintLearningRateCallback()], verbose='auto')

In [None]:
plot_loss_curve(model_history)
plot_accuracy_curve(model_history)
plot_precision_curve(model_history)
plot_recall_curve(model_history)

plot_confusion_matrix(model, X_test, y_test, classes=class_names, dataset='dry_bean')

model_score = model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
print("DNN model classifier evaluation results with StochasticFrankWolfe optimizer and FIX Learning Rate:\n")
print('Test set Loss = {:.5f}'.format(model_score[0]))
print('Test set Accuracy = {:.2f}'.format(model_score[1]))
print('Test set Precision = {:.2f}'.format(model_score[2]))
print('Test set Recall = {:.2f}'.format(model_score[3]))
print('Test set F1 Score = {:.2f}'.format(get_f1_score(precision_metric, recall_metric)))

In [None]:
reset_model_parameters(model)

In [None]:
#EXP
# Use the same categorical cross-entropy as the SGD and FIX runs: the model ends in
# a 7-way softmax with one-hot targets, and a binary cross-entropy here would make
# the reported loss incomparable with those runs.
model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=MyLearningRateSchedule(initial_learning_rate=0.5, method='EXP', params={'gamma': 0.999}),
        momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', precision_metric, recall_metric])

In [None]:
model_history = model.fit(X_train, y_train, batch_size=batch_size, epochs=50, shuffle=True,
                          validation_data=(X_val, y_val),
                          steps_per_epoch=train_steps, validation_steps=val_steps,
                          callbacks=[PrintLearningRateCallback()], verbose='auto')

In [None]:
plot_loss_curve(model_history)
plot_accuracy_curve(model_history)
plot_precision_curve(model_history)
plot_recall_curve(model_history)

plot_confusion_matrix(model, X_test, y_test, classes=class_names, dataset='dry_bean')

model_score = model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
print("DNN model classifier evaluation results with StochasticFrankWolfe optimizer and EXP Learning Rate:\n")
print('Test set Loss = {:.5f}'.format(model_score[0]))
print('Test set Accuracy = {:.2f}'.format(model_score[1]))
print('Test set Precision = {:.2f}'.format(model_score[2]))
print('Test set Recall = {:.2f}'.format(model_score[3]))
print('Test set F1 Score = {:.2f}'.format(get_f1_score(precision_metric, recall_metric)))

In [None]:
reset_model_parameters(model)

In [None]:
#INV
# Use the same categorical cross-entropy as the SGD and FIX runs: the model ends in
# a 7-way softmax with one-hot targets, and a binary cross-entropy here would make
# the reported loss incomparable with those runs.
model.compile(
    optimizer=StochasticFrankWolfe(learning_rate=MyLearningRateSchedule(initial_learning_rate=0.5, method='INV',
                                                                        params={'gamma': 0.001, 'p': 0.9}),
                                   momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', precision_metric, recall_metric])

In [None]:
model_history = model.fit(X_train, y_train, batch_size=batch_size, epochs=40, shuffle=True,
                          validation_data=(X_val, y_val),
                          steps_per_epoch=train_steps, validation_steps=val_steps,
                          callbacks=[PrintLearningRateCallback()], verbose='auto')

In [None]:
plot_loss_curve(model_history)
plot_accuracy_curve(model_history)
plot_precision_curve(model_history)
plot_recall_curve(model_history)

plot_confusion_matrix(model, X_test, y_test, classes=class_names, dataset='dry_bean')

model_score = model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
print("DNN model classifier evaluation results with StochasticFrankWolfe optimizer and INV Learning Rate:\n")
print('Test set Loss = {:.5f}'.format(model_score[0]))
print('Test set Accuracy = {:.2f}'.format(model_score[1]))
print('Test set Precision = {:.2f}'.format(model_score[2]))
print('Test set Recall = {:.2f}'.format(model_score[3]))
print('Test set F1 Score = {:.2f}'.format(get_f1_score(precision_metric, recall_metric)))

In [None]:
reset_model_parameters(model)

In [None]:
#STEP
# Use the same categorical cross-entropy as the SGD and FIX runs: the model ends in
# a 7-way softmax with one-hot targets, and a binary cross-entropy here would make
# the reported loss incomparable with those runs.
model.compile(
    optimizer=StochasticFrankWolfe(learning_rate=MyLearningRateSchedule(initial_learning_rate=0.5, method='STEP',
                                                                        params={'gamma': 0.99, 'l': 130}),
                                   momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', precision_metric, recall_metric])

In [None]:
model_history = model.fit(X_train, y_train, batch_size=batch_size, epochs=20, shuffle=True,
                          validation_data=(X_val, y_val),
                          steps_per_epoch=train_steps, validation_steps=val_steps,
                          callbacks=[PrintLearningRateCallback()], verbose='auto')

In [None]:
plot_loss_curve(model_history)
plot_accuracy_curve(model_history)
plot_precision_curve(model_history)
plot_recall_curve(model_history)

plot_confusion_matrix(model, X_test, y_test, classes=class_names, dataset='dry_bean')

model_score = model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
print("DNN model classifier evaluation results with StochasticFrankWolfe optimizer and STEP Learning Rate:\n")
print('Test set Loss = {:.5f}'.format(model_score[0]))
print('Test set Accuracy = {:.2f}'.format(model_score[1]))
print('Test set Precision = {:.2f}'.format(model_score[2]))
print('Test set Recall = {:.2f}'.format(model_score[3]))
print('Test set F1 Score = {:.2f}'.format(get_f1_score(precision_metric, recall_metric)))

In [None]:
reset_model_parameters(model)

In [None]:
#POLY
# Use the same categorical cross-entropy as the SGD and FIX runs: the model ends in
# a 7-way softmax with one-hot targets, and a binary cross-entropy here would make
# the reported loss incomparable with those runs.
model.compile(
    optimizer=StochasticFrankWolfe(learning_rate=MyLearningRateSchedule(initial_learning_rate=0.1, method='POLY',
                                                                        params={'p': 2, 'l': train_steps ** 2}),
                                   momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', precision_metric, recall_metric])

In [None]:
model_history = model.fit(X_train, y_train, batch_size=batch_size, epochs=20, shuffle=True,
                          validation_data=(X_val, y_val),
                          steps_per_epoch=train_steps, validation_steps=val_steps,
                          callbacks=[PrintLearningRateCallback()], verbose='auto')

In [None]:
plot_loss_curve(model_history)
plot_accuracy_curve(model_history)
plot_precision_curve(model_history)
plot_recall_curve(model_history)

plot_confusion_matrix(model, X_test, y_test, classes=class_names, dataset='dry_bean')

model_score = model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
print("DNN model classifier evaluation results with StochasticFrankWolfe optimizer and POLY Learning Rate:\n")
print('Test set Loss = {:.5f}'.format(model_score[0]))
print('Test set Accuracy = {:.2f}'.format(model_score[1]))
print('Test set Precision = {:.2f}'.format(model_score[2]))
print('Test set Recall = {:.2f}'.format(model_score[3]))
print('Test set F1 Score = {:.2f}'.format(get_f1_score(precision_metric, recall_metric)))

In [None]:
reset_model_parameters(model)

In [None]:
#SINEXP
# Use the same categorical cross-entropy as the SGD and FIX runs: the model ends in
# a 7-way softmax with one-hot targets, and a binary cross-entropy here would make
# the reported loss incomparable with those runs.
model.compile(
    optimizer=StochasticFrankWolfe(learning_rate=MyLearningRateSchedule(initial_learning_rate=0.05, method='SINEXP',
                                                                        params={'k1': 0.05, 'l': 500, 'gamma': 0.999}),
                                   momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', precision_metric, recall_metric])

In [None]:
model_history = model.fit(X_train, y_train, batch_size=batch_size, epochs=100, shuffle=True,
                          validation_data=(X_val, y_val),
                          steps_per_epoch=train_steps, validation_steps=val_steps,
                          callbacks=[PrintLearningRateCallback()], verbose='auto')

In [None]:
plot_loss_curve(model_history)
plot_accuracy_curve(model_history)
plot_precision_curve(model_history)
plot_recall_curve(model_history)

plot_confusion_matrix(model, X_test, y_test, classes=class_names, dataset='dry_bean')

model_score = model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
print("DNN model classifier evaluation results with StochasticFrankWolfe optimizer and SINEXP Learning Rate:\n")
print('Test set Loss = {:.5f}'.format(model_score[0]))
print('Test set Accuracy = {:.2f}'.format(model_score[1]))
print('Test set Precision = {:.2f}'.format(model_score[2]))
print('Test set Recall = {:.2f}'.format(model_score[3]))
print('Test set F1 Score = {:.2f}'.format(get_f1_score(precision_metric, recall_metric)))

In [None]:
reset_model_parameters(model)

# DenseNet121 on CIFAR-10 Dataset with SGD and SFW

# CIFAR-10 Dataset Description

## Dataset Overview
- **Number of Instances:** 60,000 (50,000 for training, 10,000 for testing)
- **Number of Classes:** 10
- **Data Type:** Multivariate
- **Subject Area:** Computer Vision
- **Associated Tasks:** Image Classification
- **Image Dimensions:** 32x32 pixels with 3 color channels (RGB)

## Dataset Information
The CIFAR-10 dataset is a collection of 60,000 32x32 color images in 10 different classes, with 6,000 images per class. The dataset is split into a training set of 50,000 images and a test set of 10,000 images. Each class represents a distinct object or animal category.

## Classes
1. **Airplane**
2. **Automobile**
3. **Bird**
4. **Cat**
5. **Deer**
6. **Dog**
7. **Frog**
8. **Horse**
9. **Ship**
10. **Truck**

## Image Features
- Each image is 32x32 pixels, and it has three color channels (RGB).
- Total Features: \(32 \times 32 \times 3 = 3072\) features per image.

## Target Variable
- **Class Label:** Categorical variable indicating the class of the object or animal in the image.

## Dataset Purpose
CIFAR-10 is widely used in the field of computer vision for benchmarking image classification algorithms. The relatively small size of the images and the variety of classes make it a suitable dataset for testing and comparing the performance of different models.

## Units and Missing Values
- Pixel values in the RGB channels represent color intensity (0 to 255).
- No missing values are reported as images are complete and standardized.

## Additional Notes
- CIFAR-10 serves as a standard benchmark in machine learning research and is often used for educational purposes due to its manageable size.



In [None]:
# Load CIFAR-10, then carve the validation set out of the training data:
# samples from index 40000 onward become validation, the first 40000 stay as training.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
split = 40000
X_val, y_val = X_train[split:], y_train[split:]
X_train, y_train = X_train[:split], y_train[:split]

In [None]:
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

In [None]:
batch_size = 32

train_steps = int(np.ceil(X_train.shape[0] / batch_size))
test_steps = int(np.ceil(X_test.shape[0] / batch_size))
val_steps = int(np.ceil(X_val.shape[0] / batch_size))

In [None]:
plt.figure(figsize=(10, 10))
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(X_train[i], cmap=plt.cm.binary)
    plt.xlabel(class_names[y_train[i][0]])
plt.show()

In [None]:
X_train = tf.keras.applications.densenet.preprocess_input(X_train)
X_val = tf.keras.applications.densenet.preprocess_input(X_val)
X_test = tf.keras.applications.densenet.preprocess_input(X_test)

In [None]:
# Each call fits its own encoder on that split alone, so the three one-hot layouts
# agree only if every split contains all classes.
# NOTE(review): presumably true for these CIFAR-10 splits — confirm.
y_train = one_hot_label_encoder(y_train)
y_val = one_hot_label_encoder(y_val)
y_test = one_hot_label_encoder(y_test)

In [None]:
densenet_model = tf.keras.applications.densenet.DenseNet121(include_top=True, weights=None,
                                                            input_shape=X_train.shape[1:],
                                                            pooling='avg', classes=len(class_names),
                                                            classifier_activation='softmax')

In [None]:
densenet_model.summary()

In [None]:
densenet_model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.0005),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy', precision_metric, recall_metric])

In [None]:
densenet_model_history = densenet_model.fit(X_train, y_train, batch_size=batch_size, epochs=45, shuffle=True,
                                            validation_data=(X_val, y_val),
                                            steps_per_epoch=train_steps, validation_steps=val_steps, callbacks=[es],
                                            verbose='auto')

In [None]:
plot_loss_curve(densenet_model_history)
plot_accuracy_curve(densenet_model_history)

plot_confusion_matrix(densenet_model, X_test, y_test, classes=class_names, dataset='cifar10')

densenet_model_score = densenet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
print("DenseNet model classifier evaluation results with SGD optimizer:\n")
print('Test set Loss = {:.5f}'.format(densenet_model_score[0]))
print('Test set Accuracy = {:.2f}'.format(densenet_model_score[1]))
print('Test set Precision = {:.2f}'.format(densenet_model_score[2]))
print('Test set Recall = {:.2f}'.format(densenet_model_score[3]))
print('Test set F1 Score = {:.2f}'.format(get_f1_score(precision_metric, recall_metric)))

In [None]:
# FIX run: rebuild DenseNet121 from scratch (weights=None) so this experiment
# does not reuse the weights trained in the previous run.
densenet_model = tf.keras.applications.densenet.DenseNet121(
    weights=None,
    include_top=True,
    classes=len(class_names),
    classifier_activation='softmax',
    input_shape=X_train.shape[1:],
    # NOTE(review): Keras documents `pooling` as a feature-extraction option
    # for include_top=False; it looks redundant here — confirm.
    pooling='avg',
)

In [None]:
# FIX run: StochasticFrankWolfe (defined outside this section) with momentum 0.9
# and a MyLearningRateSchedule object using method 'FIX' and initial rate 0.1.
# NOTE(review): the other SFW runs in this notebook pass lr_schedule= and
# schedule_params= instead of a schedule object — confirm both forms are
# supported by StochasticFrankWolfe.
densenet_model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=MyLearningRateSchedule(
            initial_learning_rate=0.1,
            method='FIX',
            params={},
        ),
        momentum=0.9,
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
    ),
    metrics=[
        'accuracy',
        precision_metric,
        recall_metric,
    ],
)

In [None]:
# Train the FIX-schedule SFW model for at most 50 epochs, validating on the
# validation split each epoch. `es`, `batch_size`, `train_steps` and
# `val_steps` are defined outside this section.
densenet_model_history = densenet_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=batch_size,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    shuffle=True,
    callbacks=[es],
    verbose='auto',
)

In [None]:
# Training curves for the FIX run, taken from the history returned by fit().
plot_loss_curve(densenet_model_history)
plot_accuracy_curve(densenet_model_history)

# Confusion matrix of the trained model on the test split.
plot_confusion_matrix(densenet_model, X_test, y_test, classes=class_names, dataset='cifar10')

# evaluate() yields the loss followed by the metrics in the order they were
# passed to compile(): accuracy, precision, recall.
densenet_model_score = densenet_model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=batch_size,
    steps=test_steps,
    verbose='auto',
)
print("DenseNet model classifier evaluation results with SFW optimizer with fixed step size:\n")
print(f'Test set Loss = {densenet_model_score[0]:.5f}')
print(f'Test set Accuracy = {densenet_model_score[1]:.2f}')
print(f'Test set Precision = {densenet_model_score[2]:.2f}')
print(f'Test set Recall = {densenet_model_score[3]:.2f}')
print(f'Test set F1 Score = {get_f1_score(precision_metric, recall_metric):.2f}')

In [None]:
# EXP run: rebuild DenseNet121 from scratch (weights=None) so this experiment
# does not reuse the weights trained in the previous run.
densenet_model = tf.keras.applications.densenet.DenseNet121(
    weights=None,
    include_top=True,
    classes=len(class_names),
    classifier_activation='softmax',
    input_shape=X_train.shape[1:],
    # NOTE(review): Keras documents `pooling` as a feature-extraction option
    # for include_top=False; it looks redundant here — confirm.
    pooling='avg',
)

In [None]:
# EXP run: StochasticFrankWolfe (defined outside this section) with base
# learning rate 0.5, momentum 0.99 and the 'EXP' schedule (gamma = 0.01).
densenet_model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=0.5,
        momentum=0.99,
        lr_schedule='EXP',
        schedule_params={'gamma': 0.01},
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
    ),
    metrics=[
        'accuracy',
        precision_metric,
        recall_metric,
    ],
)

In [None]:
class PrintLR(tf.keras.callbacks.Callback):
    """Keras callback that prints the optimizer's learning rate after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print the current learning rate of the model being trained.

        Args:
            epoch: Epoch index supplied by Keras; printed as ``epoch + 1``.
            logs: Metric results for the epoch (unused).
        """
        # Read the optimizer from the model Keras attached to this callback
        # rather than from the notebook-global `densenet_model`, which is
        # rebound for every experiment and could point at a different model.
        print('\nLearning rate for epoch {} is {}'.format(epoch + 1, self.model.optimizer.lr.numpy()))

In [None]:
# Train the EXP-schedule SFW model for at most 50 epochs, validating on the
# validation split each epoch. A PrintLR callback reports the learning rate
# after every epoch. `es`, `batch_size`, `train_steps` and `val_steps` are
# defined outside this section.
densenet_model_history = densenet_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=batch_size,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    shuffle=True,
    callbacks=[es, PrintLR()],
    verbose='auto',
)

In [None]:
# Training curves for the EXP run, taken from the history returned by fit().
plot_loss_curve(densenet_model_history)
plot_accuracy_curve(densenet_model_history)

# Confusion matrix of the trained model on the test split.
plot_confusion_matrix(densenet_model, X_test, y_test, classes=class_names, dataset='cifar10')

# evaluate() yields the loss followed by the metrics in the order they were
# passed to compile(): accuracy, precision, recall.
densenet_model_score = densenet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
# Header corrected: this run uses the 'EXP' schedule, not the fixed step size
# named by the copy-pasted message.
print("DenseNet model classifier evaluation results with SFW optimizer with EXP learning rate schedule:\n")
print(f'Test set Loss = {densenet_model_score[0]:.5f}')
print(f'Test set Accuracy = {densenet_model_score[1]:.2f}')
print(f'Test set Precision = {densenet_model_score[2]:.2f}')
print(f'Test set Recall = {densenet_model_score[3]:.2f}')
# NOTE(review): F1 is computed from the metric objects rather than from
# densenet_model_score — presumably get_f1_score reads their current state
# after evaluate(); confirm against its definition.
print(f'Test set F1 Score = {get_f1_score(precision_metric, recall_metric):.2f}')

In [None]:
# INV run: rebuild DenseNet121 from scratch (weights=None) so this experiment
# does not reuse the weights trained in the previous run.
densenet_model = tf.keras.applications.densenet.DenseNet121(
    weights=None,
    include_top=True,
    classes=len(class_names),
    classifier_activation='softmax',
    input_shape=X_train.shape[1:],
    # NOTE(review): Keras documents `pooling` as a feature-extraction option
    # for include_top=False; it looks redundant here — confirm.
    pooling='avg',
)

In [None]:
# INV run: StochasticFrankWolfe (defined outside this section) with base
# learning rate 0.001, momentum 0.99 and the 'INV' schedule (gamma = 0.09).
densenet_model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=0.001,
        momentum=0.99,
        lr_schedule='INV',
        schedule_params={'gamma': 0.09},
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
    ),
    metrics=[
        'accuracy',
        precision_metric,
        recall_metric,
    ],
)

In [None]:
# Train the INV-schedule SFW model for at most 20 epochs, validating on the
# validation split each epoch. `es`, `batch_size`, `train_steps` and
# `val_steps` are defined outside this section.
densenet_model_history = densenet_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=batch_size,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    shuffle=True,
    callbacks=[es],
    verbose='auto',
)

In [None]:
# Training curves for the INV run, taken from the history returned by fit().
plot_loss_curve(densenet_model_history)
plot_accuracy_curve(densenet_model_history)

# Confusion matrix of the trained model on the test split.
plot_confusion_matrix(densenet_model, X_test, y_test, classes=class_names, dataset='cifar10')

# evaluate() yields the loss followed by the metrics in the order they were
# passed to compile(): accuracy, precision, recall.
densenet_model_score = densenet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
# Header corrected: this run uses the 'INV' schedule, not the fixed step size
# named by the copy-pasted message.
print("DenseNet model classifier evaluation results with SFW optimizer with INV learning rate schedule:\n")
print(f'Test set Loss = {densenet_model_score[0]:.5f}')
print(f'Test set Accuracy = {densenet_model_score[1]:.2f}')
print(f'Test set Precision = {densenet_model_score[2]:.2f}')
print(f'Test set Recall = {densenet_model_score[3]:.2f}')
# NOTE(review): F1 is computed from the metric objects rather than from
# densenet_model_score — presumably get_f1_score reads their current state
# after evaluate(); confirm against its definition.
print(f'Test set F1 Score = {get_f1_score(precision_metric, recall_metric):.2f}')

In [None]:
# POLY run: rebuild DenseNet121 from scratch (weights=None) so this experiment
# does not reuse the weights trained in the previous run.
densenet_model = tf.keras.applications.densenet.DenseNet121(
    weights=None,
    include_top=True,
    classes=len(class_names),
    classifier_activation='softmax',
    input_shape=X_train.shape[1:],
    # NOTE(review): Keras documents `pooling` as a feature-extraction option
    # for include_top=False; it looks redundant here — confirm.
    pooling='avg',
)

In [None]:
# POLY run: StochasticFrankWolfe (defined outside this section) with base
# learning rate 0.001, momentum 0.99 and the 'POLY' schedule, parameterised
# with l = train_steps and p = 1.1.
densenet_model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=0.001,
        momentum=0.99,
        lr_schedule='POLY',
        schedule_params={'l': train_steps, 'p': 1.1},
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
    ),
    metrics=[
        'accuracy',
        precision_metric,
        recall_metric,
    ],
)

In [None]:
# Train the POLY-schedule SFW model for at most 20 epochs, validating on the
# validation split each epoch. `es`, `batch_size`, `train_steps` and
# `val_steps` are defined outside this section.
densenet_model_history = densenet_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=batch_size,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    shuffle=True,
    callbacks=[es],
    verbose='auto',
)

In [None]:
# Training curves for the POLY run, taken from the history returned by fit().
plot_loss_curve(densenet_model_history)
plot_accuracy_curve(densenet_model_history)

# Confusion matrix of the trained model on the test split.
plot_confusion_matrix(densenet_model, X_test, y_test, classes=class_names, dataset='cifar10')

# evaluate() yields the loss followed by the metrics in the order they were
# passed to compile(): accuracy, precision, recall.
densenet_model_score = densenet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
# Header corrected: this run uses the 'POLY' schedule, not the fixed step size
# named by the copy-pasted message.
print("DenseNet model classifier evaluation results with SFW optimizer with POLY learning rate schedule:\n")
print(f'Test set Loss = {densenet_model_score[0]:.5f}')
print(f'Test set Accuracy = {densenet_model_score[1]:.2f}')
print(f'Test set Precision = {densenet_model_score[2]:.2f}')
print(f'Test set Recall = {densenet_model_score[3]:.2f}')
# NOTE(review): F1 is computed from the metric objects rather than from
# densenet_model_score — presumably get_f1_score reads their current state
# after evaluate(); confirm against its definition.
print(f'Test set F1 Score = {get_f1_score(precision_metric, recall_metric):.2f}')

In [None]:
# TRI run: rebuild DenseNet121 from scratch (weights=None) so this experiment
# does not reuse the weights trained in the previous run.
densenet_model = tf.keras.applications.densenet.DenseNet121(
    weights=None,
    include_top=True,
    classes=len(class_names),
    classifier_activation='softmax',
    input_shape=X_train.shape[1:],
    # NOTE(review): Keras documents `pooling` as a feature-extraction option
    # for include_top=False; it looks redundant here — confirm.
    pooling='avg',
)

In [None]:
# TRI run: StochasticFrankWolfe (defined outside this section) with base
# learning rate 0.001, momentum 0.99 and the 'TRI' schedule, parameterised
# with k0, k1, l and gamma as given below.
densenet_model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=0.001,
        momentum=0.99,
        lr_schedule='TRI',
        schedule_params={'k0': 0.0001, 'k1': 0.001, 'l': 3, 'gamma': 0.9},
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
    ),
    metrics=[
        'accuracy',
        precision_metric,
        recall_metric,
    ],
)

In [None]:
# Train the TRI-schedule SFW model for at most 20 epochs, validating on the
# validation split each epoch. `es`, `batch_size`, `train_steps` and
# `val_steps` are defined outside this section.
densenet_model_history = densenet_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=batch_size,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    shuffle=True,
    callbacks=[es],
    verbose='auto',
)

In [None]:
# Training curves for the TRI run, taken from the history returned by fit().
plot_loss_curve(densenet_model_history)
plot_accuracy_curve(densenet_model_history)

# Confusion matrix of the trained model on the test split.
plot_confusion_matrix(densenet_model, X_test, y_test, classes=class_names, dataset='cifar10')

# evaluate() yields the loss followed by the metrics in the order they were
# passed to compile(): accuracy, precision, recall.
densenet_model_score = densenet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
# Header corrected: this run uses the 'TRI' schedule, not the fixed step size
# named by the copy-pasted message.
print("DenseNet model classifier evaluation results with SFW optimizer with TRI learning rate schedule:\n")
print(f'Test set Loss = {densenet_model_score[0]:.5f}')
print(f'Test set Accuracy = {densenet_model_score[1]:.2f}')
print(f'Test set Precision = {densenet_model_score[2]:.2f}')
print(f'Test set Recall = {densenet_model_score[3]:.2f}')
# NOTE(review): F1 is computed from the metric objects rather than from
# densenet_model_score — presumably get_f1_score reads their current state
# after evaluate(); confirm against its definition.
print(f'Test set F1 Score = {get_f1_score(precision_metric, recall_metric):.2f}')

In [None]:
# SINEXP run: rebuild DenseNet121 from scratch (weights=None) so this
# experiment does not reuse the weights trained in the previous run.
densenet_model = tf.keras.applications.densenet.DenseNet121(
    weights=None,
    include_top=True,
    classes=len(class_names),
    classifier_activation='softmax',
    input_shape=X_train.shape[1:],
    # NOTE(review): Keras documents `pooling` as a feature-extraction option
    # for include_top=False; it looks redundant here — confirm.
    pooling='avg',
)

In [None]:
# SINEXP run: StochasticFrankWolfe (defined outside this section) with base
# learning rate 0.001, momentum 0.99 and the 'SINEXP' schedule, parameterised
# with k0, k1, l and gamma as given below.
densenet_model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=0.001,
        momentum=0.99,
        lr_schedule='SINEXP',
        schedule_params={'k0': 0.0001, 'k1': 0.001, 'l': 3, 'gamma': 0.9},
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
    ),
    metrics=[
        'accuracy',
        precision_metric,
        recall_metric,
    ],
)

In [None]:
# Train the SINEXP-schedule SFW model for at most 20 epochs, validating on the
# validation split each epoch. `es`, `batch_size`, `train_steps` and
# `val_steps` are defined outside this section.
densenet_model_history = densenet_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=batch_size,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    shuffle=True,
    callbacks=[es],
    verbose='auto',
)

In [None]:
# Training curves for the SINEXP run, taken from the history returned by fit().
plot_loss_curve(densenet_model_history)
plot_accuracy_curve(densenet_model_history)

# Confusion matrix of the trained model on the test split.
plot_confusion_matrix(densenet_model, X_test, y_test, classes=class_names, dataset='cifar10')

# evaluate() yields the loss followed by the metrics in the order they were
# passed to compile(): accuracy, precision, recall.
densenet_model_score = densenet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
# Header corrected: this run uses the 'SINEXP' schedule, not the fixed step
# size named by the copy-pasted message.
print("DenseNet model classifier evaluation results with SFW optimizer with SINEXP learning rate schedule:\n")
print(f'Test set Loss = {densenet_model_score[0]:.5f}')
print(f'Test set Accuracy = {densenet_model_score[1]:.2f}')
print(f'Test set Precision = {densenet_model_score[2]:.2f}')
print(f'Test set Recall = {densenet_model_score[3]:.2f}')
# NOTE(review): F1 is computed from the metric objects rather than from
# densenet_model_score — presumably get_f1_score reads their current state
# after evaluate(); confirm against its definition.
print(f'Test set F1 Score = {get_f1_score(precision_metric, recall_metric):.2f}')

In [None]:
# COS run: rebuild DenseNet121 from scratch (weights=None) so this experiment
# does not reuse the weights trained in the previous run.
densenet_model = tf.keras.applications.densenet.DenseNet121(
    weights=None,
    include_top=True,
    classes=len(class_names),
    classifier_activation='softmax',
    input_shape=X_train.shape[1:],
    # NOTE(review): Keras documents `pooling` as a feature-extraction option
    # for include_top=False; it looks redundant here — confirm.
    pooling='avg',
)

In [None]:
# COS run: StochasticFrankWolfe (defined outside this section) with base
# learning rate 0.001, momentum 0.99 and the 'COS' schedule, parameterised
# with k0, k1, l and gamma as given below.
densenet_model.compile(
    optimizer=StochasticFrankWolfe(
        learning_rate=0.001,
        momentum=0.99,
        lr_schedule='COS',
        schedule_params={'k0': 0.0001, 'k1': 0.001, 'l': 3, 'gamma': 0.9},
    ),
    loss=tf.keras.losses.CategoricalCrossentropy(
        from_logits=False,
    ),
    metrics=[
        'accuracy',
        precision_metric,
        recall_metric,
    ],
)

In [None]:
# Train the COS-schedule SFW model for at most 20 epochs, validating on the
# validation split each epoch. `es`, `batch_size`, `train_steps` and
# `val_steps` are defined outside this section.
densenet_model_history = densenet_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=batch_size,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    shuffle=True,
    callbacks=[es],
    verbose='auto',
)

In [None]:
# Training curves for the COS run, taken from the history returned by fit().
plot_loss_curve(densenet_model_history)
plot_accuracy_curve(densenet_model_history)

# Confusion matrix of the trained model on the test split.
plot_confusion_matrix(densenet_model, X_test, y_test, classes=class_names, dataset='cifar10')

# evaluate() yields the loss followed by the metrics in the order they were
# passed to compile(): accuracy, precision, recall.
densenet_model_score = densenet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose='auto', steps=test_steps)
# Header corrected: this run uses the 'COS' schedule, not the fixed step size
# named by the copy-pasted message.
print("DenseNet model classifier evaluation results with SFW optimizer with COS learning rate schedule:\n")
print(f'Test set Loss = {densenet_model_score[0]:.5f}')
print(f'Test set Accuracy = {densenet_model_score[1]:.2f}')
print(f'Test set Precision = {densenet_model_score[2]:.2f}')
print(f'Test set Recall = {densenet_model_score[3]:.2f}')
# NOTE(review): F1 is computed from the metric objects rather than from
# densenet_model_score — presumably get_f1_score reads their current state
# after evaluate(); confirm against its definition.
print(f'Test set F1 Score = {get_f1_score(precision_metric, recall_metric):.2f}')