# Imports

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Constants

In [None]:
n_samples = 10000
test_samples = 3000
a1 = np.array([3, 3], dtype=np.float32)
a2 = np.array([-3, -3], dtype=np.float32)
epoch_counts = [5, 10, 20, 50, 75, 100]
neuron_counts = range(1, 21)

# Generate train and test data

In [None]:
# Define the data generation function
def generate_data(n):
    X = np.random.randn(n, 2).astype(np.float32)
    Z = np.random.standard_normal((n, 1)).astype(np.float32)
    Y = (tf.nn.sigmoid(a1 @ X.T) + ((a2 @ X.T)**2) + (0.30 * Z)).numpy()
    return X, Y

# Data preparation
X_train, Y_train = generate_data(n_samples)
X_test, Y_test = generate_data(test_samples)

# Part 1:
### Plot the training and test error curves as a function of the number of training epochs (Use the root mean squared error metric RMSE to evaluate the training and test errors), try several epochs numbers i.e. 5, 10, 20, 50, ,75 ,100. Discuss the overfitting behavior in each case

In [None]:
# Create the model defention
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(10, activation='sigmoid', input_shape=(2,)),
        tf.keras.layers.Dense(1, activation='linear')
    ])
    model.compile(loss='mse')
    return model

# Setup grid for drawing the results
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 10)) 
axes = axes.flatten()

# Try for each epoch count
for i, epochs in enumerate(epoch_counts):
    model = create_model()
    history = model.fit(X_train, Y_train, epochs=epochs, validation_data=(X_test, Y_test), verbose=0)

    # RMSE
    train_rmse = np.sqrt(history.history['loss'])
    test_rmse = np.sqrt(history.history['val_loss'])

    # Plotting
    ax = axes[i]
    ax.plot(range(1, epochs + 1), train_rmse, label='Train RMSE')
    ax.plot(range(1, epochs + 1), test_rmse, label='Test RMSE')
    ax.set_title(f'RMSE over {epochs} Epochs')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('RMSE')
    ax.legend()

    # Annotate final RMSE values
    final_train_rmse = train_rmse[-1]
    final_test_rmse = test_rmse[-1]
    ax.text(0.5, 0.9, f'Final Train RMSE: {final_train_rmse:.4f}', transform=ax.transAxes, ha='center', va='top', fontsize=10, color='blue')
    ax.text(0.5, 0.85, f'Final Test RMSE: {final_test_rmse:.4f}', transform=ax.transAxes, ha='center', va='top', fontsize=10, color='green')

# Adjust layout and show plot
plt.tight_layout()
plt.show()

# Part 2
### Vary the number of hidden layer in the network, from 1 up to 20, and determine the minimum number needed to perform well for this task.

In [None]:
def create_model(num_hidden_layers, neurons_per_layer=10):
    model = tf.keras.Sequential()
    for _ in range(num_hidden_layers):
        model.add(tf.keras.layers.Dense(neurons_per_layer, activation='sigmoid', input_shape=(2,)))
    model.add(tf.keras.layers.Dense(1, activation='linear'))
    return model

layer_counts = range(1, 21)
train_rmse_results = []
test_rmse_results = []

for layers in layer_counts:
    model = create_model(num_hidden_layers=layers)
    model.compile(loss='mse')
    
    history = model.fit(X_train, Y_train, epochs=50, validation_data=(X_test, Y_test), verbose=0)
    
    # Calculate RMSE for training and test
    train_rmse = np.sqrt(history.history['loss'][-1])
    test_rmse = np.sqrt(history.history['val_loss'][-1])
    
    train_rmse_results.append(train_rmse)
    test_rmse_results.append(test_rmse)

# Plotting results
plt.figure(figsize=(10, 6))
plt.plot(layer_counts, train_rmse_results, label='Train RMSE')
plt.plot(layer_counts, test_rmse_results, label='Test RMSE')
plt.xlabel('Number of Hidden Layers')
plt.ylabel('RMSE')
plt.title('Effect of Hidden Layer Count on Model Performance')
plt.legend()
plt.grid(True)
plt.show()

## The best number of hidden layers is 4 which gave the lowest rmse while maintaining the lowest complixity.
## After that increasing the model complexity caused some kind of underfitting after 9 hidden layers

# Part 3
### Try to optimize the feed-forward neural network built in 2 to reach the best possible accuracy by tune
### a. Overfitting avoiding i.e. early stopping, L1 and L2 regularization (try several values of λ), max-norm, and dropout
### b. Vanishing gradient overcoming, i.e. parameter initialization, none saturation activation function, batch normalization, and gradient clipping.
### c. The optimizer and its hyperparameters
### d. The number of neurons of the hidden layers.
### e. The mini-batches of training
### f. Activation function for the hidden layers (i.e. tanh, ReLU, linear, etc.)

In [6]:
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

def create_model(l1_reg, l2_reg, dropout_rate, max_norm_value):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(10, activation='sigmoid', input_shape=(2,),
                              kernel_regularizer=tf.keras.regularizers.l1_l2(l1=l1_reg, l2=l2_reg),
                              kernel_constraint=tf.keras.constraints.MaxNorm(max_value=max_norm_value)),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(10, activation='sigmoid',
                              kernel_regularizer=tf.keras.regularizers.l1_l2(l1=l1_reg, l2=l2_reg),
                              kernel_constraint=tf.keras.constraints.MaxNorm(max_value=max_norm_value)),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(10, activation='sigmoid',
                              kernel_regularizer=tf.keras.regularizers.l1_l2(l1=l1_reg, l2=l2_reg),
                              kernel_constraint=tf.keras.constraints.MaxNorm(max_value=max_norm_value)),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(10, activation='sigmoid',
                              kernel_regularizer=tf.keras.regularizers.l1_l2(l1=l1_reg, l2=l2_reg),
                              kernel_constraint=tf.keras.constraints.MaxNorm(max_value=max_norm_value)),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1, activation='linear')
    ])
    return model

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
regularization_params = [(0.01, 0.01), (0.001, 0.01), (0.01, 0.001)]
dropout_rates = [0.2, 0.3, 0.4]
max_norm_params= [3,4]

best_model = 0
lowest_rmse = 1000000
for l1_reg, l2_reg in regularization_params:
    for dropout_rate in dropout_rates:
        for max_norm in max_norm_params:
            model = create_model(l1_reg, l2_reg, dropout_rate, max_norm)
            model.compile(loss='mse')
            history = model.fit(X_train, Y_train, epochs=100, validation_data=(X_val, Y_val), verbose=0, callbacks=[early_stopping])

            # Evaluate the model
            train_rmse = np.sqrt(model.evaluate(X_train, Y_train, verbose=0))
            val_rmse = np.sqrt(model.evaluate(X_val, Y_val, verbose=0))
            test_rmse = np.sqrt(model.evaluate(X_test, Y_test, verbose=0))
            
            if test_rmse < lowest_rmse:
                best_model = model

            print(f"Training RMSE: {train_rmse:.4f}, Validation RMSE: {val_rmse:.4f}, Test RMSE: {test_rmse:.4f} with L1={l1_reg}, L2={l2_reg}, Dropout={dropout_rate}, Max-norm={max_norm}")


Training RMSE: 24.9885, Validation RMSE: 25.6450, Test RMSE: 24.6139 with L1=0.01, L2=0.01, Dropout=0.2, Max-norm=3
Training RMSE: 24.9841, Validation RMSE: 25.6394, Test RMSE: 24.6087 with L1=0.01, L2=0.01, Dropout=0.2, Max-norm=4
Training RMSE: 24.9926, Validation RMSE: 25.6553, Test RMSE: 24.6219 with L1=0.01, L2=0.01, Dropout=0.3, Max-norm=3
Training RMSE: 24.9929, Validation RMSE: 25.6555, Test RMSE: 24.6221 with L1=0.01, L2=0.01, Dropout=0.3, Max-norm=4
Training RMSE: 24.9947, Validation RMSE: 25.6625, Test RMSE: 24.6271 with L1=0.01, L2=0.01, Dropout=0.4, Max-norm=3
Training RMSE: 25.0020, Validation RMSE: 25.6723, Test RMSE: 24.6361 with L1=0.01, L2=0.01, Dropout=0.4, Max-norm=4
Training RMSE: 24.9839, Validation RMSE: 25.6403, Test RMSE: 24.6092 with L1=0.001, L2=0.01, Dropout=0.2, Max-norm=3
Training RMSE: 24.9837, Validation RMSE: 25.6408, Test RMSE: 24.6094 with L1=0.001, L2=0.01, Dropout=0.2, Max-norm=4
Training RMSE: 24.9861, Validation RMSE: 25.6471, Test RMSE: 24.6142 w

InvalidArgumentError: Graph execution error:

Detected at node RMSprop/RMSprop/update_2/Maximum defined at (most recent call last):
  File "/Users/alaaodeh/miniforge3/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/Users/alaaodeh/miniforge3/lib/python3.10/runpy.py", line 86, in _run_code

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 736, in start

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/Users/alaaodeh/miniforge3/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/Users/alaaodeh/miniforge3/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/Users/alaaodeh/miniforge3/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 505, in process_one

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 740, in execute_request

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 422, in do_execute

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 546, in run_cell

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3024, in run_cell

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3079, in _run_cell

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3284, in run_cell_async

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3466, in run_ast_nodes

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code

  File "/var/folders/4b/06ymqbhd3c785p2vvkrn8r5m0000gn/T/ipykernel_6205/41716039.py", line 37, in <module>

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1154, in train_step

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/optimizers/legacy/optimizer_v2.py", line 601, in minimize

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/optimizers/legacy/optimizer_v2.py", line 760, in apply_gradients

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/optimizers/legacy/optimizer_v2.py", line 819, in _distributed_apply

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/optimizers/legacy/optimizer_v2.py", line 801, in apply_grad_to_update_var

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/constraints.py", line 137, in __call__

  File "/Users/alaaodeh/miniforge3/lib/python3.10/site-packages/keras/src/backend.py", line 3040, in sqrt

Incompatible shapes: [1,10] vs. [0]
	 [[{{node RMSprop/RMSprop/update_2/Maximum}}]] [Op:__inference_train_function_3278963]

In [73]:
len(Y_val)

215

In [14]:
X_train[0]

array([ 0.36192387, -0.25395247], dtype=float32)

In [8]:
Y_train

array([ 54.931316 , 132.98552  ,  14.104847 , ...,  12.71713  ,
         7.3315573,  51.191006 ], dtype=float32)