<a href="https://colab.research.google.com/github/codedfortamara/datasciencecoursera/blob/main/auto_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from typing import Tuple
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from google.colab import files


# Step 1: Path of the Auto MPG data file. The file must already be present in
# the working directory (nothing in this cell uploads or downloads it).
data_path = 'auto-mpg.data'

# Step 2: The file is read without a header row, so name the columns explicitly
column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model_year', 'origin', 'car_name']

# Step 3: Load the whitespace-delimited file into a DataFrame; '?' entries are read as NaN.
# The separator is a raw string so that `\s` reaches pandas as a regex escape
# instead of being an invalid Python string escape (a warning on current Pythons).
df = pd.read_csv(data_path, sep=r'\s+', names=column_names, na_values='?')

# Drop the 'car_name' column as it is not needed for model training
df = df.drop('car_name', axis=1)

# Handle missing values (drop every row that contains a NaN)
df = df.dropna()

# Split features and target
X = df.drop('mpg', axis=1)  # Features
y = df['mpg']  # Target (miles per gallon)

# Preprocessing: hold out 20% for validation, then standardize the features.
# The scaler is fitted on the training split only and reused on the validation
# split, so no validation statistics leak into training.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Step 4: Define the function to create the deep and wide networks
def create_auto_mpg_deep_and_wide_networks(
    n_inputs: int,
    n_outputs: int,
    layer_sizes: Tuple[int, ...] = (100, 50, 25),
) -> Tuple[keras.models.Model, keras.models.Model]:
    """Create one deep and one wide regression network of comparable size.

    Both networks use ReLU hidden activations, a linear output layer, the MSE
    loss and their own Adam optimizer instance.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of output units.
        layer_sizes: Units of each hidden layer of the deep network, in order.
            Defaults to the original fixed architecture ``(100, 50, 25)``.

    Returns:
        A ``(deep_model, wide_model)`` tuple of compiled Keras models. The wide
        model has a single hidden layer whose width is chosen so that its total
        parameter count is as close as possible to (without exceeding) that of
        the deep model.
    """
    # Deep Neural Network: one Dense/ReLU layer per entry in `layer_sizes`
    deep_input = keras.Input(shape=(n_inputs,))
    deep_hidden = deep_input
    for units in layer_sizes:
        deep_hidden = keras.layers.Dense(units, activation='relu')(deep_hidden)
    deep_output = keras.layers.Dense(n_outputs, activation='linear')(deep_hidden)
    deep_model = keras.Model(deep_input, deep_output)

    # A functional model is already built, so its parameters can be counted directly
    total_deep_params = deep_model.count_params()

    # Wide Neural Network: a single hidden layer of width h has
    #   h * (n_inputs + 1)        hidden weights + biases
    # + (h + 1) * n_outputs       output weights + biases
    # = h * (n_inputs + n_outputs + 1) + n_outputs parameters.
    # Solve for h so the output layer is accounted for as well; dividing by
    # (n_inputs + 1) alone would make the wide network noticeably larger.
    wide_neuron_count = max(
        1, (total_deep_params - n_outputs) // (n_inputs + n_outputs + 1)
    )
    wide_input = keras.Input(shape=(n_inputs,))
    wide_hidden = keras.layers.Dense(wide_neuron_count, activation='relu')(wide_input)
    wide_output = keras.layers.Dense(n_outputs, activation='linear')(wide_hidden)
    wide_model = keras.Model(wide_input, wide_output)

    # Compile both models with SEPARATE optimizer instances and the same loss function
    loss = 'mse'  # Mean squared error for regression
    deep_model.compile(optimizer=keras.optimizers.Adam(), loss=loss)
    wide_model.compile(optimizer=keras.optimizers.Adam(), loss=loss)

    return deep_model, wide_model

# Step 5: Define the function for hyperparameter tuning
def hyperparameter_tuning(
    n_inputs: int, n_outputs: int, X_train, y_train, X_val, y_val
) -> Tuple[keras.models.Model, keras.models.Model]:
    """Train a deep/wide network pair per layer-size configuration and keep the best.

    For every entry in the built-in list of hidden-layer sizes, a deep network
    with those layers and a parameter-matched wide network are created, trained
    for 10 epochs and evaluated on the validation data.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of output units.
        X_train: Training features.
        y_train: Training targets.
        X_val: Validation features.
        y_val: Validation targets.

    Returns:
        A ``(best_deep_model, best_wide_model)`` tuple: the trained deep and
        wide models with the lowest validation loss of their kind.
    """

    # Define multiple configurations for deep networks (layer sizes)
    hyperparameter_sets = [
        (128, 64, 32),
        (36, 18, 9),
        (1024, 512, 256),
        (256, 128, 64),
        (10, 5, 2),
        (64, 32, 16),
        (128, 128, 64),
        (2048, 1024, 512),
        (512, 256, 128),
        (128, 32, 16),
    ]

    best_deep_model = None
    best_wide_model = None
    best_deep_val_loss = float('inf')
    best_wide_val_loss = float('inf')
    # Tracked separately so that a wide-model improvement cannot overwrite the
    # record of the best deep configuration (and vice versa).
    best_deep_params = None
    best_wide_params = None

    # Loop over each hyperparameter set
    for deep_layer_sizes in hyperparameter_sets:
        print(f"Testing configuration: Deep Layers - {deep_layer_sizes}")

        # Build the models for THIS configuration; without passing the sizes
        # every iteration would train the same default architecture.
        deep_model, wide_model = create_auto_mpg_deep_and_wide_networks(
            n_inputs, n_outputs, layer_sizes=deep_layer_sizes
        )

        # Train the deep model
        deep_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), verbose=1)

        # Train the wide model
        wide_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), verbose=1)

        # Evaluate the models on the validation data
        deep_val_loss = deep_model.evaluate(X_val, y_val, verbose=0)
        wide_val_loss = wide_model.evaluate(X_val, y_val, verbose=0)

        print(f"Deep Validation Loss: {deep_val_loss}")
        print(f"Wide Validation Loss: {wide_val_loss}")

        # Update the best model of each kind if this configuration is better
        if deep_val_loss < best_deep_val_loss:
            best_deep_model = deep_model
            best_deep_val_loss = deep_val_loss
            best_deep_params = {'layers': deep_layer_sizes, 'type': 'deep'}

        if wide_val_loss < best_wide_val_loss:
            best_wide_model = wide_model
            best_wide_val_loss = wide_val_loss
            # 'layers' is the deep configuration this wide model was size-matched to
            best_wide_params = {'layers': deep_layer_sizes, 'type': 'wide'}

    # The overall best configuration is whichever kind reached the lower loss
    if best_deep_val_loss <= best_wide_val_loss:
        best_params = best_deep_params
    else:
        best_params = best_wide_params

    # Print the best configuration and its validation loss
    print(f"Best Model Configuration: {best_params}")
    print(f"Best Deep Model Validation Loss: {best_deep_val_loss}")
    print(f"Best Wide Model Validation Loss: {best_wide_val_loss}")

    return best_deep_model, best_wide_model

# Step 6: Network dimensions — one regression output ('mpg') and one input
# per feature column of the scaled training matrix
n_outputs = 1
n_inputs = X_train.shape[-1]

# Step 7: Search the layer-size configurations and keep the best model of each kind
best_deep_model, best_wide_model = hyperparameter_tuning(
    n_inputs=n_inputs,
    n_outputs=n_outputs,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)


Testing configuration: Deep Layers - (128, 64, 32)
Epoch 1/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 161ms/step - loss: 626.7194 - val_loss: 542.7382
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 581.4659 - val_loss: 500.1115
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 530.5179 - val_loss: 433.3162
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 429.6705 - val_loss: 333.6837
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 345.9219 - val_loss: 206.8849
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 200.6111 - val_loss: 87.1250
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 79.1860 - val_loss: 35.2476
Epoch 8/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 33.6

In [5]:
#IMPORT LIBRARIES
import pandas as pd # load and process the dataset
import tensorflow as tf # Defines, compiles, and trains the neural network
import tensorflow.keras as keras # Supports the construction of both deep and wide networks
from typing import Tuple # Specifies types of variables and return types in functions
from sklearn.model_selection import train_test_split # Splits the dataset into training and validation sets
from sklearn.preprocessing import StandardScaler # Scales the data to avoid discrepancies in different feature scales
from google.colab import files # Allows working with files in Google Colab

# The file "auto-mpg.data" must already be uploaded to the Colab working directory
data_path = 'auto-mpg.data'
column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model_year', 'origin', 'car_name']
# Whitespace-delimited file without a header; '?' entries are read as NaN.
# Raw string: `\s` must reach pandas as a regex escape, not as an invalid
# Python string escape (which triggers a warning on current Pythons).
df = pd.read_csv(data_path, sep=r'\s+', names=column_names, na_values='?')
df = df.drop('car_name', axis=1)  # Drop the car_name column
df = df.dropna()  # Remove rows with missing data

# Prep the data, define features and target, and split the dataset
X = df.drop('mpg', axis=1)  # Features
y = df['mpg']  # Target (miles per gallon)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)  # 80/20 split
# Scale the features: fit on the training split only, then apply to validation
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Create deep and wide networks
def create_auto_mpg_deep_and_wide_networks(
    n_inputs: int,
    n_outputs: int,
    layer_sizes: Tuple[int, ...] = (100, 50, 25),
) -> Tuple[keras.models.Model, keras.models.Model]:
    """Create one deep ("Mariana") and one wide ("Sahara") network of comparable size.

    Both networks use ReLU hidden activations, a linear output layer, the MSE
    loss and their own Adam optimizer instance.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of output units.
        layer_sizes: Units of each hidden layer of the deep network, in order.
            Defaults to the original fixed architecture ``(100, 50, 25)``.

    Returns:
        A ``(mariana_model, sahara_model)`` tuple of compiled Keras models. The
        wide model has a single hidden layer whose width is chosen so that its
        total parameter count is as close as possible to (without exceeding)
        that of the deep model.
    """

    # Deep Neural Network - Mariana (trench): one Dense/ReLU layer per entry in `layer_sizes`
    mariana_input = keras.Input(shape=(n_inputs,))
    mariana_hidden = mariana_input
    for units in layer_sizes:
        mariana_hidden = keras.layers.Dense(units, activation='relu')(mariana_hidden)
    mariana_output = keras.layers.Dense(n_outputs, activation='linear')(mariana_hidden)
    mariana_model = keras.Model(mariana_input, mariana_output)

    # A functional model is already built, so its parameters can be counted directly
    total_mariana_params = mariana_model.count_params()

    # Wide Neural Network - Sahara (flat and wide): a single hidden layer of width h has
    #   h * (n_inputs + 1)        hidden weights + biases
    # + (h + 1) * n_outputs       output weights + biases
    # = h * (n_inputs + n_outputs + 1) + n_outputs parameters.
    # Solve for h so the output layer is accounted for as well; dividing by
    # (n_inputs + 1) alone would make the wide network noticeably larger.
    sahara_neuron_count = max(
        1, (total_mariana_params - n_outputs) // (n_inputs + n_outputs + 1)
    )
    sahara_input = keras.Input(shape=(n_inputs,))
    sahara_hidden = keras.layers.Dense(sahara_neuron_count, activation='relu')(sahara_input)
    sahara_output = keras.layers.Dense(n_outputs, activation='linear')(sahara_hidden)
    sahara_model = keras.Model(sahara_input, sahara_output)

    # Compile both models with separate optimizer instances and the same loss function
    loss = 'mse'  # Mean squared error for regression
    mariana_model.compile(optimizer=keras.optimizers.Adam(), loss=loss)
    sahara_model.compile(optimizer=keras.optimizers.Adam(), loss=loss)

    return mariana_model, sahara_model

# Hyperparameter tuning
def hyperparameter_tuning(
    n_inputs: int, n_outputs: int, X_train, y_train, X_val, y_val
) -> Tuple[keras.models.Model, keras.models.Model]:
    """Train a deep/wide network pair per layer-size configuration and keep the best.

    For every entry in the built-in list of hidden-layer sizes, a deep network
    with those layers and a parameter-matched wide network are created, trained
    for 10 epochs and evaluated on the validation data.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of output units.
        X_train: Training features.
        y_train: Training targets.
        X_val: Validation features.
        y_val: Validation targets.

    Returns:
        A ``(best_mariana_model, best_sahara_model)`` tuple: the trained deep
        and wide models with the lowest validation loss of their kind.
    """

    # Hyperparameter options (hidden-layer sizes of the deep network)
    hyperparameter_sets = [
        (128, 64, 32),
        (36, 18, 9),
        (1024, 512, 256),
        (256, 128, 64),
        (10, 5, 2),
        (64, 32, 16),
        (128, 128, 64),
        (2048, 1024, 512),
        (512, 256, 128),
        (128, 32, 16),
    ]

    best_mariana_model = None
    best_sahara_model = None
    best_mariana_val_loss = float('inf')
    best_sahara_val_loss = float('inf')
    # Tracked separately so that a wide-model improvement cannot overwrite the
    # record of the best deep configuration (and vice versa).
    best_mariana_params = None
    best_sahara_params = None

    # Loop through the hyperparameter sets
    for mariana_layer_sizes in hyperparameter_sets:
        print(f"Testing configuration: Deep Layers - {mariana_layer_sizes}")

        # Build the models for THIS configuration; without passing the sizes
        # every iteration would train the same default architecture.
        mariana_model, sahara_model = create_auto_mpg_deep_and_wide_networks(
            n_inputs, n_outputs, layer_sizes=mariana_layer_sizes
        )

        # Train Mariana (Deep Model)
        mariana_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), verbose=1)

        # Train Sahara (Wide Model)
        sahara_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), verbose=1)

        # Evaluate the models on the validation data
        mariana_val_loss = mariana_model.evaluate(X_val, y_val, verbose=0)
        sahara_val_loss = sahara_model.evaluate(X_val, y_val, verbose=0)

        print(f"Mariana Validation Loss: {mariana_val_loss}")
        print(f"Sahara Validation Loss: {sahara_val_loss}")

        # Update the best model of each kind if this configuration is better
        if mariana_val_loss < best_mariana_val_loss:
            best_mariana_model = mariana_model
            best_mariana_val_loss = mariana_val_loss
            best_mariana_params = {'layers': mariana_layer_sizes, 'type': 'deep'}

        if sahara_val_loss < best_sahara_val_loss:
            best_sahara_model = sahara_model
            best_sahara_val_loss = sahara_val_loss
            # 'layers' is the deep configuration this wide model was size-matched to
            best_sahara_params = {'layers': mariana_layer_sizes, 'type': 'wide'}

    # The overall best configuration is whichever kind reached the lower loss
    if best_mariana_val_loss <= best_sahara_val_loss:
        best_params = best_mariana_params
    else:
        best_params = best_sahara_params

    # Print the best configuration and its validation loss
    print(f"Best Model Configuration: {best_params}")
    print(f"Best Mariana Model Validation Loss: {best_mariana_val_loss}")
    print(f"Best Sahara Model Validation Loss: {best_sahara_val_loss}")

    return best_mariana_model, best_sahara_model

# Network dimensions: one regression output ('mpg') and one input per feature
# column of the scaled training matrix
n_outputs = 1
n_inputs = X_train.shape[-1]

# Search the layer-size configurations and keep the best model of each kind
best_mariana_model, best_sahara_model = hyperparameter_tuning(
    n_inputs=n_inputs,
    n_outputs=n_outputs,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)


Testing configuration: Deep Layers - (128, 64, 32)
Epoch 1/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 162ms/step - loss: 647.3929 - val_loss: 540.8671
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 582.5296 - val_loss: 498.9104
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 556.0105 - val_loss: 436.6078
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 452.0192 - val_loss: 343.8242
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 360.5829 - val_loss: 221.5114
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 228.4571 - val_loss: 103.3219
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 91.7166 - val_loss: 46.6486
Epoch 8/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 44.