Network with two hidden layers and ReLU activation function:

In [None]:
# Import necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score, classification_report
from ucimlrepo import fetch_ucirepo
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Input
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt


# Seed Python's `random`, NumPy and TensorFlow in one call. Seeding NumPy
# alone leaves Keras' own randomness (e.g. weight initialisation) unseeded,
# so repeated runs of the notebook would not give the same models.
tf.keras.utils.set_random_seed(0)

# Load the dataset
covertype = fetch_ucirepo(id=31)
X = covertype.data.features.to_numpy()
y = covertype.data.targets.to_numpy()

# One-hot representation of labels, lecture notes week 43.
# Subtract 1 so that the first class maps to column 0.
y_onehot = to_categorical(y - 1)

# Split into train and test data, 40% test data
X_train, X_test, y_train, y_test = train_test_split(
    X, y_onehot, test_size=0.4, random_state=1)

# Scale the features. The scaler is fitted on the training split only and
# then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Define the model, lecture notes week 43
def create_network(n_neurons_layer1, n_neurons_layer2, n_categories, eta, lmbd,
                   activation=None, n_features=None):
  """Build and compile a classifier with two hidden Dense layers.

  Each hidden layer uses L2 kernel regularization and is followed by
  BatchNormalization; the output layer is a softmax over `n_categories`.

  Args:
    n_neurons_layer1: Number of units in the first hidden layer.
    n_neurons_layer2: Number of units in the second hidden layer.
    n_categories: Number of units in the softmax output layer.
    eta: Learning rate passed to the Adam optimizer.
    lmbd: L2 regularization factor applied to both hidden-layer kernels.
    activation: Activation for both hidden layers (e.g. "relu", "elu").
      When omitted, the notebook-level variable `activation` is used, and
      "relu" if that variable does not exist.
    n_features: Number of input features. When omitted, it is taken from
      the notebook-level `X_train`.

  Returns:
    A Sequential model compiled with Adam, categorical cross-entropy loss
    and the accuracy metric.
  """
  if activation is None:
    # Existing cells set a global `activation` before calling this function;
    # keep honouring it so those calls behave exactly as before.
    activation = globals().get("activation", "relu")
  if n_features is None:
    n_features = X_train.shape[1]

  model = Sequential([
      Input(shape=(n_features,)),
      # Hidden layers with L2-regularization, each followed by batch normalization.
      Dense(n_neurons_layer1, activation=activation, kernel_regularizer=l2(lmbd)),
      BatchNormalization(),
      Dense(n_neurons_layer2, activation=activation, kernel_regularizer=l2(lmbd)),
      BatchNormalization(),
      Dense(n_categories, activation='softmax')
  ])
  # Defining the optimizer, loss and metrics.
  model.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=eta),
      loss='categorical_crossentropy',
      metrics=['accuracy'])
  return model


In [None]:
# Model 1

# Hyperparameters
epochs = 100
batch_size = 512
n_neurons_layer1 = 128
n_neurons_layer2 = 64
n_categories = y_train.shape[1]
eta = 0.001 # learning rate
lmbd = 0.002 # l2 regularization
activation = "relu" # read by create_network when it builds the hidden layers

# Create network
model_1 = create_network(n_neurons_layer1, n_neurons_layer2, n_categories, eta, lmbd)

# Train the model.
# NOTE(review): the test split is also used as validation data, so the
# validation curves in `history` are not independent of the test scores below.
history = model_1.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
    validation_data=(X_test, y_test), verbose=0)

# Calculate test-accuracy and loss for the model
test_loss, test_accuracy = model_1.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predict on the test set; argmax turns softmax outputs / one-hot rows into
# 0-based class indices.
predictions = model_1.predict(X_test)
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)

# Plot the Confusion Matrix.
# Passing `labels` fixes the matrix to n_categories x n_categories even if a
# class is absent from this split, so it always matches the display labels.
class_indices = np.arange(n_categories)
cm = confusion_matrix(true_classes, predicted_classes, labels=class_indices)
class_names = class_indices + 1  # Add 1 to shift labels to 1-based indexing
# Row-normalise (per true class); rows without samples stay 0 instead of NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float),
                          where=row_totals > 0)
# The title announces percentages, so plot 0-100 rather than 0-1 fractions.
cm_percent = 100 * cm_normalized
disp = ConfusionMatrixDisplay(confusion_matrix=cm_percent, display_labels=class_names)
disp.plot(cmap="viridis", values_format=".1f")
plt.title("Confusion Matrix for Model 1 (%)")
plt.show()


# Predict on the training set
train_predictions = model_1.predict(X_train)
train_predicted_classes = train_predictions.argmax(axis=1)
train_true_classes = y_train.argmax(axis=1)

# Names shown in both reports; `labels` keeps them aligned with the class indices.
report_names = [f"Class {i+1}" for i in range(n_categories)]

# Generate classification report for training set
train_report = classification_report(train_true_classes, train_predicted_classes,
                                     labels=class_indices, target_names=report_names)
print("Training Classification Report for Model 1:")
print(train_report)

# Generate classification report for test set
test_report = classification_report(true_classes, predicted_classes,
                                    labels=class_indices, target_names=report_names)
print("Test Classification Report for Model 1:")
print(test_report)

In [None]:
# Model 2

# Hyperparameters
epochs = 100
batch_size = 512
n_neurons_layer1 = 64
n_neurons_layer2 = 32
n_categories = y_train.shape[1]
eta = 0.001 # learning rate
lmbd = 0.001 # l2 regularization
activation = "relu" # read by create_network when it builds the hidden layers

# Create network
model_2 = create_network(n_neurons_layer1, n_neurons_layer2, n_categories, eta, lmbd)

# Train the model.
# NOTE(review): the test split is also used as validation data, so the
# validation curves in `history` are not independent of the test scores below.
history = model_2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
    validation_data=(X_test, y_test), verbose=0)

# Calculate test-accuracy and loss for the model
test_loss, test_accuracy = model_2.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predict on the test set; argmax turns softmax outputs / one-hot rows into
# 0-based class indices.
predictions = model_2.predict(X_test)
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)


# Plot the Confusion Matrix.
# Passing `labels` fixes the matrix to n_categories x n_categories even if a
# class is absent from this split, so it always matches the display labels.
class_indices = np.arange(n_categories)
cm = confusion_matrix(true_classes, predicted_classes, labels=class_indices)
class_names = class_indices + 1  # Add 1 to shift labels to 1-based indexing
# Row-normalise (per true class); rows without samples stay 0 instead of NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float),
                          where=row_totals > 0)
# The title announces percentages, so plot 0-100 rather than 0-1 fractions.
cm_percent = 100 * cm_normalized
disp = ConfusionMatrixDisplay(confusion_matrix=cm_percent, display_labels=class_names)
disp.plot(cmap="viridis", values_format=".1f")
plt.title("Confusion Matrix for Model 2 (%)")
plt.show()

# Predict on the training set
train_predictions = model_2.predict(X_train)
train_predicted_classes = train_predictions.argmax(axis=1)
train_true_classes = y_train.argmax(axis=1)

# Names shown in both reports; `labels` keeps them aligned with the class indices.
report_names = [f"Class {i+1}" for i in range(n_categories)]

# Generate classification report for training set
train_report = classification_report(train_true_classes, train_predicted_classes,
                                     labels=class_indices, target_names=report_names)
print("Training Classification Report for Model 2:")
print(train_report)

# Generate classification report for test set
test_report = classification_report(true_classes, predicted_classes,
                                    labels=class_indices, target_names=report_names)
print("Test Classification Report for Model 2:")
print(test_report)

Network with two hidden layers and Leaky ReLU activation function:

In [None]:
from tensorflow.keras.layers import LeakyReLU

# Re-seed Python's `random`, NumPy and TensorFlow together; seeding NumPy
# alone does not make the Keras models below reproducible.
tf.keras.utils.set_random_seed(0)

# Define the model, lecture notes week 43
def create_network_leaky_relu(n_neurons_layer1, n_neurons_layer2, n_categories, eta, lmbd,
                              negative_slope=0.1, n_features=None):
  """Build and compile a classifier with two hidden Dense + LeakyReLU layers.

  Each hidden Dense layer is linear with L2 kernel regularization, followed
  by a LeakyReLU layer and then BatchNormalization. The output layer is a
  softmax over `n_categories`.

  Args:
    n_neurons_layer1: Number of units in the first hidden layer.
    n_neurons_layer2: Number of units in the second hidden layer.
    n_categories: Number of units in the softmax output layer.
    eta: Learning rate passed to the Adam optimizer.
    lmbd: L2 regularization factor applied to both hidden-layer kernels.
    negative_slope: Slope of LeakyReLU for negative inputs; defaults to the
      previously hard-coded 0.1.
    n_features: Number of input features. When omitted, it is taken from
      the notebook-level `X_train`.

  Returns:
    A Sequential model compiled with Adam, categorical cross-entropy loss
    and the accuracy metric.
  """
  if n_features is None:
    n_features = X_train.shape[1]

  model = Sequential([
      Input(shape=(n_features,)),
      Dense(n_neurons_layer1, kernel_regularizer=l2(lmbd)),
      LeakyReLU(negative_slope=negative_slope),  # LeakyReLU
      BatchNormalization(),
      Dense(n_neurons_layer2, kernel_regularizer=l2(lmbd)),
      LeakyReLU(negative_slope=negative_slope),  # LeakyReLU
      BatchNormalization(),
      Dense(n_categories, activation='softmax')
  ])
  # Defining the optimizer, loss and metrics.
  model.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=eta),
      loss='categorical_crossentropy',
      metrics=['accuracy']
  )
  return model

In [None]:
# Model 3

# Hyperparameters
epochs = 100
batch_size = 512
n_neurons_layer1 = 128
n_neurons_layer2 = 64
n_categories = y_train.shape[1]
eta = 0.001 # learning rate
lmbd = 0.001 # l2 regularization

# Create network
model_3 = create_network_leaky_relu(n_neurons_layer1, n_neurons_layer2, n_categories, eta, lmbd)

# Train the model.
# NOTE(review): the test split is also used as validation data, so the
# validation curves in `history` are not independent of the test scores below.
history = model_3.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
    validation_data=(X_test, y_test), verbose=0)

# Calculate test-accuracy and loss for the model
test_loss, test_accuracy = model_3.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predict on the test set; argmax turns softmax outputs / one-hot rows into
# 0-based class indices.
predictions = model_3.predict(X_test)
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)


# Plot the Confusion Matrix.
# Passing `labels` fixes the matrix to n_categories x n_categories even if a
# class is absent from this split, so it always matches the display labels.
class_indices = np.arange(n_categories)
cm = confusion_matrix(true_classes, predicted_classes, labels=class_indices)
class_names = class_indices + 1  # Add 1 to shift labels to 1-based indexing
# Row-normalise (per true class); rows without samples stay 0 instead of NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float),
                          where=row_totals > 0)
# The title announces percentages, so plot 0-100 rather than 0-1 fractions.
cm_percent = 100 * cm_normalized
disp = ConfusionMatrixDisplay(confusion_matrix=cm_percent, display_labels=class_names)
disp.plot(cmap="viridis", values_format=".1f")
plt.title("Confusion Matrix for Model 3 (%)")
plt.show()


# Predict on the training set
train_predictions = model_3.predict(X_train)
train_predicted_classes = train_predictions.argmax(axis=1)
train_true_classes = y_train.argmax(axis=1)

# Names shown in both reports; `labels` keeps them aligned with the class indices.
report_names = [f"Class {i+1}" for i in range(n_categories)]

# Generate classification report for training set
train_report = classification_report(train_true_classes, train_predicted_classes,
                                     labels=class_indices, target_names=report_names)
print("Training Classification Report for Model 3:")
print(train_report)

# Generate classification report for test set
test_report = classification_report(true_classes, predicted_classes,
                                    labels=class_indices, target_names=report_names)
print("Test Classification Report for Model 3:")
print(test_report)

In [None]:
# Model 4

# Hyperparameters
epochs = 100
batch_size = 512
n_neurons_layer1 = 64
n_neurons_layer2 = 32
n_categories = y_train.shape[1]
eta = 0.001 # learning rate
lmbd = 0.001 # l2 regularization

# Create network
model_4 = create_network_leaky_relu(n_neurons_layer1, n_neurons_layer2, n_categories, eta, lmbd)

# Train the model.
# NOTE(review): the test split is also used as validation data, so the
# validation curves in `history` are not independent of the test scores below.
history = model_4.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
    validation_data=(X_test, y_test), verbose=0)

# Calculate test-accuracy and loss for the model
test_loss, test_accuracy = model_4.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predict on the test set; argmax turns softmax outputs / one-hot rows into
# 0-based class indices.
predictions = model_4.predict(X_test)
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)


# Plot the Confusion Matrix.
# Passing `labels` fixes the matrix to n_categories x n_categories even if a
# class is absent from this split, so it always matches the display labels.
class_indices = np.arange(n_categories)
cm = confusion_matrix(true_classes, predicted_classes, labels=class_indices)
class_names = class_indices + 1  # Add 1 to shift labels to 1-based indexing
# Row-normalise (per true class); rows without samples stay 0 instead of NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float),
                          where=row_totals > 0)
# The title announces percentages, so plot 0-100 rather than 0-1 fractions.
cm_percent = 100 * cm_normalized
disp = ConfusionMatrixDisplay(confusion_matrix=cm_percent, display_labels=class_names)
disp.plot(cmap="viridis", values_format=".1f")
plt.title("Confusion Matrix for Model 4 (%)")
plt.show()


# Predict on the training set
train_predictions = model_4.predict(X_train)
train_predicted_classes = train_predictions.argmax(axis=1)
train_true_classes = y_train.argmax(axis=1)

# Names shown in both reports; `labels` keeps them aligned with the class indices.
report_names = [f"Class {i+1}" for i in range(n_categories)]

# Generate classification report for training set
train_report = classification_report(train_true_classes, train_predicted_classes,
                                     labels=class_indices, target_names=report_names)
print("Training Classification Report for Model 4:")
print(train_report)

# Generate classification report for test set
test_report = classification_report(true_classes, predicted_classes,
                                    labels=class_indices, target_names=report_names)
print("Test Classification Report for Model 4:")
print(test_report)

Network with two hidden layers and ELU activation function:

In [None]:
# Model 5
from sklearn.metrics import classification_report

# Hyperparameters
epochs = 100
batch_size = 512
n_neurons_layer1 = 128
n_neurons_layer2 = 64
n_categories = y_train.shape[1]
eta = 0.001 # learning rate
lmbd = 0.001 # l2 regularization
activation = "elu" # read by create_network when it builds the hidden layers

# Create network
model_5 = create_network(n_neurons_layer1, n_neurons_layer2, n_categories, eta, lmbd)

# Train the model.
# NOTE(review): the test split is also used as validation data, so the
# validation curves in `history` are not independent of the test scores below.
history = model_5.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
    validation_data=(X_test, y_test), verbose=0)

# Calculate test-accuracy and loss for the model
test_loss, test_accuracy = model_5.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predict on the test set; argmax turns softmax outputs / one-hot rows into
# 0-based class indices.
predictions = model_5.predict(X_test)
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)

# Plot the Confusion Matrix.
# Passing `labels` fixes the matrix to n_categories x n_categories even if a
# class is absent from this split, so it always matches the display labels.
class_indices = np.arange(n_categories)
cm = confusion_matrix(true_classes, predicted_classes, labels=class_indices)
class_names = class_indices + 1  # Add 1 to shift labels to 1-based indexing
# Row-normalise (per true class); rows without samples stay 0 instead of NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float),
                          where=row_totals > 0)
# The title announces percentages, so plot 0-100 rather than 0-1 fractions.
cm_percent = 100 * cm_normalized
disp = ConfusionMatrixDisplay(confusion_matrix=cm_percent, display_labels=class_names)
disp.plot(cmap="viridis", values_format=".1f")
plt.title("Confusion Matrix for Model 5 (%)")
plt.show()


# Predict on the training set
train_predictions = model_5.predict(X_train)
train_predicted_classes = train_predictions.argmax(axis=1)
train_true_classes = y_train.argmax(axis=1)

# Names shown in both reports; `labels` keeps them aligned with the class indices.
report_names = [f"Class {i+1}" for i in range(n_categories)]

# Generate classification report for training set
train_report = classification_report(train_true_classes, train_predicted_classes,
                                     labels=class_indices, target_names=report_names)
print("Training Classification Report for Model 5:")
print(train_report)

# Generate classification report for test set
test_report = classification_report(true_classes, predicted_classes,
                                    labels=class_indices, target_names=report_names)
print("Test Classification Report for Model 5:")
print(test_report)

In [None]:
# Model 6

# Hyperparameters
epochs = 100
batch_size = 512
n_neurons_layer1 = 64
n_neurons_layer2 = 32
n_categories = y_train.shape[1]
eta = 0.001 # learning rate
lmbd = 0.001 # l2 regularization
activation = "elu" # read by create_network when it builds the hidden layers

# Create network
model_6 = create_network(n_neurons_layer1, n_neurons_layer2, n_categories, eta, lmbd)

# Train the model.
# NOTE(review): the test split is also used as validation data, so the
# validation curves in `history` are not independent of the test scores below.
history = model_6.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
    validation_data=(X_test, y_test), verbose=0)

# Calculate test-accuracy and loss for the model
test_loss, test_accuracy = model_6.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predict on the test set; argmax turns softmax outputs / one-hot rows into
# 0-based class indices.
predictions = model_6.predict(X_test)
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)

# Plot the Confusion Matrix.
# Passing `labels` fixes the matrix to n_categories x n_categories even if a
# class is absent from this split, so it always matches the display labels.
class_indices = np.arange(n_categories)
cm = confusion_matrix(true_classes, predicted_classes, labels=class_indices)
class_names = class_indices + 1  # Add 1 to shift labels to 1-based indexing
# Row-normalise (per true class); rows without samples stay 0 instead of NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float),
                          where=row_totals > 0)
# The title announces percentages, so plot 0-100 rather than 0-1 fractions.
cm_percent = 100 * cm_normalized
disp = ConfusionMatrixDisplay(confusion_matrix=cm_percent, display_labels=class_names)
disp.plot(cmap="viridis", values_format=".1f")
plt.title("Confusion Matrix for Model 6 (%)")
plt.show()


# Predict on the training set
train_predictions = model_6.predict(X_train)
train_predicted_classes = train_predictions.argmax(axis=1)
train_true_classes = y_train.argmax(axis=1)

# Names shown in both reports; `labels` keeps them aligned with the class indices.
report_names = [f"Class {i+1}" for i in range(n_categories)]

# Generate classification report for training set
train_report = classification_report(train_true_classes, train_predicted_classes,
                                     labels=class_indices, target_names=report_names)
print("Training Classification Report for Model 6:")
print(train_report)

# Generate classification report for test set
test_report = classification_report(true_classes, predicted_classes,
                                    labels=class_indices, target_names=report_names)
print("Test Classification Report for Model 6:")
print(test_report)