In [None]:
import os
import sys
import time
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from sklearn.preprocessing import OneHotEncoder

# Move the working directory two levels up so the project's `lib` package can
# be imported. The guard makes the cell idempotent: an unconditional
# os.chdir("../..") would climb two *more* directories every time the cell is
# re-run in the same kernel.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
    os.chdir("../..")

# NOTE(review): this adds the directory two levels *above* the new working
# directory to the import path; presumably `lib` itself is resolved through
# the working directory -- confirm which location is actually needed.
_extra_search_dir = os.path.dirname(os.path.dirname(os.getcwd()))
if _extra_search_dir not in sys.path:  # avoid duplicate entries on re-run
    sys.path.append(_extra_search_dir)

# Star import kept as-is: the later cells rely on names such as NeuralNetwork,
# FFNN, Sigmoid, Softmax, MSE, ZeroInitializer, evaluate_model and
# plot_training_loss that are not defined in this notebook.
from lib import *


In [None]:
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

# Baseline experiment: fit an L1-penalised logistic regression on a subsample
# of MNIST, report coefficient sparsity and the test score, and plot the
# per-class coefficient images. The standardised train/test arrays produced
# here are reused by the cells that follow.
t0 = time.time()

# Turn down for faster convergence
train_samples = 5000

# Load data from https://www.openml.org/d/554
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

# A single seeded generator drives the shuffle, the train/test split and the
# solver, so a fresh "Restart & Run All" reproduces the same numbers.
random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
X = X.reshape((X.shape[0], -1))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=10000, random_state=random_state
)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Turn up tolerance for faster convergence
clf = LogisticRegression(
    C=50.0 / train_samples,
    penalty="l1",
    solver="saga",
    tol=0.1,
    random_state=random_state,
)
clf.fit(X_train, y_train)
sparsity = np.mean(clf.coef_ == 0) * 100  # percentage of exactly-zero weights
score = clf.score(X_test, y_test)
print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

# One 28x28 coefficient image per class, on a shared symmetric colour scale.
coef = clf.coef_.copy()
fig, axes = plt.subplots(2, 5, figsize=(10, 5))
scale = np.abs(coef).max()
for i, l1_plot in enumerate(axes.ravel()):
    l1_plot.imshow(
        coef[i].reshape(28, 28),
        interpolation="nearest",
        cmap=plt.cm.RdBu,
        vmin=-scale,
        vmax=scale,
    )
    l1_plot.set_xticks(())
    l1_plot.set_yticks(())
    l1_plot.set_xlabel("Class %i" % i)
fig.suptitle("Classification vector for...")

run_time = time.time() - t0
print("Example run in %.3f s" % run_time)
plt.show()

In [None]:
# Convert labels to one-hot encoding for neural network.
#
# This cell rebinds X_train / y_train (the training split shrinks to 80% and
# y_train becomes a one-hot matrix), so it must run exactly once after the
# data-loading cell. Fail fast with a clear message on an accidental re-run
# instead of letting the split below die on a shape mismatch.
if np.ndim(y_train) != 1:
    raise RuntimeError(
        "y_train is no longer a 1-D label vector (this cell was probably "
        "already executed); re-run the data loading cell first."
    )

encoder = OneHotEncoder(sparse_output=False)
# The encoder expects a 2-D column, hence the reshape; it is fitted on the
# training labels only and then reused for the test labels.
y_train_onehot = encoder.fit_transform(y_train.reshape(-1, 1))
y_test_onehot = encoder.transform(y_test.reshape(-1, 1))

# Create validation set (20% of the training split, fixed seed)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train_onehot, test_size=0.2, random_state=42
)

# Print dataset information
print(f"Training set: {X_train.shape[0]} samples")
print(f"Validation set: {X_val.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Input features: {X_train.shape[1]}")
print(f"Output classes: {y_train_onehot.shape[1]}")

### Pengaruh Depth

In [None]:
# Layer-size specifications for the depth and width experiments.
# Each specification lists the node count of every layer, input first and
# output last. They are generated from (total layers, hidden width) instead of
# being typed out by hand, so the documented layer counts are guaranteed.
INPUT_SIZE = 784    # features per sample
OUTPUT_SIZE = 10    # one output node per class
HIDDEN_WIDTH = 156  # hidden-layer width used throughout the depth experiment


def _layer_sizes(total_layers, hidden_width):
    """Return node counts for a network with `total_layers` layers in total.

    The first entry is INPUT_SIZE, the last is OUTPUT_SIZE, and the
    `total_layers - 2` entries in between all equal `hidden_width`.
    """
    return [INPUT_SIZE] + [hidden_width] * (total_layers - 2) + [OUTPUT_SIZE]


# Define depth variations with 3, 15, and 45 layers
depth_variations = [_layer_sizes(depth, HIDDEN_WIDTH) for depth in (3, 15, 45)]

# Define width variations with 100, 200, 300 neurons per layer
width_variations = [_layer_sizes(3, width) for width in (100, 200, 300)]

In [None]:
# Create Neural Network: one per entry of depth_variations (exactly three
# entries are expected, matching the three names on the left).
#
# Every hidden layer gets its *own* Sigmoid instance. The previous
# `[Sigmoid()] * n` form placed the same object in every hidden layer.
# NOTE(review): whether the activation classes keep per-layer state is not
# visible from this notebook; separate instances are safe either way.
#
# NOTE(review): ZeroInitializer presumably starts all weights at zero, which
# would make the units of a layer indistinguishable -- confirm this is the
# intended setup for the depth comparison.
depth_3_network, depth_15_network, depth_45_network = (
    NeuralNetwork(
        node_counts=node_counts,
        # len(node_counts) - 2 hidden activations, then Softmax on the output
        activations=[Sigmoid() for _ in range(len(node_counts) - 2)] + [Softmax()],
        loss_function=MSE(),
        initialize_methods=ZeroInitializer(),
    )
    for node_counts in depth_variations
)

In [None]:
# Create FFNN model: wrap each depth-variant network in its own FFNN,
# in the order 3, 15, 45 layers.
ffnn_depth_3, ffnn_depth_15, ffnn_depth_45 = (
    FFNN(network)
    for network in (depth_3_network, depth_15_network, depth_45_network)
)

In [None]:
# Train the model
# The three depth variants are fitted with identical settings, so the
# keyword arguments are written once and reused for every call.
depth_fit_options = dict(
    x_train=X_train,
    y_train=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_val, y_val),
    learning_rate=0.01,
    verbose=1,
)

# Each call keeps its own history object for the plotting cell further down.
depth_3_history = ffnn_depth_3.fit(**depth_fit_options)
depth_15_history = ffnn_depth_15.fit(**depth_fit_options)
depth_45_history = ffnn_depth_45.fit(**depth_fit_options)

In [None]:
# Evaluate the model
# Run each trained depth variant (3, 15, 45 layers) against the test split,
# using the one-hot encoded test labels.
# NOTE(review): evaluate_model is not defined in this notebook (presumably it
# comes from the `lib` star import); whether it prints or returns its metrics
# is not visible here. If it returns them, only the last call's value is shown
# as the cell output.
evaluate_model(ffnn_depth_3, X_test, y_test_onehot)

evaluate_model(ffnn_depth_15, X_test, y_test_onehot)

evaluate_model(ffnn_depth_45, X_test, y_test_onehot)

In [None]:
# Plot the training history
# One call per depth variant, each given the history object returned by the
# corresponding fit() call and a title naming the variant.
# NOTE(review): plot_training_loss is not defined in this notebook (presumably
# it comes from the `lib` star import); what exactly it draws from the history
# object is not visible here.
plot_training_loss(depth_3_history, "Depth 3 Training History")

plot_training_loss(depth_15_history, "Depth 15 Training History")

plot_training_loss(depth_45_history, "Depth 45 Training History")

### Pengaruh Width

In [None]:
# Create Neural Network: one per entry of width_variations (exactly three
# entries are expected, matching the three names on the left).
#
# Hidden activations are built with a comprehension so that every hidden
# layer gets its own Sigmoid instance; `[Sigmoid()] * n` would place one
# shared object in every hidden layer as soon as a specification has more
# than one hidden layer.
#
# NOTE(review): ZeroInitializer presumably starts all weights at zero, which
# would make the units of a layer indistinguishable -- confirm this is the
# intended setup for the width comparison.
width_100_network, width_200_network, width_300_network = (
    NeuralNetwork(
        node_counts=node_counts,
        # len(node_counts) - 2 hidden activations, then Softmax on the output
        activations=[Sigmoid() for _ in range(len(node_counts) - 2)] + [Softmax()],
        loss_function=MSE(),
        initialize_methods=ZeroInitializer(),
    )
    for node_counts in width_variations
)

In [None]:
# Create FFNN model: wrap each width-variant network in its own FFNN,
# in the order 100, 200, 300 hidden neurons.
ffnn_width_100, ffnn_width_200, ffnn_width_300 = (
    FFNN(network)
    for network in (width_100_network, width_200_network, width_300_network)
)

In [None]:
# Train the model
# The three width variants are fitted with identical settings, so the
# keyword arguments are written once and reused for every call.
width_fit_options = dict(
    x_train=X_train,
    y_train=y_train,
    batch_size=32,
    epochs=15,
    validation_data=(X_val, y_val),
    learning_rate=0.01,
    verbose=1,
)

# Each call keeps its own history object for the plotting cell further down.
width_100_history = ffnn_width_100.fit(**width_fit_options)
width_200_history = ffnn_width_200.fit(**width_fit_options)
width_300_history = ffnn_width_300.fit(**width_fit_options)

In [None]:
# Evaluate the model
# Run each trained width variant (100, 200, 300 hidden neurons) against the
# test split, using the one-hot encoded test labels.
# NOTE(review): evaluate_model is not defined in this notebook (presumably it
# comes from the `lib` star import); whether it prints or returns its metrics
# is not visible here. If it returns them, only the last call's value is shown
# as the cell output.
evaluate_model(ffnn_width_100, X_test, y_test_onehot)

evaluate_model(ffnn_width_200, X_test, y_test_onehot)

evaluate_model(ffnn_width_300, X_test, y_test_onehot)

In [None]:
# Plot the training history
# One call per width variant, each given the history object returned by the
# corresponding fit() call and a title naming the variant.
# NOTE(review): plot_training_loss is not defined in this notebook (presumably
# it comes from the `lib` star import); what exactly it draws from the history
# object is not visible here.
plot_training_loss(width_100_history, "Width 100 Training History")

plot_training_loss(width_200_history, "Width 200 Training History")

plot_training_loss(width_300_history, "Width 300 Training History")