In [None]:
import os
import sys

# Move the working directory two levels up from where the notebook was started.
# The starting directory is remembered the first time this cell runs, so that
# re-running the cell lands in the same place instead of climbing two more
# levels on every execution.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir, os.pardir))

In [None]:
# Import custom FFNN implementation
import os
import sys
# Add the directory two levels above the current working directory to the
# module search path so that project modules can be imported.
# NOTE(review): an earlier cell already moves the working directory up two
# levels with os.chdir, so this path is resolved relative to that new
# location -- confirm it is the directory that actually contains `lib`.
grandparent_dir = os.path.dirname(os.path.dirname(os.getcwd()))
if grandparent_dir not in sys.path:  # avoid duplicate entries when the cell is re-run
    sys.path.append(grandparent_dir)

from lib import *

In [None]:
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

import time
import numpy as np

from sklearn.datasets import fetch_openml, load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
import logging

# logging.basicConfig(level=logging.INFO)

# # Turn down for faster convergence
# t0 = time.time()
# train_samples = 5000

# # Load data from https://www.openml.org/d/554
# logging.info("Loading data")
# X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

# random_state = check_random_state(0)
# permutation = random_state.permutation(X.shape[0])
# X = X[permutation]
# y = y[permutation]
# X = X.reshape((X.shape[0], -1))

# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, train_size=train_samples, test_size=10000
# )

# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # Turn up tolerance for faster convergence
# clf = LogisticRegression(C=50.0 / train_samples, penalty="l1", solver="saga", tol=0.1)
# clf.fit(X_train, y_train)
# sparsity = np.mean(clf.coef_ == 0) * 100
# score = clf.score(X_test, y_test)
# # print('Best C % .4f' % clf.C_)
# print("Sparsity with L1 penalty: %.2f%%" % sparsity)
# print("Test score with L1 penalty: %.4f" % score)

# Create dummy data
# A small synthetic classification problem: standard-normal features paired with
# uniformly random integer labels. Features and labels are drawn independently,
# so this data is only useful for exercising the training pipeline, not for
# judging model quality.
n_samples = 1000  # Number of samples
n_features = 4    # Number of input features
n_classes = 5     # Number of target classes

# Dedicated, seeded generator so that Restart & Run All reproduces the same data
DATA_SEED = 0
data_rng = np.random.default_rng(DATA_SEED)

# Feature matrix of shape (n_samples, n_features), drawn from N(0, 1)
X = data_rng.standard_normal((n_samples, n_features))

# Integer class labels in the range 0 .. n_classes - 1
y = data_rng.integers(0, n_classes, size=n_samples)

# Hold out half of the samples for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0
)

# Standardize the features: the scaling statistics are learned from the training
# split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Preprocess data for FFNN
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the integer labels for the neural network.
# The encoder learns its categories from the training labels only and is then
# applied to both the training and the test labels.
encoder = OneHotEncoder(sparse_output=False).fit(y_train.reshape(-1, 1))
y_train_onehot, y_test_onehot = (
    encoder.transform(labels.reshape(-1, 1)) for labels in (y_train, y_test)
)

# Carve a validation set (20%) out of the training data
X_train_ffnn, X_val, Y_train_ffnn, Y_val = train_test_split(
    X_train,
    y_train_onehot,
    test_size=0.2,
    random_state=42,
)

# Summary of the resulting splits and dimensions
print(f"Training set: {X_train_ffnn.shape[0]} samples")
print(f"Validation set: {X_val.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Input features: {X_train.shape[1]}")
print(f"Output classes: {y_train_onehot.shape[1]}")

In [None]:
# Glossary

# Base configuration for the FFNN
# Shared default settings. The training cell reads 'learning_rate', 'batch_size'
# and 'epochs' from this dict and forwards them to FFNN.fit().
# NOTE(review): 'loss_function', 'activation' and 'initializer' are not read by
# any cell visible in this part of the notebook -- confirm whether they are used
# elsewhere or can be dropped.
base_config = {
    'learning_rate': 0.01,  # passed to fit() as learning_rate
    'batch_size': 32,  # passed to fit() as batch_size
    'epochs': 20,  # passed to fit() as epochs
    'loss_function': CCE(),  # default loss object (from lib)
    'activation': Softmax(),  # default activation object (from lib)
    'initializer': NormalInitializer(seed=42)  # default weight initializer, fixed seed
}


# Depth variation
# Node counts per layer, written as [input, hidden..., output]. The input (4) and
# output (5) sizes stay fixed; only the number of hidden layers changes, and
# every hidden layer has 3 neurons.
depth_variations = [
    [4] + [3] * hidden_layer_count + [5]
    for hidden_layer_count in (1, 2, 3)  # shallow, medium-depth, deep
]

# Width variation
# Node counts written as [input, hidden, output]. The depth is held at a single
# hidden layer while the number of neurons in that layer changes.
width_variations = [
    [4, hidden_width, 5]
    for hidden_width in (3, 5, 7)  # narrow, medium-width, wide
]

# Activation variation
# Each entry is a list of activation objects, one per layer after the input layer.
# The number of activation functions must be one less than the number of layers,
# so entry i here (2, 3 and 4 activations) matches the length of
# depth_variations[i] (3, 4 and 5 layers).
activation_variations = [
    [Linear(), Softmax()],
    [Sigmoid(), Sigmoid(), Sigmoid()],
    [ReLU(), Sigmoid(), Tanh(), Softmax()]
]

# Loss function variation
# Candidate loss objects for training; one of them is selected by index when the
# network is built.
# NOTE(review): MSE / BCE / CCE presumably stand for mean squared error, binary
# cross-entropy and categorical cross-entropy -- confirm against lib.
loss_function_variations = [
    MSE(),
    BCE(),
    CCE()
]

# Weight initialization variation
# Each entry is a list of initializer objects, one per layer after the input layer.
# The number of initializers must be one less than the number of layers, so
# entry i here (2, 3 and 4 initializers) matches the length of
# depth_variations[i] (3, 4 and 5 layers).
# The uniform and normal initializers are given a fixed seed (22).
initializer_variations = [
    [ZeroInitializer(), ZeroInitializer()],
    [ZeroInitializer(), UniformInitializer(low=-1, high=1, seed=22), ZeroInitializer()],
    [ZeroInitializer(), NormalInitializer(mean=0.0, var=0.1, seed=22), ZeroInitializer(), ZeroInitializer()]
]

In [None]:
# FFNN getting started: build, train and evaluate a single configuration

# Step 1 - assemble the network from entry 1 of every variation list
# (node counts 4-3-3-5 with three activations and three initializers)
network = NeuralNetwork(
    node_counts=depth_variations[1],
    activations=activation_variations[1],
    loss_function=loss_function_variations[1],
    initialize_methods=initializer_variations[1],
)

# Step 2 - wrap the network in the FFNN model
ffnn_model = FFNN(network)

# Step 3 - train on the training split, monitoring the validation split, with
# the hyperparameters taken from base_config
train_history = ffnn_model.fit(
    x_train=X_train_ffnn,
    y_train=Y_train_ffnn,
    batch_size=base_config['batch_size'],
    epochs=base_config['epochs'],
    validation_data=(X_val, Y_val),
    learning_rate=base_config['learning_rate'],
    verbose=1,
)

# Step 4 - score on the held-out test set. A sample's class is the index of the
# largest entry in its row, for the one-hot targets and the predictions alike.
true_classes = np.argmax(y_test_onehot, axis=1)
y_pred = ffnn_model.predict(X_test)
predicted_classes = np.argmax(y_pred, axis=1)
accuracy = accuracy_score(y_true=true_classes, y_pred=predicted_classes)
print(f"Accuracy: {accuracy:.4f}")

In [None]:
# Train history is a dictionary containing the loss and accuracy metrics
# train_history

In [None]:
# Call show() on the trained model's network object.
# NOTE(review): what show() renders is defined in lib -- confirm there.
ffnn_model.network.show()

In [None]:
# Plot the weights for the layers listed in the argument.
# NOTE(review): [1, 3] presumably selects the first and last weight layers of
# the 4-3-3-5 network built above -- confirm the indexing convention of
# plot_weights in lib.
ffnn_model.network.plot_weights([1, 3])

In [None]:
# Plot the gradients of the weights of the first and last layer
# NOTE(review): this reading of [1, 3] assumes 1-based layer indices and a
# network with three weight layers -- confirm against plot_gradients in lib.
ffnn_model.network.plot_gradients([1, 3])