# In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

from ffnn import FFNN 

def load_mnist_dataset():
    """Fetch MNIST from OpenML and return a train/test split with one-hot labels.

    The feature array is cast to float32 and divided by 255.0, and the target
    array is cast to int. The data is split with ``test_size=0.2`` and
    ``random_state=42``.

    Returns:
        A 6-tuple ``(X_train, X_test, y_train, y_test, y_train_onehot,
        y_test_onehot)``. The one-hot arrays are produced by indexing a
        10x10 identity matrix with the integer labels.
    """
    dataset = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
    features = dataset.data.astype('float32') / 255.0
    labels = dataset.target.astype('int')

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Row k of the identity matrix is the one-hot vector for class k.
    identity = np.eye(10)
    return (
        X_train,
        X_test,
        y_train,
        y_test,
        identity[y_train],
        identity[y_test],
    )

def _run_architecture_experiment(layer_sizes, X_train, X_test, y_train_onehot, y_test_onehot):
    """Train one FFNN with the given layer sizes, print its accuracy, and plot.

    Every layer except the last uses 'relu'; the last uses 'softmax'. The
    model is trained with categorical cross-entropy ('cce'), learning rate
    0.01, 10 epochs and batch size 64.

    Args:
        layer_sizes: List of layer widths, input and output included.
        X_train: Training features.
        X_test: Test features, passed to ``train`` and used for prediction.
        y_train_onehot: One-hot training labels.
        y_test_onehot: One-hot test labels.
    """
    hidden_layers = len(layer_sizes) - 2
    model = FFNN(
        layer_sizes=layer_sizes,
        activation_func=['relu'] * hidden_layers + ['softmax'],
        loss_func='cce',
        learning_rate=0.01,
    )
    # NOTE(review): the test split is handed to train() as its 3rd/4th
    # positional arguments -- presumably as validation data; confirm in FFNN.
    model.train(X_train, y_train_onehot, X_test, y_test_onehot, epochs=10, batch_size=64, verbose=1)

    y_pred = model.predict(X_test)
    # Both arrays are reduced to class indices via argmax before scoring.
    accuracy = accuracy_score(np.argmax(y_test_onehot, axis=1), np.argmax(y_pred, axis=1))
    print(f"Accuracy: {accuracy}")

    model.plot_weight_distribution()
    model.plot_weight_gradient_distribution()


def analyze_depth_and_width(X_train, X_test, y_train_onehot, y_test_onehot):
    """Run the width experiments, then the depth experiments.

    Width variations change the size of a single hidden layer (128, 256,
    512); depth variations stack one, two or three hidden layers of 128
    units. Each configuration is trained and reported by
    ``_run_architecture_experiment``.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train_onehot: One-hot training labels.
        y_test_onehot: One-hot test labels.
    """
    print("Analyzing Depth and Width")
    experiments = (
        ("Width", [[784, 128, 10], [784, 256, 10], [784, 512, 10]]),
        ("Depth", [[784, 128, 10], [784, 128, 128, 10], [784, 128, 128, 128, 10]]),
    )

    # All width runs finish before any depth run starts, as in the original.
    for label, variations in experiments:
        for i, layer_sizes in enumerate(variations, 1):
            print(f"\n{label} Variation {i}: {layer_sizes}")
            _run_architecture_experiment(
                layer_sizes, X_train, X_test, y_train_onehot, y_test_onehot
            )

def analyze_activation_functions(X_train, X_test, y_train_onehot, y_test_onehot):
    """Train a 784-128-10 FFNN once per hidden-layer activation and report.

    The hidden activation is taken in turn from 'linear', 'relu', 'sigmoid'
    and 'tanh'; the output layer is always 'softmax'. For each run the test
    accuracy is printed and the model's two plotting methods are called.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train_onehot: One-hot training labels.
        y_test_onehot: One-hot test labels.
    """
    print("Analyzing Activation Functions")

    for activation in ('linear', 'relu', 'sigmoid', 'tanh'):
        print(f"\nActivation Function: {activation}")

        net = FFNN(
            layer_sizes=[784, 128, 10],
            activation_func=[activation, 'softmax'],
            loss_func='cce',
            learning_rate=0.01,
        )
        net.train(
            X_train, y_train_onehot, X_test, y_test_onehot,
            epochs=10, batch_size=64, verbose=1,
        )

        probabilities = net.predict(X_test)
        # Convert one-hot targets and predicted scores to class indices.
        true_classes = np.argmax(y_test_onehot, axis=1)
        predicted_classes = np.argmax(probabilities, axis=1)
        accuracy = accuracy_score(true_classes, predicted_classes)
        print(f"Accuracy: {accuracy}")

        net.plot_weight_distribution()
        net.plot_weight_gradient_distribution()

def analyze_learning_rates(X_train, X_test, y_train_onehot, y_test_onehot):
    """Train a 784-128-10 relu/softmax FFNN at several learning rates.

    Learning rates 0.001, 0.01 and 0.1 are tried in that order. Each model is
    trained for 10 epochs with batch size 64; its test accuracy is printed
    and its two plotting methods are called.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train_onehot: One-hot training labels.
        y_test_onehot: One-hot test labels.
    """
    print("Analyzing Learning Rates")

    train_options = {'epochs': 10, 'batch_size': 64, 'verbose': 1}
    for lr in (0.001, 0.01, 0.1):
        print(f"\nLearning Rate: {lr}")

        candidate = FFNN(
            layer_sizes=[784, 128, 10],
            activation_func=['relu', 'softmax'],
            loss_func='cce',
            learning_rate=lr,
        )
        candidate.train(X_train, y_train_onehot, X_test, y_test_onehot, **train_options)

        scores = candidate.predict(X_test)
        accuracy = accuracy_score(
            np.argmax(y_test_onehot, axis=1),
            np.argmax(scores, axis=1),
        )
        print(f"Accuracy: {accuracy}")

        candidate.plot_weight_distribution()
        candidate.plot_weight_gradient_distribution()

def analyze_weight_initialization(X_train, X_test, y_train_onehot, y_test_onehot):
    """Train a 784-128-10 relu/softmax FFNN once per weight-init method.

    The ``weight_init`` argument of ``FFNN`` is set in turn to 'zero',
    'uniform' and 'normal'. After each run the test accuracy is printed and
    the model's two plotting methods are called.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train_onehot: One-hot training labels.
        y_test_onehot: One-hot test labels.
    """
    print("Analyzing Weight Initialization")

    for method in ('zero', 'uniform', 'normal'):
        print(f"\nInitialization Method: {method}")

        network = FFNN(
            layer_sizes=[784, 128, 10],
            activation_func=['relu', 'softmax'],
            loss_func='cce',
            weight_init=method,
            learning_rate=0.01,
        )
        network.train(
            X_train,
            y_train_onehot,
            X_test,
            y_test_onehot,
            epochs=10,
            batch_size=64,
            verbose=1,
        )

        outputs = network.predict(X_test)
        expected = np.argmax(y_test_onehot, axis=1)
        predicted = np.argmax(outputs, axis=1)
        accuracy = accuracy_score(expected, predicted)
        print(f"Accuracy: {accuracy}")

        network.plot_weight_distribution()
        network.plot_weight_gradient_distribution()

def compare_with_sklearn(X_train, X_test, y_train, y_test):
    """Train the custom FFNN and sklearn's MLPClassifier and print both accuracies.

    Both models use one hidden layer of 128 ReLU units, an initial learning
    rate of 0.01, 10 epochs and a mini-batch size of 64. The sklearn model is
    seeded so that repeated runs give the same baseline.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Integer training labels (used to index a 10x10 identity
            matrix, so they must be valid indices in ``range(10)``).
        y_test: Integer test labels, same requirement as ``y_train``.
    """
    print("Comparing with Sklearn MLP")

    # Hyperparameters shared by both models so the comparison is like-for-like.
    hidden_units = 128
    learning_rate = 0.01
    epochs = 10
    batch_size = 64

    identity = np.eye(10)
    model_ffnn = FFNN(
        layer_sizes=[784, hidden_units, 10],
        activation_func=['relu', 'softmax'],
        loss_func='cce',
        learning_rate=learning_rate,
    )
    model_ffnn.train(
        X_train, identity[y_train], X_test, identity[y_test],
        epochs=epochs, batch_size=batch_size, verbose=1,
    )
    y_pred_ffnn = model_ffnn.predict(X_test)
    accuracy_ffnn = accuracy_score(y_test, np.argmax(y_pred_ffnn, axis=1))
    print(f"Custom FFNN Accuracy: {accuracy_ffnn}")

    # Without an explicit batch_size, MLPClassifier uses min(200, n_samples),
    # which would not match the FFNN run; random_state makes the baseline
    # reproducible (same seed as the train/test split).
    # NOTE(review): solver stays 'adam' as before; whether that matches the
    # optimizer inside FFNN cannot be determined from this file.
    model_sklearn = MLPClassifier(
        hidden_layer_sizes=(hidden_units,),
        activation='relu',
        solver='adam',
        learning_rate_init=learning_rate,
        max_iter=epochs,
        batch_size=batch_size,
        random_state=42,
    )
    model_sklearn.fit(X_train, y_train)
    y_pred_sklearn = model_sklearn.predict(X_test)
    accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
    print(f"Sklearn MLP Accuracy: {accuracy_sklearn}")

def main():
    """Load MNIST, run every analysis in sequence, then the sklearn comparison."""
    X_train, X_test, y_train, y_test, y_train_onehot, y_test_onehot = load_mnist_dataset()

    # These analyses all take the same four arguments (one-hot labels).
    onehot_inputs = (X_train, X_test, y_train_onehot, y_test_onehot)
    analyses = (
        analyze_depth_and_width,
        analyze_activation_functions,
        analyze_learning_rates,
        analyze_weight_initialization,
    )
    for run_analysis in analyses:
        run_analysis(*onehot_inputs)

    # The sklearn comparison takes the integer labels instead.
    compare_with_sklearn(X_train, X_test, y_train, y_test)

# Run the experiments only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()