In [7]:
import autograd.numpy as np  # We need to use this numpy wrapper to make automatic differentiation work later
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [8]:
def Runge_func(x):
    return 1.0/(1 + 25*x**2)

In [None]:
def ReLU(z):
    return np.where(z > 0, z, 0)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def mse(predict, target):
    return np.mean((predict - target) ** 2)

def cross_entropy(predict, target):
    return np.sum(-target * np.log(predict))

def cost_mse(input, layers, activation_funcs, target):
    predict = feed_forward_batch(input, layers, activation_funcs)
    return mse(predict, target)

def cost_cross_entropy(input, layers, activation_funcs, target):
    predict = feed_forward_batch(input, layers, activation_funcs)
    return cross_entropy(predict, target)

In [36]:
def config_network(num_hidden_layers, num_nodes_hlayers, num_output_targets, activation_func_hlayers, activation_func_output):
    """ 
    Take: num_hidden_layers, number of hidden layers,
    num_nodes_hlayers, number of nodes in the hidden layers,
    output_targets, number of targets to predict,
    activation_func_hlayers, activation function used in the hidden layers,
    activation_func_output, activation function for the output layer (can be linear and non linear).
    
    Return: list of layers where each element is number of nodes in layer corresponding to index, last element is output layer,
    and list of activation function for each layer.
    """
    layer_output_sizes = [num_nodes_hlayers]*num_hidden_layers + [num_output_targets]
    activation_funcs = [activation_func_hlayers]*num_hidden_layers + [activation_func_output]

    return layer_output_sizes, activation_funcs

def create_layers_batch(network_input_shape, layer_output_sizes):
    # network_input_shape has shape (number of input vectors, features)
    layers = []

    i_shape = network_input_shape
    for layer_output_size in layer_output_sizes:
        W = np.random.randn(i_shape[1], layer_output_size)
        b = np.random.randn(i_shape[0], 1)
        layers.append((W, b))

        i_shape = (i_shape[0], layer_output_size)
    return layers

def feed_forward_batch(inputs, layers, activation_funcs):
    a = inputs
    for (W, b), activation_func in zip(layers, activation_funcs):
        print(np.shape(a))
        print(np.shape(W))
        print(np.shape(b))
        z = a @ W + b
        a = activation_func(z)
    return a

def accuracy(predictions, targets):
    one_hot_predictions = np.zeros(predictions.shape)

    for i, prediction in enumerate(predictions):
        one_hot_predictions[i, np.argmax(prediction)] = 1
    return accuracy_score(one_hot_predictions, targets)

In [17]:
np.random.seed(2025)

n = 1000
x = np.linspace(-1, 1, n)
targets = Runge_func(x) + np.random.normal(0, 0.1, size=n)

x_train, x_test, t_train, t_test = train_test_split(x, targets, test_size=0.2)

x_train = x_train.reshape(-1, 1)
x_test = x_test.reshape(-1, 1)

scaler = StandardScaler()
scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)

In [37]:
from autograd import grad

gradient_func = grad(cost_mse, 1)

In [None]:
inputs = x_train_s
network_input_size = (x_train_s.shape[0], 1) # (individual samples, features)

hidden_layers = [0, 1, 2, 3]
nodes_hlayer = [5, 10, 25, 20]

nn_train_predict_accuracy_matrix = np.zeros([len(hidden_layers), len(nodes_hlayer)])

for i in range(len(hidden_layers)):
    for j in range(len(nodes_hlayer)):

        layer_output_sizes, activation_funcs = config_network(num_hidden_layers=hidden_layers[i],
                                                              num_nodes_hlayers=nodes_hlayer[j],
                                                              num_output_targets=1,
                                                              activation_func_hlayers=ReLU,
                                                              activation_func_output=ReLU)

        layers = create_layers_batch(network_input_size, layer_output_sizes)
        
        
        cost_mse(input, layers, activation_funcs, t_train)
        
        layers_grad = gradient_func(inputs, layers, activation_funcs, targets)

        predictions = feed_forward_batch(inputs, layers, activation_funcs)

        # accuracy = mean_squared_error(predictions, t_train)
        # # nn_trainpredict_accuracy_matrix = 

[1]
(800, 1)
(1, 1)
(800, 1)
[1]
(800, 1)
(1, 1)
(800, 1)
[1]
(800, 1)
(1, 1)
(800, 1)
[1]
(800, 1)
(1, 1)
(800, 1)
[5, 1]
(800, 1)
(1, 5)
(800, 1)
(800, 5)
(5, 1)
(800, 1)
[10, 1]
(800, 1)
(1, 10)
(800, 1)
(800, 10)
(10, 1)
(800, 1)
[25, 1]
(800, 1)
(1, 25)
(800, 1)
(800, 25)
(25, 1)
(800, 1)
[20, 1]
(800, 1)
(1, 20)
(800, 1)
(800, 20)
(20, 1)
(800, 1)
[5, 5, 1]
(800, 1)
(1, 5)
(800, 1)
(800, 5)
(5, 5)
(800, 1)
(800, 5)
(5, 1)
(800, 1)
[10, 10, 1]
(800, 1)
(1, 10)
(800, 1)
(800, 10)
(10, 10)
(800, 1)
(800, 10)
(10, 1)
(800, 1)
[25, 25, 1]
(800, 1)
(1, 25)
(800, 1)
(800, 25)
(25, 25)
(800, 1)
(800, 25)
(25, 1)
(800, 1)
[20, 20, 1]
(800, 1)
(1, 20)
(800, 1)
(800, 20)
(20, 20)
(800, 1)
(800, 20)
(20, 1)
(800, 1)
[5, 5, 5, 1]
(800, 1)
(1, 5)
(800, 1)
(800, 5)
(5, 5)
(800, 1)
(800, 5)
(5, 5)
(800, 1)
(800, 5)
(5, 1)
(800, 1)
[10, 10, 10, 1]
(800, 1)
(1, 10)
(800, 1)
(800, 10)
(10, 10)
(800, 1)
(800, 10)
(10, 10)
(800, 1)
(800, 10)
(10, 1)
(800, 1)
[25, 25, 25, 1]
(800, 1)
(1, 25)
(800, 1)
