# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from typing import List, Callable, Tuple, Dict
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# ACTIVATION FUNCTIONS
# ============================================================================

def logistic(x):
    """
    Sigmoid (logistic) activation: 1 / (1 + e^-x)

    The argument is limited to the range [-500, 500] before it is
    exponentiated, which guards the exponential against overflow.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def relu(x):
    """
    Rectified Linear Unit activation: max(0, x)

    Positive values pass through unchanged; negative values become 0.
    Works element-wise on arrays.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def tanh(x):
    """
    Hyperbolic tangent activation, computed with np.tanh
    Squashes its input into the range (-1, 1); works element-wise on arrays
    """
    squashed = np.tanh(x)
    return squashed

def linear(x):
    """
    Identity activation: the input is handed back untouched (f(x) = x)
    Commonly chosen for the output layer in regression problems
    """
    return x

# Name -> callable lookup table for the activation functions defined above.
# Names and callables are paired positionally, so keep both tuples in step.
ACTIVATION_FUNCTIONS = dict(
    zip(
        ('logistic', 'relu', 'tanh', 'linear'),
        (logistic, relu, tanh, linear),
    )
)

# ============================================================================
# ARTIFICIAL NEURAL NETWORK (ANN) CLASS
# ============================================================================

class ANN:
    """
    Multi-layer Feedforward Artificial Neural Network

    This implementation allows configurable:
    - Number of layers
    - Number of neurons per layer
    - Activation function per layer

    The network is trained using PSO (not backpropagation): the class has no
    training routine of its own, and all weights and biases are loaded from a
    flat parameter vector through ``set_parameters``.
    """

    def __init__(self, layer_sizes: List[int], activation_functions: List[str]):
        """
        Initialize the ANN architecture

        Args:
            layer_sizes: List of integers, e.g., [8, 10, 5, 1]
                        Specifies number of neurons in each layer
                        First element = input layer, last = output layer
            activation_functions: List of activation function names
                                 (keys of ACTIVATION_FUNCTIONS)
                                 One for each layer AFTER input layer
                                 e.g., ['relu', 'relu', 'linear'] for 3 hidden/output layers

        Raises:
            ValueError: If fewer than two layers are given, a layer size is
                        not positive, the number of activation functions does
                        not match the number of non-input layers, or an
                        activation name is unknown
        """
        # Copy the sizes so that later mutation of the caller's list cannot
        # desynchronise the stored architecture from the allocated arrays
        self.layer_sizes = list(layer_sizes)
        self.num_layers = len(self.layer_sizes)

        # A network needs at least an input and an output layer
        if self.num_layers < 2:
            raise ValueError(
                f"Need at least 2 layers (input and output), got {self.num_layers}"
            )
        if any(size < 1 for size in self.layer_sizes):
            raise ValueError(
                f"Layer sizes must be positive integers, got {self.layer_sizes}"
            )

        # Validate activation functions list
        if len(activation_functions) != self.num_layers - 1:
            raise ValueError(f"Need {self.num_layers - 1} activation functions, got {len(activation_functions)}")

        # Convert activation function names to actual functions
        self.activation_functions = []
        for func_name in activation_functions:
            if func_name not in ACTIVATION_FUNCTIONS:
                raise ValueError(f"Unknown activation function: {func_name}")
            self.activation_functions.append(ACTIVATION_FUNCTIONS[func_name])

        # Zero-filled placeholders; real values arrive via set_parameters.
        # weights[i] has shape (neurons_in_layer_i, neurons_in_layer_i+1)
        # biases[i]  has shape (neurons_in_layer_i+1,)
        self.weights = [
            np.zeros((n_in, n_out))
            for n_in, n_out in zip(self.layer_sizes[:-1], self.layer_sizes[1:])
        ]
        self.biases = [np.zeros(n_out) for n_out in self.layer_sizes[1:]]

    def set_parameters(self, parameter_vector: np.ndarray):
        """
        Set all weights and biases from a flat 1D parameter vector

        This method is called by PSO to decode a particle's position
        into the ANN's weights and biases

        Args:
            parameter_vector: Array-like containing exactly
                            get_parameter_count() values.
                            Order: ALL weight matrices first (layer 0→1,
                            layer 1→2, ...), each flattened row-major,
                            followed by ALL bias vectors (layer 1, layer 2, ...)

        Raises:
            ValueError: If the vector does not contain exactly
                        get_parameter_count() values
        """
        # np.array copies, so the stored weights never alias the caller's
        # buffer (e.g. a particle position that is later updated in place)
        flat = np.array(parameter_vector, dtype=float).ravel()

        # Without this check a short vector would silently produce truncated
        # bias arrays and a long one would be silently cut off
        expected = self.get_parameter_count()
        if flat.size != expected:
            raise ValueError(f"Expected {expected} parameters, got {flat.size}")

        layer_pairs = list(zip(self.layer_sizes[:-1], self.layer_sizes[1:]))
        index = 0

        # First segment of the vector: one weight matrix per layer connection
        weights = []
        for n_in, n_out in layer_pairs:
            weight_count = n_in * n_out
            weights.append(flat[index:index + weight_count].reshape(n_in, n_out))
            index += weight_count

        # Remaining segment: one bias vector per non-input layer
        biases = []
        for _, n_out in layer_pairs:
            biases.append(flat[index:index + n_out])
            index += n_out

        self.weights = weights
        self.biases = biases

    def get_parameter_count(self) -> int:
        """
        Calculate total number of parameters (weights + biases) in the network

        This determines the dimensionality of the PSO search space

        Returns:
            Integer count of total parameters
        """
        # Each connection i -> i+1 contributes n_in * n_out weights plus
        # n_out biases for the receiving layer
        return sum(
            n_in * n_out + n_out
            for n_in, n_out in zip(self.layer_sizes[:-1], self.layer_sizes[1:])
        )

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation through the network

        Computes output by passing inputs through all layers:
        For each layer: z = a · W + b, then a_next = activation(z)

        Args:
            inputs: Input vector of shape (n_features,), or a batch of
                   shape (n_samples, n_features); the bias is broadcast
                   across the rows of a batch

        Returns:
            Output of the final layer, shape (n_outputs,) for a single
            vector or (n_samples, n_outputs) for a batch
        """
        # Start with input layer activation
        activation = inputs

        # Propagate through each layer
        for weight_matrix, bias_vector, activate in zip(
            self.weights, self.biases, self.activation_functions
        ):
            # Linear transformation of the previous layer's activation
            z = np.dot(activation, weight_matrix) + bias_vector

            # Apply this layer's activation function
            activation = activate(z)

        # Return final layer output
        return activation

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Make predictions for multiple input samples

        Args:
            X: Input matrix of shape (n_samples, n_features). Any array-like
               that np.asarray turns into a 2-D array is accepted.

        Returns:
            Predictions of shape (n_samples, n_outputs)
        """
        samples = np.asarray(X)

        # A 2-D batch goes through the network in one vectorised pass; this
        # also yields the correct (0, n_outputs) shape for an empty batch
        if samples.ndim == 2:
            return self.forward(samples)

        # Anything that is not 2-D keeps the original item-by-item behaviour
        return np.array([self.forward(sample) for sample in samples])