# The Perceptron

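This notebook implements a single-neuron perceptron with NumPy (sigmoid, tanh, or ReLU activation) and trains it on the Iris dataset.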

In [1]:
import numpy as np
import torch

In [123]:
class Perceptron:
    """
    A single-neuron perceptron trained with per-sample gradient descent.

    The model computes ``activation(w . [bias, x])`` and uses a linear
    decision boundary to classify data points. The activation function can be
    switched between sigmoid, tanh and ReLU.

    Parameters
    ----------
    dimension_data : int
        The number of features (excluding the bias) in the input data. It is
        stored for reference only; the weight vector is sized from the data
        passed to ``train``.

    Attributes
    ----------
    dimension_data : int
        The value passed to the constructor.
    activation_function : callable
        The function used for activation.
    diff_activation_function : callable
        The derivative of the activation function.
    bias : float
        The constant value prepended to every input vector.
    weights : np.ndarray or None
        The weights of the perceptron (bias weight first); None until
        ``train`` has been called.
    activation_function_name : str
        The lower-cased name of the activation function currently in use.
    average_loss : list of float
        Average loss of each epoch of the most recent ``train`` call.

    Methods
    -------
    set_activation_function(name='sigmoid')
        Set the activation function by name.
    insert_bias(x)
        Insert the bias term at the front of a single input vector.
    loss_function(weights, x, y)
        Return the signed prediction error for one sample.
    compute_loss(predictions, targets)
        Compute the loss between predictions and targets.
    backward(X_i, error, z, learning_rate)
        Apply one gradient-descent update to the weights.
    forward(X)
        Compute the activations for a matrix of inputs.
    train(X, y, epochs=100, learning_rate=0.01)
        Fit the weights with per-sample gradient descent.
    predict(X)
        Alias of ``forward``.
    predict_classes(X)
        Threshold the activations at 0.5 to obtain 0/1 labels.
    """

    _UNSUPPORTED_MESSAGE = "Unsupported activation function. Choose 'relu', 'sigmoid', or 'tanh'."

    def __init__(self, dimension_data) -> None:
        self.dimension_data = dimension_data
        self.activation_function = None
        self.diff_activation_function = None
        self.bias = 1
        self.weights = None
        self.activation_function_name = 'sigmoid'
        self.average_loss = []
        # Install the callables matching the default name so the model is
        # usable without an explicit set_activation_function() call.
        self.set_activation_function(self.activation_function_name)

    @staticmethod
    def _relu(x):
        """Rectified linear unit: max(0, x), element-wise."""
        return np.maximum(0, x)

    @staticmethod
    def _relu_derivative(x):
        """Derivative of ReLU: 1 where x > 0, else 0."""
        return np.where(x > 0, 1, 0)

    @staticmethod
    def _sigmoid(x):
        """Logistic sigmoid: 1 / (1 + exp(-x))."""
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def _sigmoid_derivative(x):
        """Derivative of the sigmoid: s(x) * (1 - s(x))."""
        s = 1 / (1 + np.exp(-x))  # evaluate the sigmoid once and reuse it
        return s * (1 - s)

    @staticmethod
    def _tanh(x):
        """Hyperbolic tangent."""
        return np.tanh(x)

    @staticmethod
    def _tanh_derivative(x):
        """Derivative of tanh: 1 - tanh(x)**2."""
        return 1 - np.tanh(x) ** 2

    def set_activation_function(self, name='sigmoid'):
        """
        Set the activation function for the perceptron.

        Parameters
        ----------
        name : str, optional
            The name of the activation function (case-insensitive). Supported
            values are 'relu', 'sigmoid', and 'tanh'. Default is 'sigmoid'.

        Raises
        ------
        ValueError
            If the specified activation function name is not supported. The
            previously configured activation is left untouched in that case.
        """
        key = name.lower()
        supported = {
            'relu': (self._relu, self._relu_derivative),
            'sigmoid': (self._sigmoid, self._sigmoid_derivative),
            'tanh': (self._tanh, self._tanh_derivative),
        }
        if key not in supported:
            raise ValueError(self._UNSUPPORTED_MESSAGE)
        # Assign only after validation so name and callables never disagree.
        self.activation_function, self.diff_activation_function = supported[key]
        self.activation_function_name = key

    def insert_bias(self, x):
        """
        Return a copy of the single input vector ``x`` with the bias prepended.

        Parameters
        ----------
        x : array_like
            One input instance (without bias).

        Returns
        -------
        np.ndarray
            Flattened array whose first element is ``self.bias``.
        """
        return np.insert(x, 0, self.bias)  # Always insert bias at index 0

    def _with_bias_column(self, X):
        """Return ``X`` as a float matrix with a leading column of ``self.bias``."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be a 2-D array of shape (n_samples, n_features); got {X.ndim} dimension(s)."
            )
        bias_column = np.full((X.shape[0], 1), self.bias, dtype=float)
        return np.hstack((bias_column, X))

    def _require_weights(self):
        """Raise a clear error when the model is used before training."""
        if self.weights is None:
            raise RuntimeError("The perceptron has no weights yet; call train() first.")

    def loss_function(self, weights, x, y):
        """
        Return the signed prediction error for one sample.

        Parameters
        ----------
        weights : np.ndarray
            Weight vector to evaluate.
        x : np.ndarray
            Input vector; it is used as given, so it must already contain the
            bias entry if ``weights`` has a bias weight.
        y : float
            Target value.

        Returns
        -------
        float
            ``activation(weights . x) - y``.
        """
        prediction = self.activation_function(np.dot(weights, x))
        return prediction - y

    def compute_loss(self, predictions, targets):
        """
        Compute the loss between predictions and targets.

        Binary cross-entropy is used with the sigmoid activation and mean
        squared error with ReLU or tanh.

        Parameters
        ----------
        predictions : np.ndarray or float
            Model outputs.
        targets : np.ndarray or float
            Expected outputs, same shape as ``predictions``.

        Returns
        -------
        float
            The mean loss over all supplied elements.

        Raises
        ------
        ValueError
            If ``activation_function_name`` is not a supported name.
        """
        if self.activation_function_name == 'sigmoid':
            # 1e-9 keeps log() finite when a prediction saturates at 0 or 1.
            return -np.mean(targets * np.log(predictions + 1e-9) + (1 - targets) * np.log(1 - predictions + 1e-9))
        if self.activation_function_name in ('relu', 'tanh'):
            return np.mean((predictions - targets) ** 2)
        raise ValueError(f"Unsupported or undefined activation function name: {self.activation_function_name}")

    def backward(self, X_i, error, z, learning_rate):
        """
        Perform the backward pass, updating the weights in place.

        The step is ``learning_rate * error * activation'(z) * X_i``, i.e. the
        gradient of ``0.5 * error**2``. Note that with the sigmoid activation
        this is not the gradient of the cross-entropy value reported by
        ``compute_loss``.

        Parameters
        ----------
        X_i : np.ndarray
            The input data for a single instance, including the bias.
        error : float
            The difference between the predicted and actual target.
        z : float
            The linear combination of weights and inputs before activation.
        learning_rate : float
            The rate at which the weights should be updated.
        """
        gradient = error * self.diff_activation_function(z) * X_i
        self.weights -= learning_rate * gradient

    def forward(self, X):
        """
        Perform the forward pass by computing the output of the perceptron.

        Parameters
        ----------
        X : np.ndarray
            The input data array where each row represents an instance
            (without the bias column).

        Returns
        -------
        np.ndarray
            The activation computed for each row of ``X``.

        Raises
        ------
        RuntimeError
            If the model has not been trained yet.
        ValueError
            If ``X`` is not two-dimensional.
        """
        self._require_weights()
        z = np.dot(self._with_bias_column(X), self.weights)
        return self.activation_function(z)

    def train(self, X, y, epochs=100, learning_rate=0.01):
        """
        Fit the perceptron to a labelled dataset with per-sample updates.

        Every call starts from freshly drawn random weights and an empty
        ``average_loss`` history, so the history always describes the weights
        currently held by the model.

        Parameters
        ----------
        X : np.ndarray
            The input data matrix where each row represents an instance and each column a feature.
        y : np.ndarray
            The target output vector where each element corresponds to a target for a corresponding row in X.
        epochs : int, optional
            The number of epochs for which the model should be trained. Defaults to 100.
        learning_rate : float, optional
            The learning rate used for updating the weights. Defaults to 0.01.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, is empty, or its number of rows
            differs from the length of ``y``.

        Notes
        -----
        The average loss of each epoch is appended to ``self.average_loss``
        and printed to monitor training progress.
        """
        samples = self._with_bias_column(X)
        targets = np.asarray(y)
        if len(samples) == 0:
            raise ValueError("Cannot train on an empty dataset.")
        if len(samples) != len(targets):
            raise ValueError(
                f"X and y must contain the same number of samples; got {len(samples)} and {len(targets)}."
            )

        # One weight per feature plus one for the bias column.
        self.weights = np.random.randn(samples.shape[1])
        # The weights restart from scratch, so the loss history does too.
        self.average_loss = []

        for epoch in range(epochs):
            total_loss = 0.0
            for sample, target in zip(samples, targets):
                z = np.dot(sample, self.weights)
                prediction = self.activation_function(z)
                # Loss is measured before this sample's weight update.
                total_loss += float(self.compute_loss(prediction, target))
                self.backward(sample, prediction - target, z, learning_rate)

            average_loss = total_loss / len(samples)
            self.average_loss.append(average_loss)
            print(f'Epoch {epoch + 1}, Average Loss: {average_loss}')

    def predict(self, X):
        """
        Predict the output for the given input data using the trained perceptron model.

        This is the same computation as ``forward``.

        Parameters
        ----------
        X : np.ndarray
            The input data matrix where each row represents an instance and each column represents a feature.

        Returns
        -------
        np.ndarray
            The activation generated by the perceptron for each input instance.
        """
        return self.forward(X)

    def predict_classes(self, X):
        """
        Predict the class labels for the given input data using the trained perceptron model.

        Parameters
        ----------
        X : np.ndarray
            The input data array where each row represents an instance.

        Returns
        -------
        np.ndarray
            Integer array with 1 where the activation exceeds 0.5 and 0
            otherwise. The 0.5 threshold is applied whichever activation
            function is configured.
        """
        return (self.predict(X) > 0.5).astype(int)




In [109]:
from sklearn.datasets import load_iris
# Fetch the Iris dataset object, then split it into a feature matrix and a label vector
data = load_iris()
X, y = data.data, data.target
# NOTE(review): Perceptron describes itself as a binary classifier — confirm that `y` holds only two classes before training on it

In [114]:
# Display the object returned by load_iris() (the output captured below is truncated)
data

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [124]:
perceptron = Perceptron(dimension_data=4)  # Declare four input features per sample
perceptron.set_activation_function('tanh')  # Use tanh instead of the default 'sigmoid'; compute_loss then reports mean squared error

In [125]:
# Train for 50 epochs with per-sample updates; train() draws new random weights and prints the average loss of every epoch
perceptron.train(X, y, epochs=50, learning_rate=0.01)


Epoch 1, Average Loss: 0.42985345345315884
Epoch 2, Average Loss: 0.6657672956052593
Epoch 3, Average Loss: 0.6652332380622165
Epoch 4, Average Loss: 0.6647420114678856
Epoch 5, Average Loss: 0.6641142172586383
Epoch 6, Average Loss: 0.6631033779604701
Epoch 7, Average Loss: 0.6608483454108276
Epoch 8, Average Loss: 0.6404538028946516
Epoch 9, Average Loss: 0.4956273930207642
Epoch 10, Average Loss: 0.42770111181437037
Epoch 11, Average Loss: 0.3996392763359263
Epoch 12, Average Loss: 0.38586848953599895
Epoch 13, Average Loss: 0.37803932326942863
Epoch 14, Average Loss: 0.3730955294417655
Epoch 15, Average Loss: 0.36976887545552617
Epoch 16, Average Loss: 0.3674222011053856
Epoch 17, Average Loss: 0.3656949535061741
Epoch 18, Average Loss: 0.36436989439869594
Epoch 19, Average Loss: 0.36331210662181956
Epoch 20, Average Loss: 0.3624361375429332
Epoch 21, Average Loss: 0.3616868712235303
Epoch 22, Average Loss: 0.3610280827560844
Epoch 23, Average Loss: 0.3604354994596774
Epoch 24, Ave

In [100]:
# Per-epoch average losses recorded by train().
# NOTE(review): this cell's execution count (100) is lower than the training cell's (125) and the values differ from the log above — the output looks stale; re-run to confirm
perceptron.average_loss

[1.067308668850862,
 1.0452320369485901,
 1.0237871910375036,
 1.003034640598111,
 0.9830214870223547,
 0.9637814136261573,
 0.9453355208263541,
 0.927693786233213,
 0.9108568922442046,
 0.8948181765982491,
 0.8795655080248009,
 0.8650829505377436,
 0.8513521404324426,
 0.8383533501396837,
 0.8260662491696488,
 0.814470394970023,
 0.803545498256047,
 0.7932715114201401,
 0.7836285877990903,
 0.7745969559421257,
 0.7661567479143407,
 0.7582878148336526,
 0.7509695566460676,
 0.7441807867782454,
 0.7378996459072766,
 0.7321035728302902,
 0.7267693345379292,
 0.7218731123765731,
 0.7173906369033085,
 0.7132973609179462,
 0.7095686583187388,
 0.7061800358667857,
 0.7031073455265775,
 0.7003269865440016,
 0.6978160885356937,
 0.6955526692879475,
 0.6935157634196919,
 0.6916855203285098,
 0.6900432717584886,
 0.6885715708227288,
 0.687254205363041,
 0.6860761891633793,
 0.6850237348111474,
 0.6840842119958174,
 0.6832460948240106,
 0.6824989013852665,
 0.6818331283830945,
 0.6812401831987058