In [145]:
import numpy as np
import pandas as pd
import random

## Example Perceptron 

In [146]:
def generate_data(self) -> None:
    """
    Fill ``self.X`` and ``self.y`` with a synthetic two-class, one-feature sample.

    100 points are produced one at a time. For each point a draw from
    ``random.random()`` below 0.5 selects label 0, otherwise label 1; the
    feature is then sampled from a normal distribution with standard
    deviation 0.75 centred at -1.25 (label 0) or 1.25 (label 1).
    """
    n_points = 100
    self.X = np.empty(n_points)
    self.y = np.empty(n_points)

    for idx in range(n_points):
        # Choose the class first, then sample the feature around that class's centre.
        label, centre = (0, -1.25) if random.random() < 0.5 else (1, 1.25)
        self.X[idx] = np.random.normal(loc=centre, scale=0.75)
        self.y[idx] = label

In [147]:
# Load the advertising dataset and keep only its first 20 rows.
df = pd.read_csv("../datasets/advertising.csv").head(20)
df

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9
5,8.7,48.9,75.0,7.2
6,57.5,32.8,23.5,11.8
7,120.2,19.6,11.6,13.2
8,8.6,2.1,1.0,4.8
9,199.8,2.6,21.2,15.6


In [148]:
# from perceptron import Perceptron
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; 'Sales' is the target and every other
# column is a feature. random_state is pinned to 42.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='Sales'),
    df['Sales'],
    test_size=0.2,
    random_state=42,
)

# # Standardize the data
# from sklearn.preprocessing import StandardScaler

# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# Show the container type of each split, one per line.
print(type(X_train), type(y_train), type(X_test), type(y_test), sep="\n")

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [149]:
class Perceptron:
    """
    Single-neuron model trained with full-batch gradient descent.

    The task is inferred from the targets passed to ``fit``: exactly two
    distinct target values select binary classification, anything else
    selects regression.

    Supported settings (``None`` picks the first entry of each list):

    * binary      - activation: "sigmoid"; loss: "binary_crossentropy"
    * regression  - activation: "tanh", "linear"; loss: "mse", "mae"

    Notes:
        * The cross-entropy formula uses ``y_true`` and ``1 - y_true``, so
          binary labels are expected to be 0 and 1.
        * "tanh" bounds predictions to (-1, 1); regression targets outside
          that range cannot be matched unless they are rescaled or the
          "linear" activation is used.
    """

    def __init__(self, bias : float = 1, loss_function : str = None, activation_function : str = None):
        """
        Args:
            bias: Initial value of the bias term.
            loss_function: Loss name, or None for the task default.
            activation_function: Activation name, or None for the task default.
        """

        self.__bias = bias
        self.__loss_function = loss_function
        self.__activation_function = activation_function

    def fit(self, X_train, y_train):
        """
        Store the training data, draw initial weights and infer the task type.

        Args:
            X_train: 2-D feature table (pandas DataFrame or array-like),
                one row per sample.
            y_train: 1-D targets (pandas Series or array-like), one per row.

        Raises:
            ValueError: If the inputs are empty, not 2-D / 1-D compatible,
                or disagree on the number of samples.
        """

        # np.asarray accepts DataFrames/Series as well as plain arrays; casting to
        # float keeps the arithmetic in the loss and gradients well defined for
        # integer or boolean labels.
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float).ravel()

        if features.ndim != 2:
            raise ValueError(f"X_train must be 2-dimensional, got {features.ndim} dimension(s)")
        if features.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample")
        if features.shape[0] != targets.shape[0]:
            raise ValueError(
                f"X_train has {features.shape[0]} samples but y_train has {targets.shape[0]}"
            )

        self.X_train = features
        self.y_train = targets

        # Initialize weights with small random values
        n_features = features.shape[1]
        self.__weights = np.random.randn(n_features) / np.sqrt(n_features)

        self.__get_ml_type()

    def train(self, epochs : int = 100, learning_rate : float = 0.01):
        """
        Run full-batch gradient descent on the data given to ``fit``.

        The loss is printed every 10th epoch; the printed value is computed
        before that epoch's parameter update.

        Args:
            epochs: Number of passes over the training data.
            learning_rate: Step size applied to every gradient update.
        """

        self.learning_rate = learning_rate

        for epoch in range(epochs):

            # Forward pass
            y_pred = self.forward(self.X_train)

            # Compute loss
            loss = self.loss(y_pred, self.y_train)

            # Backward pass
            self.backward(y_pred, self.y_train)

            if epoch % 10 == 0:
                print(f'Epoch: {epoch}, Loss: {loss}\n')


    ############################################################################################################
    ####################################### Forward and Backward Pass ##########################################
    ############################################################################################################
    def forward(self, X):
        """Return the activated output ``activation(X @ weights + bias)``, one value per row of X."""

        return self.activation_function(np.dot(X, self.__weights) + self.__bias)

    def backward(self, y_pred, y_true):
        """
        Update weights and bias from predictions made on ``self.X_train``.

        Dispatches to the classification or regression update depending on
        the task type inferred in ``fit``.
        """

        if self.__ml_type == "binary":
            self.backward_classification(y_pred, y_true)
        else:
            self.backward_regression(y_pred, y_true)

    def backward_regression(self, y_pred, y_true):
        """
        Gradient step for the regression losses.

        ``y_pred`` must be the forward output for ``self.X_train``. A loss of
        None is treated as "mse", matching what ``loss`` reports.

        Raises:
            ValueError: If the configured loss or activation is not supported
                for regression.
        """

        loss_name = self.__resolve_loss()
        error = y_pred - y_true

        if loss_name == "mse":
            # Derivative of the squared error w.r.t. the prediction; the constant
            # factor 2 is folded into the learning rate.
            dL_dy = error
        else:
            # MAE derivative is sign(y_pred - y_true)
            dL_dy = np.sign(error)

        # Chain rule through the activation gives the per-sample signal.
        self.__apply_gradients(dL_dy * self.__activation_derivative(y_pred))

    def backward_classification(self, y_pred, y_true):
        """
        Gradient step for sigmoid + binary cross-entropy.

        ``y_pred`` must be the forward output for ``self.X_train``.
        """

        # dL/dy_pred = -(y_true / y_pred - (1 - y_true) / (1 - y_pred)) and
        # sigmoid'(z) = y_pred * (1 - y_pred); their product simplifies to
        # y_pred - y_true, which avoids dividing by a saturated prediction.
        self.__apply_gradients(y_pred - y_true)

    def __apply_gradients(self, delta):
        """Average the per-sample signal ``delta`` over the batch and update the parameters."""

        n_samples = delta.shape[0]

        # Reduce over samples so the weight gradient has one entry per feature
        # and the bias gradient stays a scalar.
        dL_dw = self.X_train.T @ delta / n_samples
        dL_db = np.mean(delta)

        self.__weights -= self.learning_rate * dL_dw
        self.__bias -= self.learning_rate * dL_db

    def __activation_derivative(self, y_pred):
        """Derivative of the active activation, expressed through its output ``y_pred``."""

        name = self.__resolve_activation()

        if name == "sigmoid":
            return y_pred * (1 - y_pred)
        if name == "tanh":
            # hyperbolic tangent derivative is 1 - tanh^2(x)
            return 1 - y_pred ** 2
        return np.ones_like(y_pred)


    ############################################################################################################
    ####################################### Activation Functions ###############################################
    ############################################################################################################
    def activation_function(self, x):
        """Apply the configured (or task-default) activation to ``x``."""

        return self.get_actvation_function(x)

    def get_actvation_function(self, x):
        """
        Apply the activation selected by the configuration and task type.

        The method name (sic) is kept unchanged for backward compatibility.

        Raises:
            ValueError: If the configured activation is not supported for the task.
        """

        name = self.__resolve_activation()

        if name == "sigmoid":
            return self.sigmoid(x)
        if name == "tanh":
            return self.tan_h(x)
        return x

    def __resolve_activation(self):
        """Return the effective activation name, or raise ValueError if unsupported."""

        name = self.__activation_function

        if self.__ml_type == "binary":
            if name is None or name == "sigmoid":
                return "sigmoid"
        elif name is None or name == "tanh":
            return "tanh"
        elif name == "linear":
            return "linear"

        raise ValueError(
            f"Unsupported activation function {name!r} for {self.__ml_type} task"
        )

    def sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x))."""
        # Clip the input so np.exp cannot overflow
        x = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-x))

    def tan_h(self, x):
        """Hyperbolic tangent."""
        # np.tanh stays finite for large |x|, where exp(x) / exp(-x) would overflow to nan.
        return np.tanh(x)


    ############################################################################################################
    ####################################### Loss Functions #####################################################
    ############################################################################################################
    def loss(self, y_pred, y_true):
        """Return the mean loss of ``y_pred`` against ``y_true`` for the configured loss."""

        return self.get_loss_function(y_pred, y_true)

    def get_loss_function(self, y_pred, y_true):
        """
        Evaluate the loss selected by the configuration and task type.

        Raises:
            ValueError: If the configured loss is not supported for the task.
        """

        name = self.__resolve_loss()

        if name == "binary_crossentropy":
            return self.__binary_crossentropy(y_pred, y_true)
        if name == "mse":
            return self.__mse(y_pred, y_true)
        return self.__mae(y_pred, y_true)

    def __resolve_loss(self):
        """Return the effective loss name, or raise ValueError if unsupported."""

        name = self.__loss_function

        if self.__ml_type == "binary":
            if name is None or name == "binary_crossentropy":
                return "binary_crossentropy"
        elif name is None or name == "mse":
            return "mse"
        elif name == "mae":
            return "mae"

        raise ValueError(
            f"Unsupported loss function {name!r} for {self.__ml_type} task"
        )

    def __binary_crossentropy(self, y_pred, y_true):
        """Mean binary cross-entropy."""
        # epsilon keeps the logarithms finite when a prediction reaches 0 or 1
        epsilon = 1e-7
        losses = - (y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))
        return np.mean(losses)

    def __mse(self, y_pred, y_true):
        """Mean squared error."""
        return np.mean( ( y_pred - y_true ) ** 2 )

    def __mae(self, y_pred, y_true):
        """Mean absolute error."""
        return np.mean( np.abs( y_pred - y_true ) )

    ############################################################################################################
    ####################################### Helper Functions ###################################################
    ############################################################################################################
    def __get_ml_type(self):
        """Set the task type: "binary" when y_train has exactly two distinct values, else "regression"."""

        if np.unique(self.y_train).shape[0] == 2:
            self.__ml_type = "binary"
        else:
            self.__ml_type = "regression"
    
    

In [150]:
# class Perceptron:
#     def __init__(self, bias=1):
#         self.__bias = bias
#         self.__weights = None
#         self.learning_rate = None  # Initialize learning_rate

#     def fit(self, X_train, y_train):
#         # Initialize weights with small random values
#         self.__weights = np.random.randn(X_train.shape[1]) / np.sqrt(X_train.shape[1])
        

#         self.X_train = X_train
#         self.y_train = y_train.to_numpy()

#     def sigmoid(self, x):
#         # Clip values to avoid overflow in np.exp
#         return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

#     def forward(self, X):
#         return self.sigmoid(np.dot(X, self.__weights) + self.__bias)

#     def loss(self, y_pred, y_true):
#         epsilon = 1e-7
#         loss = - (y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))
#         return np.mean(loss)

#     def backward(self, y_pred, y_true):
        
#         epsilon = 1e-7
#         # d_cross_entropy = sum(map(lambda y_pred_i, y_true_i : (y_pred_i - y_true_i) / (y_pred_i * (1 - y_pred_i) + epsilon), y_pred, y_true))
#         d_cross_entropy = np.sum((y_pred - y_true) / (y_pred * (1 - y_pred) + epsilon))
        
#         # d_weights = sum(map(lambda xi, y_pred_i : ( xi * np.exp(-y_pred_i) ) / ( ( 1 + np.exp(-y_pred_i) ) ** 2 ), self.X_train, y_pred))
#         d_weights = np.sum(self.X_train * np.exp(-y_pred.reshape(-1, 1)) / ((1 + np.exp(-y_pred.reshape(-1, 1))) ** 2), axis=0)
        
#         # d_bias = sum(map(lambda y_pred_i : ( np.exp(-y_pred_i) ) / ( ( 1 + np.exp(-y_pred_i) ) ** 2 ), y_pred))
#         d_bias = np.sum(np.exp(-y_pred) / ((1 + np.exp(-y_pred)) ** 2))
        
#         # print(f"d_cross_entropy: {d_cross_entropy}\n")
        
#         dL_dw = d_cross_entropy * d_weights
#         dL_db = d_cross_entropy * d_bias
        
#         # Update weights and bias
#         self.__weights -= self.learning_rate * dL_dw
#         self.__bias -= self.learning_rate * dL_db
        

#     def train(self, epochs=100, learning_rate=0.01):
#         self.learning_rate = learning_rate

#         for epoch in range(epochs):
            
#             # Forward pass
#             y_pred = self.forward(self.X_train)

#             # Compute and print loss
#             loss = self.loss(y_pred, self.y_train)

#             # print(f"Weights: {self.__weights}\nBias: {self.__bias}\nLoss: {loss}\n")

#             # Backward pass
#             self.backward(y_pred, self.y_train)

#             if epoch % 100 == 0:
#                 # print(f"Y_Pred: {y_pred}")
#                 print(f'Epoch: {epoch}, Loss: {loss}\n')
                
#         # training accuracy
#         y_pred = self.forward(self.X_train)
        
#         y_pred = np.where(y_pred > 0.5, 1, 0)
#         print(f"Training Accuracy: {np.mean(y_pred == self.y_train)}")

In [151]:
# Default configuration: bias 1, with loss and activation left for the class to choose.
perceptron = Perceptron(bias=1, loss_function=None, activation_function=None)
perceptron.fit(X_train=X_train, y_train=y_train)
perceptron.train(learning_rate=0.01, epochs=100)

Epoch: 0, Loss: 212.18298920628365

Epoch: 10, Loss: 212.18298920628365

Epoch: 20, Loss: 212.18298920628365

Epoch: 30, Loss: 212.18298920628365

Epoch: 40, Loss: 212.18298920628365

Epoch: 50, Loss: 212.18298920628365

Epoch: 60, Loss: 212.18298920628365

Epoch: 70, Loss: 212.18298920628365

Epoch: 80, Loss: 212.18298920628365

Epoch: 90, Loss: 212.18298920628365

