In [None]:
#importo le librerie necessarie
import os

import numpy as np
import pandas as pd

#definisco l'algoritmo come una classe
class Perceptron(object):
    """Perceptron classifier for targets encoded as 1 / -1.

    Parameters
    ----------
    eta : float
        Learning rate applied to every weight update.
    n_iter : int
        Number of passes (epochs) over the training set.
    random_state : int
        Seed of the generator that draws the initial weights.

    Attributes
    ----------
    w_ : ndarray of shape (1 + n_features,)
        Weights after fitting; ``w_[0]`` is the bias term.
    errors_ : list of int
        Number of samples that triggered a weight update in each epoch.
    """

    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the weights on the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Target values (1 or -1).

        Returns
        -------
        self : Perceptron

        Raises
        ------
        ValueError
            If X is not 2-D or X and y disagree on the number of samples.
        """
        X = np.asarray(X)
        # Flatten the labels so that each target is a scalar: with an
        # (n_samples, 1) column every update would be a 1-element array and
        # the scalar assignments below would depend on implicit
        # array-to-scalar conversion.
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        rgen = np.random.RandomState(self.random_state)
        # Small random initial weights; index 0 holds the bias.
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                # The update is zero when the sample is classified correctly.
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        """Return the linear combination of the inputs and weights plus the bias."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return the class label (1 or -1) obtained by thresholding the net input at 0."""
        return np.where(self.net_input(X) >= 0.0, 1, -1)
        

In [None]:
# Load the training set.
# The location can be overridden with the SPACESHIP_TITANIC_TRAIN environment
# variable so the notebook is not tied to one machine; when the variable is
# not set, the original path is used.
TRAIN_PATH = os.environ.get(
    "SPACESHIP_TITANIC_TRAIN",
    r"C:\Users\fspadafora\OneDrive - BUSINESS INTEGRATION PARTNERS SPA\Desktop\KAGGLE\spaceship-titanic\train.xlsx",
)
dataset = pd.read_excel(TRAIN_PATH)

In [None]:
# Recode the two binary flags as the strings 'True' / 'False'
for flag_col in ('CryoSleep', 'VIP'):
    dataset[flag_col] = np.where(dataset[flag_col] > 0, 'True', 'False')

# Map the target from True/False to 1/-1
is_transported = dataset['Transported'].eq(True)
dataset['Transported'] = np.where(is_transported, 1, -1)

# Drop the non-numeric columns, then discard every row with a missing value
non_numeric_cols = ['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP', 'Name']
df = dataset.drop(columns=non_numeric_cols).dropna()

In [None]:
# Standardize the numeric feature columns in place
from sklearn.preprocessing import StandardScaler

numeric_cols = ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

In [None]:
# Target: column 6 of df, kept both flat (y_) and as a column vector (y)
y_ = df.iloc[:, 6].values
y = y_.reshape(-1, 1)

# Features: the first five columns of df
# (two-feature variant used for plotting experiments: df.iloc[:, [0, 1]])
# NOTE(review): the column at position 5 is neither part of X nor the target -
# confirm that leaving it out is intended.
X = df.iloc[:, :5].values

In [None]:
# Split the first two feature columns by class for the two-feature scatter plot.
# A boolean mask replaces the row-by-row Python loop; np.ravel lets it work
# whether y is flat or an (n, 1) column, and an empty class still yields a
# (0, 2) array so the plotting slices remain valid.
is_positive = np.ravel(y) == 1
X1 = X[is_positive][:, [0, 1]]   # samples with target == 1
X2 = X[~is_positive][:, [0, 1]]  # all remaining samples

In [None]:
# Scatter plot of the first two features, at most 100 points per class
import matplotlib.pyplot as plt
plt.scatter(X1[0:100, 0], X1[0:100, 1], color='red', marker='o', label='transported = 1')
plt.scatter(X2[0:100, 0], X2[0:100, 1], color='blue', marker='x', label='transported = -1')
plt.xlabel('feature 0')
plt.ylabel('feature 1')
# Without a legend the labels passed to scatter are never displayed
plt.legend(loc='best')
plt.show()

In [None]:
# Create an instance of the Perceptron class: learning rate 0.01, 100 epochs, seed 1
p = Perceptron(eta=0.01, n_iter=100, random_state=1)

In [None]:
# Train the model; fit returns the estimator itself, which is what this cell displays
p.fit(X, y)

In [None]:
# Inspect the learned parameters: w_[0] is the bias, w_[1:] are the feature weights
p.w_

In [None]:
# Training accuracy of the fitted model.
# It is computed from the predictions of the final weights rather than from the
# update count of the last epoch, which is taken while the weights are still
# changing. np.ravel keeps the comparison element-wise when y is an (n, 1) column.
score = np.mean(p.predict(X) == np.ravel(y))
score

In [None]:
# Plot the number of weight updates per epoch to check whether the algorithm
# converged and found a decision boundary
import matplotlib.pyplot as plt

epoch_numbers = range(1, len(p.errors_) + 1)
plt.plot(epoch_numbers, p.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')
plt.show()

In [None]:
# scikit-learn's Perceptron as a baseline for comparison.
# It is imported under an alias so it does not shadow the Perceptron class
# defined in this notebook (re-running earlier cells would otherwise pick up
# the scikit-learn class).
from sklearn.linear_model import Perceptron as SkPerceptron
clf = SkPerceptron(tol=1e-3, random_state=0, eta0=0.01)
# scikit-learn expects a 1-D target, so flatten the (n, 1) column
clf.fit(X, np.ravel(y))
clf.score(X, np.ravel(y))

In [None]:
#ulteriore implementazione del Perceptron leggermente più sofisticata con forward e back propagation

In [None]:
#definisco le funzioni

def activation_function(prediction):
    """
    Step activation applied to the raw output of the perceptron.

    Returns 1 when `prediction` is greater than or equal to 0 and -1 otherwise.
    """
    return 1 if prediction >= 0 else -1


def predict(x, weights, bias):
    """
    Predict the class of a single data point.

    The net input is the dot product of `weights` and `x` plus `bias`; the
    activation function then turns it into the class label.
    """
    net_input = np.dot(weights, x) + bias
    return activation_function(net_input)


def forward_propagation(x, y, weights, bias):
    """
    Run one data point through the perceptron and measure its error.

    x: feature vector of the data point
    y: label of the data point (-1 or 1)
    weights: weights of the perceptron
    bias: bias of the perceptron

    Returns the predicted label, the squared error and the derivative of the
    squared error with respect to the prediction.
    """
    y_pred = predict(x, weights, bias)
    residual = y_pred - y

    return y_pred, residual**2, 2*residual


def backpropagation(x, d_loss):
    """
    Backpropagation step for a single data point.

    x: feature values of the data point
    d_loss: partial derivative of the loss over the predicted y

    Returns a list holding, for each feature, `d_loss` multiplied by that
    feature's value, i.e. the partial derivative of the loss over the
    corresponding weight.
    """
    return [d_loss*feature_value for feature_value in x]


def optimize_perceptron(x, y, learning_rate, max_epochs=1000, tol=9e-4,
                        random_state=None, verbose=True):
    """
    Optimizes the Perceptron's weights and bias over repeated epochs.

    Steps of every epoch:
    1. Forward propagate each data point
    2. Backpropagate
    3. Update weights and bias
    4. Evaluate the mean loss over the whole data set and check the stop conditions

    x: training data, array-like of shape (n_samples, n_features)
    y: labels (-1 or 1), array-like of shape (n_samples,) or (n_samples, 1)
    learning_rate: step size applied to every update
    max_epochs: maximum number of epochs to run
    tol: training stops as soon as the mean loss of an epoch is not above this value
    random_state: optional seed for the initial weights and bias; when None the
        global NumPy random state is used
    verbose: when True, print the mean loss after every epoch

    Returns the tuple (weights, bias, errors), where errors is the history of
    the mean loss, one entry per epoch.

    Raises ValueError when x is not a non-empty 2-D array or when x and y do
    not hold the same number of samples.
    """
    x = np.asarray(x)
    # Flatten the labels: with an (n_samples, 1) column each per-sample gradient
    # would have shape (n_features, 1) and broadcasting would silently turn the
    # weight vector into a matrix.
    y = np.asarray(y).ravel()
    if x.ndim != 2 or x.shape[0] == 0:
        raise ValueError("x must be a non-empty 2-D array of shape (n_samples, n_features)")
    if x.shape[0] != y.shape[0]:
        raise ValueError("x and y must contain the same number of samples")

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    weights = rng.rand(x.shape[1])
    bias = rng.rand()

    n_samples = x.shape[0]
    errors = list()
    epoch = 0
    error = np.inf

    # Loop until stop conditions are met
    while epoch < max_epochs and error > tol:

        # Loop over every data point
        for i in range(n_samples):

            # Forward Propagation on each data point
            y_pred, loss, d_loss = forward_propagation(x[i], y[i], weights, bias)

            # Backpropagation
            partial_derivates = backpropagation(x[i], d_loss)

            # Learn by updating the weights of the perceptron
            weights = weights - (learning_rate * np.array(partial_derivates))
            # The bias enters the net input with coefficient 1, so its partial
            # derivative is d_loss itself
            bias = bias - (learning_rate * d_loss)

        # Evaluate the results of this epoch on the whole data set
        loss_ = 0
        for i in range(n_samples):
            y_pred, loss, d_loss = forward_propagation(x[i], y[i], weights, bias)
            loss_ += loss

        errors.append(loss_/n_samples)
        error = errors[-1]
        epoch += 1

        if verbose:
            print('Epoch {}. loss: {}'.format(epoch, errors[-1]))

    return weights, bias, errors

In [None]:
# Train the functional perceptron with learning rate 0.01; the result is the
# tuple (weights, bias, errors) returned by optimize_perceptron.
# NOTE(review): y is an (n, 1) column here, so each y[i] is a 1-element array -
# confirm optimize_perceptron handles that shape.
pp = optimize_perceptron(X, y, 0.01)
pp