# Advanced Machine Learning
## Implementing Perceptrons

Import required packages.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import math

from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted, check_random_state
from sklearn.utils.multiclass import unique_labels
from sklearn import preprocessing
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

## Task 1: The Perceptron Classifier

Define the PerceptronClassifier class

In [2]:
class Perceptron:
    
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    def __init__(self, learning_rate=0.001,epoch=10):
        self.learning_rate = learning_rate
        self._b = 0.0  
        self._w = None  # weights assigned to input features
        self.misclassified_samples = []# count of errors during each iteration
        self.epoch = epoch

    
    def fit(self, x: np.array, y: np.array):
        #self._w = np.zeros(x.shape[1])
        self._w = np.random.randn(x.shape[1])*0.01
        self._b = 0.0

        for _ in range(self.epoch):
            errors = 0
            for xi, yi in zip(x, y):
                # for each sample compute the update value
                update = self.learning_rate * (yi - self.predict(xi))
                # and apply it to the y-intercept and weights array
                self._b += update
                self._w += update * xi
                errors += int(update != 0.0)
            self.misclassified_samples.append(errors)
        return self
    
    #Let the input pass through the activation function
    def f(self, x: np.array) -> float:
        a = np.dot(x, self._w) + self._b
        return self.sigmoid(a)

    #Predict the label
    def predict(self, x: np.array): 
        return np.where(self.f(x) > 0.5, 1, 0)

    def predict_prob(self, x: np.array):
        self.x = x 
        res_list = []
        for sample in x:
            res_list.append(self.f(sample))  
        return np.array(res_list)

## Task 2: Evaluation

Load the Diabethic Retinopathy dataset

In [3]:
dataset = pd.read_csv('messidor_features.csv', index_col = 0)
print(dataset.shape)

(1151, 19)


Evaluate the perfomrance of the perceptron classifier on the daibetic retinopathy dataset.

In [4]:
dataset = dataset.reset_index(drop=False)
X = dataset.iloc[:,:18].values
Y = dataset["Class"].values
X = X.astype(float)
Y = np.array([1 if i == "b'1'" else 0 for i in Y])

X_train, X_test, y_train, y_test = train_test_split(X, Y, 
                                                    test_size=0.5,
                                                    random_state=42)
X_train.shape, X_test.shape

((575, 18), (576, 18))

Normalize data.

In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_test,X_test.shape

(array([[ 0.05907962, -3.01458443, -0.98319715, ..., -0.21488684,
          0.95897316,  1.34298837],
        [ 0.05907962,  0.33172068,  0.71799885, ..., -0.21488684,
          1.11741976,  0.10788345],
        [ 0.05907962,  0.33172068,  0.83398949, ..., -0.21488684,
          0.02649378,  1.28827565],
        ...,
        [ 0.05907962,  0.33172068,  0.02205503, ..., -0.21488684,
          0.21399555,  0.63547042],
        [ 0.05907962,  0.33172068, -0.55789815, ..., -0.21488684,
         -1.29817621, -0.68818131],
        [ 0.05907962,  0.33172068, -1.29250551, ..., -0.21488684,
         -0.75636773,  0.03175675]]),
 (576, 18))

In [6]:
p = Perceptron()
p.fit(X_train, y_train)
results= p.predict(X_test)
accuracy = accuracy_score(p.predict(X_test),y_test)
print("My accuracy is %.2f" % accuracy)

My accuracy is 0.65


In [7]:
print(results)

[0 1 1 0 1 1 0 1 0 0 1 1 0 1 0 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1
 1 0 0 0 0 0 0 1 1 1 0 0 1 1 0 1 0 0 1 0 0 1 0 1 1 1 1 0 0 0 1 1 1 1 1 1 0
 1 0 1 1 1 0 0 1 0 1 0 0 1 1 1 0 1 0 0 0 1 1 1 1 0 0 0 1 1 1 0 1 0 0 1 1 1
 1 0 0 1 1 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 0 1 0 1 1 1 1 1 1 0 1 1 0 0 0 0
 0 1 1 0 1 1 0 1 1 0 0 1 1 1 0 1 0 1 1 0 1 0 0 0 0 1 0 1 0 1 1 0 1 1 1 1 1
 1 1 1 1 0 0 1 0 0 1 0 1 0 0 0 1 0 0 1 1 1 0 1 1 1 0 0 1 1 1 0 1 0 1 0 0 0
 0 1 1 0 1 0 1 1 1 1 0 1 0 0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1
 1 1 0 1 1 0 1 0 0 1 1 0 1 1 1 1 0 1 0 0 1 1 0 0 0 1 0 1 1 1 0 1 1 0 1 0 1
 1 0 1 0 0 1 0 0 0 1 1 1 0 0 0 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 1
 1 1 1 0 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 0 1 1 0 1 1 0 0 0 0 0 1 0 1 1 0 1 0
 1 1 1 0 0 0 0 1 1 0 1 0 1 1 0 1 1 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 1
 1 1 1 1 1 0 1 0 1 0 1 1 1 0 0 1 1 0 1 0 1 1 0 0 1 0 0 1 1 1 0 1 1 1 0 1 0
 0 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 1 0 1 1 1 0 0 0 1 1 0 1 0
 1 1 1 0 0 1 1 1 0 0 1 1 

## Task 3 & 4: Add Different Activations & Regularisation

Reimplement the PerceptronClassifier class adding an activation function option and L2 regularisation. 

In [8]:
class PerceptronClassifier2(BaseEstimator, ClassifierMixin):
    
    def regular_method(self): 
        if self.regularization == 'L2':
            self.reg_strength=1000
        elif self.regularization == 'Normal':
            self.reg_strength=0
        return self.reg_strength
        
    def __init__(self,activation_function = 'tanh',regularization='Normal',learning_rate = 0.00001, epochs = 10):
        
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.reg_strength = 10000
        self._b = 0.0
        self.activation_function = activation_function
        self.regularization = regularization
    

    #Calculate the cost
    def cost_computation(self, W, X, Y):
        
        # Hinge loss calculation
        n = X.shape[0]
        distances = 1 - Y * (np.dot(X, W))
        distances[distances < 0] = 0 # make all distances less than 0 equal to 0.
        hinge_loss = self.regular_method()*(np.nansum(distances)/n)

        # Calculate cost
        cost = 1/2 * np.dot(W,W) + hinge_loss
        return cost
    
    #Update the gradient
    def cost_gradient_computation(self, W, X_ij, Y_ij):
        
        if type(Y_ij) == np.float64:
            Y_ij = np.array([Y_ij])
            X_ij = np.array([X_ij])  # gives multidimensional array
            
        distance = 1 - (Y_ij * np.dot(X_ij, W))
        dw = np.zeros(len(W))
        
        if max(0,distance) == 0:
            di = W
        else:
            di = W - (self.reg_strength * Y_ij* X_ij)
        dw += di
        return dw
    
    #train the model
    def fit(self, X, Y):
        
        # Stochastic gradient descent
        max_epochs = 100
        #self.weights = np.zeros(X.shape[1])
        self.weights = np.random.randn(X.shape[1])*0.01
        nth = 0
        #prev_cost = 9999999
        cost_threshold = 0.001  # in percent
        
        # stochastic gradient descent
        for epoch in range(1,max_epochs):
            
            # shuffle
            X_i, Y_i = X, Y
            
            for i, x in enumerate(X_i):

                ascent = self.cost_gradient_computation(self.weights, x, Y_i[i])
                self.weights = self.weights - (self.learning_rate * ascent)
            
            if epoch == 2 ** nth:
                cost = self.cost_computation(self.weights, X, Y)
                print("Epoch is: {} and Cost is: {} \n {}".format(epoch, cost, self.weights))
                nth += 1
                
        return self.weights
    
    #Outputs the predictions for all samples
    def predict_prob(self, X, Y):
        
        Y_te_predictions = np.array([])
        
        for i in range(X.shape[0]):
            X_ = np.dot(X, self.weights) + self._b   
            Yp = np.where(self.forward_activation(X_) > 0.5, 1, 0)
            Y_te_predictions = np.append(Y_te_predictions, Yp)
            
        return Y_te_predictions

    def forward_activation(self, X): 
        if self.activation_function == "sigmoid":
            return 1.0/(1.0 + np.exp(-X))
        elif self.activation_function == "tanh":
            return np.tanh(X)

    # predict the label of the sample
    def predict(self, X):
        a = np.dot(X, self.weights) + self._b        
        return np.where(self.forward_activation(a) > 0.5, 1, 0)  
    

Load the dataset and explore it.

In [9]:
dataset = pd.read_csv('messidor_features.csv', index_col = 0)
print(dataset.shape)

(1151, 19)


In [10]:
dataset = dataset.reset_index(drop=False)
X = dataset.iloc[:,:18].values
Y = dataset["Class"].values
X = X.astype(float)
Y = np.array([1 if i == "b'1'" else 0 for i in Y])

X_train, X_test, y_train, y_test = train_test_split(X, Y, 
                                                    test_size=0.5,
                                                    random_state=42)
X_train.shape, X_test.shape

((575, 18), (576, 18))

In [11]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
p = PerceptronClassifier2()
p.fit(X_train, y_train)

Epoch is: 1 and Cost is: 2.30147525281711 
 [ 1.29038907 -0.7369357   0.75444061  0.43235859  0.05599408 -0.4072961
 -0.67213458 -0.49929725  0.32546373 -0.18641708 -0.51165204 -0.35430248
  0.34766497  0.19133626  0.22429538  0.00664155 -0.08909395 -0.15748932]
Epoch is: 2 and Cost is: 2.275159759935572 
 [ 1.28299058 -0.73271046  0.750115    0.42987965  0.05567304 -0.40496086
 -0.66828088 -0.49643451  0.32359767 -0.18534825 -0.50871847 -0.35227107
  0.34567162  0.19023923  0.22300937  0.00660347 -0.08858312 -0.15658635]
Epoch is: 4 and Cost is: 2.223428022279714 
 [ 1.26832063 -0.72433251  0.74153805  0.42496433  0.05503646 -0.40033046
 -0.66063963 -0.49075819  0.3198976  -0.18322894 -0.50290169 -0.34824314
  0.34171915  0.188064    0.22045944  0.00652797 -0.08757025 -0.15479591]
Epoch is: 8 and Cost is: 2.1234665727872124 
 [ 1.23948203 -0.70786291  0.72467723  0.41530165  0.05378506 -0.39122789
 -0.64561825 -0.47959951  0.31262388 -0.17906275 -0.4914669  -0.34032492
  0.33394926  0

array([ 0.73450458, -0.41947244,  0.42943644,  0.24610358,  0.03187249,
       -0.23183772, -0.38258688, -0.28420585,  0.18525777, -0.10611078,
       -0.29123834, -0.20167312,  0.19789498,  0.10891084,  0.12767156,
        0.00378045, -0.05071332, -0.08964476])

In [13]:
results= p.predict_prob(X_test,y_test)
print(results)

results_1= p.predict(X_test)
accuracy = accuracy_score(p.predict(X_test),y_test)
print("My accuracy is %.2f" % accuracy)

[1. 0. 0. ... 0. 0. 0.]
My accuracy is 0.54


## Task 5: Reflect on the Performance of the Different Models Evaluated

Perform hyper-parameter tuning and evaluate models. 

In [14]:
per_param_grid = {
              'activation_function':['sigmoid', 'tanh'],
    'regularization':['L2','Normal']
             }

In [15]:
p_pipe = GridSearchCV(PerceptronClassifier2(),per_param_grid,
                      verbose = 1, n_jobs = -1)
p_pipe = p_pipe.fit(X_train, y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Epoch is: 1 and Cost is: 951.41884582053 
 [ 1.29733482 -0.40729615  0.87735285  0.51920851  0.10138385 -0.42421537
 -0.71754973 -0.64146856  0.45355845 -0.04832531 -0.22316186 -0.32810782
  0.27143999  0.06273603  0.12998151 -0.31030527 -0.13149339 -0.05875337]
Epoch is: 2 and Cost is: 872.1082621685349 
 [ 1.43185215 -0.27555028  0.97828034  0.61039524  0.2083728  -0.30529879
 -0.56023207 -0.42341304  0.41696417 -0.02833649 -0.24238353 -0.3994931
  0.22129963  0.04281268  0.19340874 -0.15863085 -0.05454392 -0.21483281]
Epoch is: 4 and Cost is: 860.7279561741279 
 [ 1.69916888 -0.24383172  0.95193162  0.57389968  0.1858695  -0.32551537
 -0.52638958 -0.28706513  0.44033723 -0.0455844  -0.29762877 -0.50749705
  0.20413385  0.0315277   0.25896843 -0.0794428  -0.04600357 -0.26500061]
Epoch is: 8 and Cost is: 846.9639118860716 
 [ 2.2199893  -0.2406379   0.91803473  0.50365847  0.12015738 -0.39476985
 -0.50933692 -0.13962645  0.47

In [16]:
p_pipe.best_params_

{'activation_function': 'tanh', 'regularization': 'L2'}

In [17]:
p_best = PerceptronClassifier2(**p_pipe.best_params_)
p_best.fit(X_train, y_train)
best_accuracy = accuracy_score(p_best.predict(X_test),y_test)

Epoch is: 1 and Cost is: 1044.1048211787615 
 [ 1.27394475 -0.56672162  0.78679301  0.43873598  0.06737033 -0.40577358
 -0.82881976 -0.81609332  0.32213678 -0.08618461 -0.23813283 -0.2307765
  0.43033992  0.27373938  0.34756495  0.01120852 -0.07143653 -0.6625867 ]
Epoch is: 2 and Cost is: 880.2956916738569 
 [ 1.40035941 -0.28022281  0.97344645  0.62189682  0.25711441 -0.20370279
 -0.60712693 -0.53366715  0.3862379  -0.05496908 -0.24876016 -0.3744859
  0.28056184  0.13261516  0.224545   -0.09207094 -0.10289568 -0.4265029 ]
Epoch is: 4 and Cost is: 861.9583428812629 
 [ 1.66159342 -0.21814493  0.94215688  0.58211692  0.2336669  -0.2160092
 -0.54774688 -0.3488879   0.44636766 -0.05742132 -0.30250456 -0.48271523
  0.25760095  0.11501235  0.1912129  -0.09308863 -0.06286418 -0.31357759]
Epoch is: 8 and Cost is: 848.3999206034437 
 [ 2.18329011 -0.21527497  0.8934832   0.49728593  0.15887149 -0.29134518
 -0.53552699 -0.17799408  0.48985352 -0.00708191 -0.32051045 -0.66690611
  0.21140737  0.

In [18]:
print("My accuracy obtained by using best parameters is %.2f" % best_accuracy)

My accuracy obtained by using best parameters is 0.67
