In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import math
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from random import randrange
from random import seed
from sklearn.linear_model import Perceptron
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [2]:
# Mount Google Drive at /content/drive so the dataset CSV read in the next
# cell is reachable. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Objective: diagnostically predict whether or not a patient has diabetes,
# based on the diagnostic measurements included in the dataset.
# The CSV lives under the Drive mount point (/content/drive), so the drive
# must already be mounted when this cell runs.
diabetesData = pd.read_csv('/content/drive/MyDrive/MachineLearning/datasets/diabetes.csv')
diabetesData.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Min-max normalization of the pandas dataframe: every column (the Outcome
# label included) is rescaled column-wise to the [0, 1] interval.
# NOTE(review): min/max are taken over the full dataset, i.e. before any
# train/test split — confirm this is acceptable for the evaluation below.
col_min = diabetesData.min()
col_range = diabetesData.max() - col_min
X = (diabetesData - col_min) / col_range
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333,1.0
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667,0.0
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333,1.0
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0,0.0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2,1.0


# Perceptron

In [5]:
#weights and inputs
def activation(W,X,bias):
  """Return the linear pre-activation of a perceptron.

  Args:
    W: weight vector.
    X: input feature vector, same length as W.
    bias: scalar offset added to the weighted sum.

  Returns:
    The dot product of W and X plus bias.
  """
  return np.dot(W, X) + bias

def predictions(act_out):
  """Threshold a pre-activation value with a unit step function.

  Args:
    act_out: scalar pre-activation value.

  Returns:
    1 when act_out is greater than or equal to zero, otherwise 0.
  """
  return 1 if act_out >= 0 else 0
#stochastic gradient descent
def SGD(X,lr,n_epochs):
  """Fit perceptron parameters with the online perceptron update rule.

  The samples are visited one at a time, in the order they appear in X
  (no shuffling), and the parameters are updated after every sample.

  Args:
    X: 2-D numpy array with one sample per row; the last column of each row
      is the target label and the preceding columns are the features.
    lr: learning rate multiplying every update.
    n_epochs: number of full passes over X.

  Returns:
    A tuple (bias, weights) where weights has one entry per feature column.
    With n_epochs == 0 the initial values (0 and a zero vector) are returned.
  """
  weights = np.zeros(X.shape[1]-1)
  bias = 0
  for _ in range(n_epochs):
    for row in X:
      features = row[:-1]
      target = row[-1]
      y_pred = predictions(activation(weights, features, bias))
      # error is zero for a correctly classified sample, so the update
      # below leaves bias and weights unchanged in that case.
      error = target - y_pred
      bias = bias + lr*error
      weights = weights + lr*error*features
  return bias,weights
#k-fold cross validation
def cross_validation_split(X,folds):
  """Randomly partition the rows of X into equally sized folds.

  Rows are drawn without replacement using `randrange`, so the result
  depends on the state of Python's global `random` generator; seed it
  before calling this function to get a reproducible split.

  Args:
    X: 2-D numpy array with one sample per row. It is not modified.
    folds: number of folds to create.

  Returns:
    A numpy array of shape (folds, fold_size, n_columns) where
    fold_size = int(n_rows / folds). When n_rows is not a multiple of
    folds, the leftover rows are not assigned to any fold.
  """
  fold_size = int(X.shape[0] / folds)
  # Track the indices of the rows still available instead of deleting rows
  # from a copy of the data: np.delete would re-copy the whole array on
  # every single draw. The same row is selected for the same random draw.
  remaining = list(range(X.shape[0]))
  dataset_split = []
  for _ in range(folds):
    fold = []
    while len(fold) < fold_size:
      position = randrange(len(remaining))
      fold.append(X[remaining.pop(position)])
    dataset_split.append(fold)
  return np.array(dataset_split)

def perceptron(X_train,X_test,lr,n_epochs):
  """Train a perceptron on X_train and predict a label for each row of X_test.

  Args:
    X_train: 2-D numpy array of training samples; the last column of each
      row is the label.
    X_test: iterable of test rows with the same column layout. The last
      column is ignored here; only the features are used for prediction.
    lr: learning rate passed to SGD.
    n_epochs: number of training epochs passed to SGD.

  Returns:
    A numpy array with one predicted label (0 or 1) per test row.

  Side effects:
    Prints the learned bias and weights.
  """
  bias,weights = SGD(X_train,lr,n_epochs)
  print(f'Perceptron bias: {bias} , weights: {weights}')
  preds = [predictions(activation(weights, row[:-1], bias)) for row in X_test]
  return np.array(preds)

def model_accuracy(actual_labels,predicted):
  """Return the fraction of predictions that equal the actual labels.

  Args:
    actual_labels: sequence of ground-truth labels.
    predicted: sequence of predicted labels, same length as actual_labels.

  Returns:
    Accuracy as a float in [0, 1].

  Raises:
    ValueError: if the two sequences differ in length (zip would otherwise
      silently drop the unmatched tail and skew the score) or are empty.
  """
  n_samples = len(actual_labels)
  if n_samples != len(predicted):
    raise ValueError(
        f'actual_labels and predicted must have the same length, '
        f'got {n_samples} and {len(predicted)}')
  if n_samples == 0:
    raise ValueError('cannot compute accuracy of an empty set of labels')
  correct = sum(1 for y_i, y_pred in zip(actual_labels, predicted) if y_i == y_pred)
  return correct/float(n_samples)

In [6]:
# Evaluate the algorithm using a cross validation split
n_folds = 4
n_epochs = 500
lr = 0.01
# Seed BEFORE splitting: drawing the folds is the only step that consumes
# random numbers, so seeding afterwards would leave the run irreproducible.
seed(1)
folds = cross_validation_split(X.values, n_folds)
accuracy_scores = []
for i, fold in enumerate(folds):
  # Train on every fold except the i-th one, which is held out for testing.
  train_set = np.concatenate(np.delete(folds, i, axis=0))
  predicted = perceptron(train_set, fold, lr, n_epochs)
  # The label is the last column of every row.
  actual_labels = fold[:, -1]
  accuracy = model_accuracy(actual_labels, predicted)
  accuracy_scores.append(accuracy)
print('\n')
print('Results obtained for n.folds: ', folds.shape[0])
print('Scores: %s' % accuracy_scores)
accs = sum(accuracy_scores)
print('Mean Accuracy: ', accs/float(len(accuracy_scores)))


Perceptron bias: -0.07 , weights: [ 0.01352941  0.04557789  0.00065574 -0.0010101  -0.00434988  0.04078987
  0.01681469  0.012     ]
Perceptron bias: -0.07 , weights: [ 0.02647059  0.04331658 -0.00344262  0.00676768 -0.00325059  0.05080477
  0.02263023  0.01183333]
Perceptron bias: -0.07 , weights: [ 0.02176471  0.05407035 -0.00983607  0.00373737 -0.00911348  0.05056632
  0.01769855  0.012     ]
Perceptron bias: -0.04 , weights: [ 0.00294118  0.05035176 -0.02139344  0.02676768  0.00152482  0.02868852
  0.01475235  0.005     ]


Results obtained for n.folds:  4
Scores: [0.7864583333333334, 0.7447916666666666, 0.7239583333333334, 0.7083333333333334]
Mean Accuracy:  0.7408854166666667


*Comparison with Sklearn model*

In [7]:
# The estimator is bound to `sk_perceptron` so it does not shadow the
# hand-written perceptron() function used by the evaluation cell above;
# rebinding `perceptron` would break that cell when it is re-run.
# `eta0` is the constant multiplying each update (the learning rate);
# `alpha` only scales the regularization term, which is unused while
# `penalty` keeps its default of None.
# NOTE(review): scikit-learn's default `tol` can stop training before
# max_iter, and cross_val_score builds its own folds, so the comparison with
# the hand-written model is indicative rather than like-for-like.
sk_perceptron = Perceptron(eta0=lr, max_iter=n_epochs)
# Features are all columns but the last; the label is the last column.
X_ = X.values[:, :-1]
y_ = X.values[:, -1]
acc_scores = cross_val_score(sk_perceptron, X_, y_, cv=n_folds)
print('Accuracy scores for Sklearn model: %s' % acc_scores)
print('Mean Accuracy for Sklearn model: ', acc_scores.mean())

Accuracy scores for Sklearn model: [0.72395833 0.71875    0.69791667 0.77604167]
Mean Accuracy for Sklearn model:  0.7291666666666666
