In [63]:
import numpy as np
import pandas as pd

In [64]:
df = pd.read_csv("diabetes.csv")

df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [65]:
df["Outcome"] = df["Outcome"].replace(0, -1)

In [66]:
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,-1
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,-1
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,-1
764,2,122,70,27,0,36.8,0.340,27,-1
765,5,121,72,23,112,26.2,0.245,30,-1
766,1,126,60,0,0,30.1,0.349,47,1


In [67]:
class Perceptron(object):
    
    def __init__(self, lrate = 0.01, iters = 10):
        self.lrate = lrate
        self.iters = iters
        
    def weighted_sum(self, x):
        return np.dot(x, self.w[1:]) + self.w[0]
    
    def predict(self, x):
        return np.where(self.weighted_sum(x) >= 0.0, 1, -1)
    
    def fit(self, X, y):
        self.w = np.zeros(1 + X.shape[1])
        self.errors_ = []

        for _ in range(self.iters):
            error = 0
            for xi, target in zip(X, y):
                y_pred = self.predict(xi)
                update = self.lrate * (target - y_pred)
                self.w[1:] = self.w[1:] + update * xi
                self.w[0] = self.w[0] + update
                error += int(update != 0.0)

            self.errors_.append(error)

        return self

In [68]:
x = df.iloc[:, 0:8].values
y = df.iloc[:, 8].values

print(x[0:10])
print(y[0:10])

[[6.000e+00 1.480e+02 7.200e+01 3.500e+01 0.000e+00 3.360e+01 6.270e-01
  5.000e+01]
 [1.000e+00 8.500e+01 6.600e+01 2.900e+01 0.000e+00 2.660e+01 3.510e-01
  3.100e+01]
 [8.000e+00 1.830e+02 6.400e+01 0.000e+00 0.000e+00 2.330e+01 6.720e-01
  3.200e+01]
 [1.000e+00 8.900e+01 6.600e+01 2.300e+01 9.400e+01 2.810e+01 1.670e-01
  2.100e+01]
 [0.000e+00 1.370e+02 4.000e+01 3.500e+01 1.680e+02 4.310e+01 2.288e+00
  3.300e+01]
 [5.000e+00 1.160e+02 7.400e+01 0.000e+00 0.000e+00 2.560e+01 2.010e-01
  3.000e+01]
 [3.000e+00 7.800e+01 5.000e+01 3.200e+01 8.800e+01 3.100e+01 2.480e-01
  2.600e+01]
 [1.000e+01 1.150e+02 0.000e+00 0.000e+00 0.000e+00 3.530e+01 1.340e-01
  2.900e+01]
 [2.000e+00 1.970e+02 7.000e+01 4.500e+01 5.430e+02 3.050e+01 1.580e-01
  5.300e+01]
 [8.000e+00 1.250e+02 9.600e+01 0.000e+00 0.000e+00 0.000e+00 2.320e-01
  5.400e+01]]
[ 1 -1  1 -1  1 -1  1 -1  1  1]


In [69]:
from sklearn.model_selection._split import train_test_split

train_data, test_data, train_labels, test_labels = train_test_split(x, y, test_size=0.25)

train_labels = np.where(train_labels == 1, 1, -1)
test_labels = np.where(test_labels == 1, 1, -1)

print('Train data:', train_data[0:2])
print('Train labels:', train_labels[0:5])

print('Test data:', test_data[0:2])
print('Test labels:', test_labels[0:5])

Train data: [[  3.     78.     70.      0.      0.     32.5     0.27   39.   ]
 [  3.    150.     76.      0.      0.     21.      0.207  37.   ]]
Train labels: [-1 -1 -1 -1  1]
Test data: [[9.00e+00 1.64e+02 8.40e+01 2.10e+01 0.00e+00 3.08e+01 8.31e-01 3.20e+01]
 [2.00e+00 1.57e+02 7.40e+01 3.50e+01 4.40e+02 3.94e+01 1.34e-01 3.00e+01]]
Test labels: [ 1 -1 -1 -1  1]


In [79]:
perceptron = Perceptron(lrate=0.05, iters=25)

perceptron.fit(train_data, train_labels)

<__main__.Perceptron at 0x19929fcec50>

In [80]:
test_preds = perceptron.predict(test_data)
print(test_preds)

[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1  1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1
 -1 -1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1  1 -1 -1  1
 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1]


In [81]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(test_preds, test_labels)
print('Accuracy:', round(accuracy, 2) * 100, "%")

Accuracy: 64.0 %
