In [4]:
import pandas as pd
import numpy  as np
import random
from sklearn.preprocessing import MinMaxScaler, Normalizer

In [5]:
diabetes = pd.read_csv('https://raw.githubusercontent.com/ryanleeallred/datasets/master/diabetes.csv')
diabetes.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
df_inputs = diabetes.copy().drop("Outcome", axis=1)

In [7]:
df_inputs.sample(10)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
156,2,99,52,15,94,24.6,0.637,21
597,1,89,24,19,25,27.8,0.559,21
88,15,136,70,32,110,37.1,0.153,43
648,11,136,84,35,130,28.3,0.26,42
733,2,106,56,27,165,29.0,0.426,22
474,4,114,64,0,0,28.9,0.126,24
490,2,83,65,28,66,36.8,0.629,24
697,0,99,0,0,0,25.0,0.253,22
105,1,126,56,29,152,28.7,0.801,21
493,4,125,70,18,122,28.9,1.144,45


In [8]:
df_outputs = diabetes.copy()["Outcome"]
df_outputs = pd.DataFrame({"Outcome": df_outputs})

In [9]:
type(df_outputs)

pandas.core.frame.DataFrame

In [10]:
df_outputs.describe()

Unnamed: 0,Outcome
count,768.0
mean,0.348958
std,0.476951
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [11]:
df_inputs.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0


In [12]:
# Scale the inputs dataframe column values
scaler = MinMaxScaler()

df_inputs_scaled = pd.DataFrame(scaler.fit_transform(df_inputs), columns=df_inputs.columns)

In [13]:
df_inputs_scaled.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,0.22618,0.60751,0.566438,0.207439,0.094326,0.47679,0.168179,0.204015
std,0.19821,0.160666,0.158654,0.161134,0.136222,0.117499,0.141473,0.196004
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.058824,0.497487,0.508197,0.0,0.0,0.406855,0.070773,0.05
50%,0.176471,0.58794,0.590164,0.232323,0.036052,0.4769,0.125747,0.133333
75%,0.352941,0.704774,0.655738,0.323232,0.150414,0.545455,0.234095,0.333333
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [22]:
# Add a bias column
df_inputs_scaled["bias"] = 1.5
df_inputs_scaled.describe()
df_inputs_scaled.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'bias'],
      dtype='object')

In [23]:
# Generate a numpy array containing the model inputs and a bias column
inputs = df_inputs_scaled.to_numpy()
inputs_features = df_inputs_scaled.columns
print(f'Does the input array and the df_inputs_scaled dataframe have the same shape: {inputs.shape == df_inputs_scaled.shape}')
print("df_inputs_scaled.shape: ", df_inputs_scaled.shape)
print("inputs.shape: ", inputs.shape)

Does the input array and the df_inputs_scaled dataframe have the same shape: True
df_inputs_scaled.shape:  (768, 9)
inputs.shape:  (768, 9)


In [24]:
# Generate a numpy array containing the model outputs
outputs = df_outputs.to_numpy()
type(outputs)

numpy.ndarray

In [25]:
outputs.shape

(768, 1)

In [26]:
class Perceptron:
    
    def __init__(self, X: np.array, y: np.array, num_wghts: int, niter = 10):
        self.niter     = niter
        self.X         = X
        self.y         = y
        self.num_wghts = num_wghts
        
    def config_report(self):
        print(f'---- Config/Data Report ----')
        print(f'Shape of inputs (self.X): {self.X.shape}')
        print(f'Shape of outputs (self.y): {self.y.shape}')
        print(f'Number of weights: {self.num_wghts}')
        print(f'Number of iterations: {self.niter}')
    
    def __sigmoid(self, x):
        sgmd = 1 / (1 + np.exp(-x))
        return sgmd
    
    def __sigmoid_derivative(self, x):
        sgmd = self.__sigmoid(x)
        return (sgmd * (1 - sgmd))
    
    def __gen_weights(self) -> np.array:
        ret_weights = pd.DataFrame(
            {"weights": [random.uniform(-0.2, 0.333) for i in range(self.num_wghts)]}
        ).to_numpy()
        
        return ret_weights
        

    def fit_model_predict(self, X, y):
        """Fit training data
        X : Training vectors, X.shape : [#samples, #features]
        y : Target values, y.shape : [#samples]
        """
        # Randomly Initialize Weights
        weights = self.__gen_weights()

        for i in range(self.niter):
            #--- Weighted sum of inputs / weights
            weighted_sum = np.dot(self.X, weights)

            #--- Activate!
            activated_output = self.__sigmoid(weighted_sum)

            #--- Cac error
            # Determine error between the target outcome and the nodes activated output
            error = self.y - activated_output
            
            # Determine weight adjustments
            adjustments = error * self.__sigmoid_derivative(weighted_sum)

            # Update the Weights
            weights += np.dot(self.X.T, adjustments)
            
        return activated_output

In [27]:
# Create a new Perceptron class
my_percept = Perceptron(inputs, outputs, num_wghts=9, niter=1000)

In [28]:
my_percept.config_report()

---- Config/Data Report ----
Shape of inputs (self.X): (768, 9)
Shape of outputs (self.y): (768, 1)
Number of weights: 9
Number of iterations: 1000


In [29]:
# Train the model and output a prediction
my_percept_pred = my_percept.fit_model_predict(inputs, outputs)

In [30]:
my_percept_pred.shape

(768, 1)

In [31]:
my_percept_pred

array([[7.47775751e-25],
       [1.05446429e-23],
       [3.50337427e-24],
       [9.41413262e-24],
       [2.05973430e-24],
       [7.33523688e-24],
       [2.62823606e-23],
       [1.15184357e-21],
       [2.85623954e-25],
       [1.27233760e-23],
       [7.04158257e-25],
       [6.63550682e-25],
       [1.85593498e-24],
       [1.30437156e-24],
       [1.51544980e-24],
       [2.32464315e-21],
       [1.07545344e-25],
       [6.27703020e-24],
       [2.26274717e-23],
       [1.66028975e-24],
       [1.23192732e-25],
       [2.38060096e-24],
       [9.28802679e-26],
       [1.08189711e-24],
       [1.33795325e-25],
       [2.52509594e-24],
       [1.01290246e-24],
       [1.32424287e-23],
       [1.93147852e-24],
       [8.35668793e-25],
       [1.77585907e-24],
       [3.28329720e-25],
       [3.40200337e-23],
       [4.99492081e-24],
       [1.61228549e-24],
       [9.18987363e-24],
       [1.84425951e-24],
       [1.49746708e-24],
       [1.76230360e-24],
       [6.55668846e-25],
