# Import the libraries

In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random

## Load the dataset

In [2]:
# Create an empty list to store the data
data = []

# Read each line from the dataset and split it into label and features
with open('./pima-diabetes.txt', 'r') as file:
    for line in file:
        parts = line.strip().split()
        label = int(parts[0])
        features = {}
        for part in parts[1:]:
            feature_id, feature_value = part.split(':')
            features[int(feature_id)] = float(feature_value)
        data.append({'Label': label, **features})

# Create a DataFrame from the list of dictionaries
df = pd.DataFrame(data)

In [3]:
df.head() # View top 5 rows of the dataset

Unnamed: 0,Label,1,2,3,4,5,6,7,8
0,-1,-0.294118,0.487437,0.180328,-0.292929,-1.0,0.00149,-0.53117,-0.033333
1,1,-0.882353,-0.145729,0.081967,-0.414141,-1.0,-0.207153,-0.766866,-0.666667
2,-1,-0.058824,0.839196,0.04918,-1.0,-1.0,-0.305514,-0.492741,-0.633333
3,1,-0.882353,-0.105528,0.081967,-0.535354,-0.777778,-0.162444,-0.923997,-1.0
4,-1,-1.0,0.376884,-0.344262,-0.292929,-0.602837,0.28465,0.887276,-0.6


In [4]:
df.info() # Get info about the dataset

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Label   768 non-null    int64  
 1   1       768 non-null    float64
 2   2       768 non-null    float64
 3   3       767 non-null    float64
 4   4       768 non-null    float64
 5   5       768 non-null    float64
 6   6       768 non-null    float64
 7   7       768 non-null    float64
 8   8       760 non-null    float64
dtypes: float64(8), int64(1)
memory usage: 54.1 KB


In [5]:
df.shape # Check the shape of the dataset

(768, 9)

### Check for missing values

In [6]:
df.isna().sum()

Label    0
1        0
2        0
3        1
4        0
5        0
6        0
7        0
8        8
dtype: int64

### Drop the missing values

In [7]:
df.dropna(inplace=True)

## Split the dataset

In [8]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(df.drop('Label', axis=1), df['Label'], test_size=0.2)

In [9]:
# Convert to Numpy Array for better handling of the data
x_train = np.array(x_train)
x_test = np.array(x_test)

y_train = np.array(y_train)
y_test = np.array(y_test)

In [10]:
x_train.shape # Check the shape of data

(607, 8)

## Custom Perceptron

In [11]:
def perceptron_custom(inputs, weights, bias):
    """
    Compute the output of a perceptron.

    :param inputs: List of input values (features)
    :param weights: List of weight values, one for each input feature
    :param bias: Bias term (a single value)
    :return: Output of the perceptron (0 or 1)
    """
    # Ensure that the number of inputs and weights match
    if inputs.shape[0] != len(weights):
        raise ValueError("Number of inputs must be equal to the number of weights")

    # Calculate the weighted sum of inputs
    weighted_sum = sum([inputs[i] * weights[i] for i in range(len(inputs))]) + bias

    # Apply the step function (typically a threshold at 0)
    if weighted_sum >= 0:
        return 1  # Output is 1 if the weighted sum is greater than or equal to 0
    else:
        return -1  # Output is 0 if the weighted sum is less than 0


In [12]:
def train_perceptron(X, y, epochs):
    """
    Train a perceptron model.

    :param X: Training data, a list of feature vectors.
    :param y: Training labels, a list of target values (-1 or 1).
    :param max_epochs: Maximum number of training epochs.
    :return: Trained weights and bias.
    """
    # Initialize weights and bias
    num_features = 8
    weights = [random.uniform(-0.5, 0.5) for _ in range(num_features)]
    bias = random.uniform(-0.5, 0.5)

    # Define the learning rate
    learning_rate = 0.0001

    # Training loop
    epochs = 1000
    for epoch in range(epochs):
        total_error = 0
        for x_1, y_1 in zip(X,y):  # Iterate through your training dataset
            # print(x_1, y_1)
            # Calculate predicted output
            predicted_output = perceptron_custom(x_1, weights, bias)

            # Calculate error
            error = y_1 - predicted_output
            total_error += abs(error)
            # print(y_1, predicted_output)
            # Update weights and bias
            for i in range(num_features):
                weights[i] += learning_rate * error * x_1[i]
            bias += learning_rate * error

        # Check for convergence or print progress
        if total_error < 0.1:
            print(f"Converged after {epoch} epochs.")
            break
        elif epoch % 1 == 0:
            print(f"Epoch {epoch}: Total Error = {total_error}")
            
    return weights, bias


def predict_perceptron(X, weights, bias):
    y1_pred = np.dot(X, weights) + bias
    y1_pred = np.where(y1_pred > 0, 1, -1)
    
    return y1_pred


In [13]:
w1, b1 = train_perceptron(x_train, y_train, 100)

Epoch 0: Total Error = 426
Epoch 1: Total Error = 420
Epoch 2: Total Error = 408
Epoch 3: Total Error = 426
Epoch 4: Total Error = 442
Epoch 5: Total Error = 476
Epoch 6: Total Error = 498
Epoch 7: Total Error = 504
Epoch 8: Total Error = 516
Epoch 9: Total Error = 520
Epoch 10: Total Error = 528
Epoch 11: Total Error = 524
Epoch 12: Total Error = 524
Epoch 13: Total Error = 516
Epoch 14: Total Error = 510
Epoch 15: Total Error = 510
Epoch 16: Total Error = 502
Epoch 17: Total Error = 490
Epoch 18: Total Error = 484
Epoch 19: Total Error = 480
Epoch 20: Total Error = 468
Epoch 21: Total Error = 460
Epoch 22: Total Error = 452
Epoch 23: Total Error = 448
Epoch 24: Total Error = 446
Epoch 25: Total Error = 438
Epoch 26: Total Error = 438
Epoch 27: Total Error = 430
Epoch 28: Total Error = 426
Epoch 29: Total Error = 424
Epoch 30: Total Error = 418
Epoch 31: Total Error = 414
Epoch 32: Total Error = 406
Epoch 33: Total Error = 396
Epoch 34: Total Error = 392
Epoch 35: Total Error = 388
Ep

## Perceptron from Sk-learn Library

In [14]:
# Create a Perceptron classifier
perceptron = Perceptron(random_state=42)

# Train the Perceptron on the training data
perceptron.fit(x_train, y_train)

# Make predictions on the test data
y_pred = perceptron.predict(x_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7039473684210527


## Comparison

In [15]:
y1_pred = np.dot(x_test, w1) + b1
y1_pred = np.where(y1_pred > 0, 1, -1)

In [16]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y1_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8157894736842105
