## Perceptron Algorithm

### 1. Loading the Dataset

In [None]:
import requests
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from io import BytesIO

# URL of the dataset
diabets_dataset = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/diabetes'

# Downloading the dataset
dataset = requests.get(diabets_dataset)

# Loading the dataset from the downloaded content
X, y = load_svmlight_file(BytesIO(dataset.content))

# X contains the feature vectors, and y contains the labels

### 2. Spliting the dataset into training and testing sets

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 3. Initializing of weights and bias to zeros

In [16]:
n_features = X_train.shape[1]
weights = np.zeros(n_features)
bias = 0

In [17]:
# Defining the learning rate
learning_rate = 0.01

In [43]:
# Defining the number of training iterations
max_iterations = 10000

### 4. The Activation function

In [19]:
def activation(z):
    return np.where(z >= 0, 1, -1)

### 5. Training with Perceptron Algorithm

In [21]:
for _ in range(max_iterations):
    errors = 0
    for xi, target in zip(X_train, y_train):
        # Converting xi to a dense (NumPy) array
        xi_dense = xi.toarray().reshape(-1)  # Reshape to 1D array if needed
        
        # Computing the weighted sum
        z = np.dot(xi_dense, weights) + bias
        # Applying the activation function
        prediction = activation(z)
        # Updating weights and bias if there is a chance of misclassification
        if prediction != target:
            update = learning_rate * (target - prediction)
            weights += update * xi_dense
            bias += update
            errors += 1
    # Code to stop early if there are no errors
    if errors == 0:
        break

### 6. Making predictions on the test data

In [22]:
def predict(X):
    z = np.dot(X.toarray(), weights) + bias
    return activation(z)

y_pred = predict(X_test)

### 7. Evaluation of the model

In [45]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Accuracy: 0.7207792207792207
Precision: 0.7121212121212122
Recall: 0.9494949494949495
F1 Score: 0.8138528138528138


### 8. Visualisation of the Result - Using confusion matrix

In [52]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Calculate the confusion matrix
cm = confusion_matrix(y_test, X_train)

# Create a heatmap for the confusion matrix
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", annot_kws={"size": 16})
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

ValueError: Found input variables with inconsistent numbers of samples: [154, 614]