In [99]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# Read the raw table from disk.
data = pd.read_csv('diabetes.csv')

# A literal 0 in these measurement columns is treated as a missing reading,
# so blank it out before imputing.
zero = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
data[zero] = data[zero].mask(data[zero].eq(0))

# Impute every missing entry with the mean of its own column.
column_means = data.mean()
data = data.fillna(column_means)

# Standardise every predictor column (everything except the target).
# NOTE(review): both the imputation means and the scaler statistics are
# computed on the full table, i.e. before any train/test split is made.
scaler = StandardScaler()
X = scaler.fit_transform(data.drop(columns='Outcome'))


# The perceptron works with -1 / +1 targets, so recode the 0 class as -1.
y = data['Outcome'].replace(0, -1).to_numpy()


In [100]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the partition
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [101]:
# Perceptron class definition
class Perceptron:
    """Perceptron classifier for binary targets encoded as -1 / +1.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to each corrective update of the weights and bias.
    n_epochs : int, default=1000
        Maximum number of full passes over the training samples.

    Attributes
    ----------
    w : numpy.ndarray of shape (n_features,)
        Weight vector, created by ``fit``.
    b : float
        Bias term, created by ``fit``.
    errors_ : list of int
        Number of corrective updates performed in each epoch that was run.
    """

    def __init__(self, learning_rate=0.01, n_epochs=1000):
        self.lr = learning_rate
        self.n_epochs = n_epochs

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from the training samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Targets; every entry must be -1 or +1.

        Returns
        -------
        Perceptron
            The fitted instance itself.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``X`` and ``y`` differ in length, or if
            ``y`` contains anything other than -1 / +1.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        if not np.isin(y, (-1, 1)).all():
            raise ValueError("y must contain only the labels -1 and +1")

        # Start from the all-zero hyperplane.
        self.w = np.zeros(X.shape[1])
        self.b = 0.0
        self.errors_ = []

        for _ in range(self.n_epochs):
            errors = 0
            for x_i, y_i in zip(X, y):
                linear_output = np.dot(x_i, self.w) + self.b

                # For -1/+1 targets a non-positive margin means the sample is
                # on the wrong side of (or exactly on) the boundary.
                if y_i * linear_output <= 0:
                    self.w += self.lr * y_i * x_i
                    self.b += self.lr * y_i
                    errors += 1

            self.errors_.append(errors)
            # An epoch without any update leaves w and b untouched, so every
            # later epoch would be identical: stop early.
            if errors == 0:
                break

        return self

    def predict(self, X):
        """Return the predicted label (-1.0 or +1.0) for each sample in ``X``.

        A sample whose activation is exactly zero is assigned to the +1 class,
        so the result never contains a value outside {-1, +1}.
        """
        linear_output = np.dot(X, self.w) + self.b
        return np.where(linear_output >= 0, 1.0, -1.0)

In [102]:
# Build a perceptron with step size 0.1 and at most 1000 passes, then fit it
# on the training partition.
perceptron = Perceptron(n_epochs=1000, learning_rate=0.1)
perceptron.fit(X_train, y_train)

# Score the held-out samples.
y_pred = perceptron.predict(X_test)

In [103]:
# The metrics are computed on 0/1 labels: -1 becomes 0, anything else becomes 1.
y_test_evaluated = np.where(y_test != -1, 1, 0)
y_pred_evaluated = np.where(y_pred != -1, 1, 0)

# Evaluate all four scores against the same (truth, prediction) pair.
accuracy, precision, recall, f1 = (
    score(y_test_evaluated, y_pred_evaluated)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report each score as a percentage, one per line.
print(
    f"Accuracy: {accuracy*100:.2f}%",
    f"Precision: {precision*100:.2f}%",
    f"Recall: {recall*100:.2f}%",
    f"F1 Score: {f1*100:.2f}%",
    sep="\n",
)

Accuracy: 76.62%
Precision: 65.08%
Recall: 74.55%
F1 Score: 69.49%


In [104]:
# Repeat the experiment with a different learning rate (0.01 instead of 0.1).
# NOTE: the weights and bias start at zero and the prediction depends only on
# the sign of w.x + b, so the learning rate merely rescales (w, b). Expect the
# same predictions as the previous run, up to floating-point rounding.
perceptron = Perceptron(n_epochs=1000, learning_rate=0.01)
perceptron.fit(X_train, y_train)

# Score the held-out samples with the retrained model.
y_pred = perceptron.predict(X_test)

In [105]:
# The metrics are computed on 0/1 labels: -1 becomes 0, anything else becomes 1.
y_test_evaluated = np.where(y_test != -1, 1, 0)
y_pred_evaluated = np.where(y_pred != -1, 1, 0)

# Evaluate all four scores against the same (truth, prediction) pair.
accuracy, precision, recall, f1 = (
    score(y_test_evaluated, y_pred_evaluated)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report each score as a percentage, one per line.
print(
    f"Accuracy: {accuracy*100:.2f}%",
    f"Precision: {precision*100:.2f}%",
    f"Recall: {recall*100:.2f}%",
    f"F1 Score: {f1*100:.2f}%",
    sep="\n",
)

Accuracy: 76.62%
Precision: 65.08%
Recall: 74.55%
F1 Score: 69.49%


In [106]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Restrict the predictors to the three hand-picked columns.
selected_features = ['Glucose', 'BMI', 'Age']

# Standardise the reduced feature matrix with a fresh scaler.
# NOTE: this rebinds the notebook-level names `scaler`, `X` and `y`.
scaler = StandardScaler()
X = scaler.fit_transform(data[selected_features].values)

# The perceptron works with -1 / +1 targets, so recode the 0 class as -1.
y = data['Outcome'].replace(0, -1).to_numpy()

# Same 80/20 partition settings and seed as the full-feature experiment.
X_train_fs, X_test_fs, y_train_fs, y_test_fs = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Perceptron class definition (same as before)
class Perceptron:
    """Perceptron classifier for binary targets encoded as -1 / +1.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to each corrective update of the weights and bias.
    n_epochs : int, default=1000
        Maximum number of full passes over the training samples.

    Attributes
    ----------
    w : numpy.ndarray of shape (n_features,)
        Weight vector, created by ``fit``.
    b : float
        Bias term, created by ``fit``.
    errors_ : list of int
        Number of corrective updates performed in each epoch that was run.
    """

    def __init__(self, learning_rate=0.01, n_epochs=1000):
        self.lr = learning_rate
        self.n_epochs = n_epochs

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from the training samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Targets; every entry must be -1 or +1.

        Returns
        -------
        Perceptron
            The fitted instance itself.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``X`` and ``y`` differ in length, or if
            ``y`` contains anything other than -1 / +1.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        if not np.isin(y, (-1, 1)).all():
            raise ValueError("y must contain only the labels -1 and +1")

        # Start from the all-zero hyperplane.
        self.w = np.zeros(X.shape[1])
        self.b = 0.0
        self.errors_ = []

        for _ in range(self.n_epochs):
            errors = 0
            for x_i, y_i in zip(X, y):
                linear_output = np.dot(x_i, self.w) + self.b

                # For -1/+1 targets a non-positive margin means the sample is
                # on the wrong side of (or exactly on) the boundary.
                if y_i * linear_output <= 0:
                    self.w += self.lr * y_i * x_i
                    self.b += self.lr * y_i
                    errors += 1

            self.errors_.append(errors)
            # An epoch without any update leaves w and b untouched, so every
            # later epoch would be identical: stop early.
            if errors == 0:
                break

        return self

    def predict(self, X):
        """Return the predicted label (-1.0 or +1.0) for each sample in ``X``.

        A sample whose activation is exactly zero is assigned to the +1 class,
        so the result never contains a value outside {-1, +1}.
        """
        linear_output = np.dot(X, self.w) + self.b
        return np.where(linear_output >= 0, 1.0, -1.0)

# Fit a separate perceptron on the reduced (selected-feature) training set.
perceptron_fs = Perceptron(n_epochs=1000, learning_rate=0.01)
perceptron_fs.fit(X_train_fs, y_train_fs)

# Score the held-out samples of the reduced feature set.
y_pred_fs = perceptron_fs.predict(X_test_fs)

# The metrics are computed on 0/1 labels: -1 becomes 0, anything else becomes 1.
y_test_evaluated_fs = np.where(y_test_fs != -1, 1, 0)
y_pred_evaluated_fs = np.where(y_pred_fs != -1, 1, 0)

# Evaluate all four scores against the same (truth, prediction) pair.
accuracy_fs, precision_fs, recall_fs, f1_fs = (
    score(y_test_evaluated_fs, y_pred_evaluated_fs)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report each score as a percentage, one per line.
print(
    f"Accuracy with selected features: {accuracy_fs*100:.2f}%",
    f"Precision: {precision_fs*100:.2f}%",
    f"Recall: {recall_fs*100:.2f}%",
    f"F1 Score: {f1_fs*100:.2f}%",
    sep="\n",
)

Accuracy with selected features: 64.94%
Precision: 50.65%
Recall: 70.91%
F1 Score: 59.09%
