In [5]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Load the Diabetes Dataset
# Get the current working directory
cwd = os.getcwd()

# Construct a file path
file_path = os.path.join(cwd, 'diabetes.csv')
diabetes_df = pd.read_csv(file_path)

# Preprocessing
# Replace zero values with NaN for specific columns
columns_with_zeros = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
diabetes_df[columns_with_zeros] = diabetes_df[columns_with_zeros].replace(0, np.nan)

# Fill NaN values with the mean of the column
diabetes_df.fillna(diabetes_df.mean(), inplace=True)

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(diabetes_df.drop('Outcome', axis=1))
y = diabetes_df['Outcome']

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and Train the Perceptron Model
perceptron_model = Perceptron(random_state=42)
perceptron_model.fit(X_train, y_train)

# Make Predictions and Evaluate the Perceptron Model
y_pred = perceptron_model.predict(X_test)
print("Perceptron Model Metrics:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Initialize and Train a Benchmark Model (Logistic Regression)
benchmark_model = LogisticRegression(random_state=42)
benchmark_model.fit(X_train, y_train)

# Make Predictions and Evaluate the Benchmark Model
benchmark_y_pred = benchmark_model.predict(X_test)
print("\nBenchmark Model (Logistic Regression) Metrics:")
print("Accuracy:", accuracy_score(y_test, benchmark_y_pred))
print("Precision:", precision_score(y_test, benchmark_y_pred))
print("Recall:", recall_score(y_test, benchmark_y_pred))
print("F1 Score:", f1_score(y_test, benchmark_y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, benchmark_y_pred))


Perceptron Model Metrics:
Accuracy: 0.6558441558441559
Precision: 0.5217391304347826
Recall: 0.43636363636363634
F1 Score: 0.4752475247524752
Confusion Matrix:
 [[77 22]
 [31 24]]

Benchmark Model (Logistic Regression) Metrics:
Accuracy: 0.7532467532467533
Precision: 0.6666666666666666
Recall: 0.6181818181818182
F1 Score: 0.6415094339622642
Confusion Matrix:
 [[82 17]
 [21 34]]
