In [37]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [38]:
def load_dataset(filename):
    data = []
    labels = []
    with open(filename, 'r') as file:
        for line in file:
            values = line.split()
            labels.append(float(values[0]))  # Labels are already in 1 or -1 format
            features = [float(v.split(":")[1]) for v in values[1:]]
            data.append(features)
    return np.array(data), np.array(labels)

# Perceptron
class Perceptron:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self._unit_step_function(linear_output)

                # Update rule
                update = self.lr * (y[idx] - y_predicted)
                self.weights += update * x_i
                self.bias += update

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        return self._unit_step_function(linear_output)

    def _unit_step_function(self, x):
        return np.where(x >= 0, 1, -1)

# Load dataset
X, y = load_dataset('diabetes.txt')

# Clean the data by treating zero values
X_cleaned = X.copy()
columns_with_missing_values = [1, 2, 3, 4, 5]

# Replace zeros with NaN
for col in columns_with_missing_values:
    X_cleaned[:, col][X_cleaned[:, col] == 0] = np.nan

# Impute missing values 
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X_cleaned = imputer.fit_transform(X_cleaned)

# Scale the features to zero mean and unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_cleaned)

# Set a DataFrame for the scaled data
feature_columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigree', 'Age']
df_scaled = pd.DataFrame(X_scaled, columns=feature_columns)
df_scaled['Label'] = y

# Print the header
print(df_scaled.head())

# Separate the data for training and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Perceptron model Initializing
perceptron = Perceptron(learning_rate=0.01, n_iters=1000)

# Model training
perceptron.fit(X_train, y_train)

# Model testing
predictions = perceptron.predict(X_test)

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions, pos_label=1)
recall = recall_score(y_test, predictions, pos_label=1)

print(f"Perceptron classification accuracy: {accuracy * 100:.2f}%")
print(f"Perceptron precision: {precision:.2f}")
print(f"Perceptron recall: {recall:.2f}")

   Pregnancies   Glucose  BloodPressure  SkinThickness       Insulin  \
0     0.639947  0.865108      -0.033518   6.655021e-01 -7.024666e-15   
1    -0.844885 -1.206162      -0.529859  -1.746338e-02 -7.024666e-15   
2     1.233880  2.015813      -0.695306   5.661555e-15 -7.024666e-15   
3    -0.844885 -1.074652      -0.529859  -7.004289e-01 -7.243887e-01   
4    -1.141852  0.503458      -2.680669   6.655021e-01  1.465506e-01   

        BMI  DiabetesPedigree       Age  Label  
0  0.166291          0.468492  1.425995   -1.0  
1 -0.852531         -0.365061 -0.190672    1.0  
2 -1.332834          0.604397 -0.105584   -1.0  
3 -0.634212         -0.920763 -1.041549    1.0  
4  1.548980          5.484909 -0.020496   -1.0  
Perceptron classification accuracy: 81.17%
Perceptron precision: 0.84
Perceptron recall: 0.88
