In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pickle
import numpy as np
import pandas as pd

In [None]:
df=pd.read_csv('diabetes.csv')
df

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
0,148,0,33.6,50,1
1,85,0,26.6,31,0
2,183,0,23.3,32,1
3,89,94,28.1,21,0
4,137,168,43.1,33,1
...,...,...,...,...,...
763,101,180,32.9,63,0
764,122,0,36.8,27,0
765,121,112,26.2,30,0
766,126,0,30.1,47,1


In [None]:
df.isnull().sum()

Unnamed: 0,0
Glucose,0
Insulin,0
BMI,0
Age,0
Outcome,0


In [None]:
df.duplicated().sum()

0

In [None]:
replace_value = ['Glucose', 'Insulin', 'BMI', 'Age']
for col in replace_value:
    # Replace 0 with NaN
    df[col] = df[col].replace({0: np.nan})

    # Calculate the mean and reassign the filled values to the column
    mean_value = df[col].mean()
    df[col] = df[col].fillna(mean_value)


In [None]:
X = df[['Glucose','Insulin', 'BMI', 'Age']]
y = df['Outcome']  # Target variable

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train1=X_train
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Function to detect outliers using IQR
def calculate_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
    return outliers

# Detect outliers for each relevant column
for col in ['Glucose','Insulin', 'BMI', 'Age']:
    outliers = calculate_outliers(df, col)
    print(f"Outliers in {col}:")
    print(outliers, "\n")

Outliers in Glucose:
Empty DataFrame
Columns: [Glucose, Insulin, BMI, Age, Outcome]
Index: [] 

Outliers in Insulin:
     Glucose  Insulin   BMI  Age  Outcome
8      197.0    543.0  30.5   53        1
13     189.0    846.0  30.1   59        1
16     118.0    230.0  45.8   31        1
20     126.0    235.0  39.3   27        0
31     158.0    245.0  31.6   28        1
..       ...      ...   ...  ...      ...
713    134.0    291.0  26.4   21        0
715    187.0    392.0  33.9   34        1
747     81.0     57.0  46.3   32        0
753    181.0    510.0  43.3   26        1
760     88.0     16.0  28.4   22        0

[164 rows x 5 columns] 

Outliers in BMI:
     Glucose     Insulin   BMI  Age  Outcome
120    162.0  100.000000  53.2   25        1
125     88.0   99.000000  55.0   26        1
177    129.0  130.000000  67.1   26        1
193    135.0  155.548223  52.3   40        1
247    165.0  680.000000  52.3   23        0
303    115.0  155.548223  52.9   28        1
445    180.0   14.000

In [None]:
df.describe()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0
mean,121.686763,155.548223,32.457464,33.240885,0.348958
std,30.435949,85.021108,6.875151,11.760232,0.476951
min,44.0,14.0,18.2,21.0,0.0
25%,99.75,121.5,27.5,24.0,0.0
50%,117.0,155.548223,32.4,29.0,0.0
75%,140.25,155.548223,36.6,41.0,1.0
max,199.0,846.0,67.1,81.0,1.0


In [None]:
df.head()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
0,148.0,155.548223,33.6,50,1
1,85.0,155.548223,26.6,31,0
2,183.0,155.548223,23.3,32,1
3,89.0,94.0,28.1,21,0
4,137.0,168.0,43.1,33,1


In [None]:
X_train

array([[-1.25828206,  0.00739544,  0.01501323, -1.03594038],
       [-0.32735374,  0.00739544, -0.59935041,  1.48710085],
       [ 0.57032714, -0.82739127, -0.52719904, -0.94893896],
       ...,
       [-0.69307558,  0.00739544,  1.91151712,  0.44308379],
       [ 0.63682202,  0.00739544,  1.44974838, -0.33992901],
       [ 0.10486298,  0.00739544, -1.42187598, -1.03594038]])

In [None]:
X_test

array([[-0.7928179 ,  0.40381984,  0.23760544,  0.87809089],
       [-0.32735374,  0.00739544,  0.48292008, -1.03594038],
       [-0.4603435 ,  0.00739544, -0.22416331, -1.03594038],
       [-0.49359094,  0.00739544, -1.11884024,  0.0950781 ],
       [ 0.47058482,  0.00739544, -0.35403576,  1.48710085],
       [-0.6265807 ,  0.40381984,  0.77152554,  1.92210795],
       [-1.69049878, -0.90793779, -1.72491172, -0.94893896],
       [-0.16111654,  0.00739544,  0.20874489,  0.96509232],
       [ 1.06903874, -0.3326055 , -0.15201194,  0.35608237],
       [ 0.83630666,  0.00739544,  0.19431462,  2.79212217],
       [-0.36060118,  0.00739544, -0.70036232,  0.61708663],
       [ 1.90022474,  0.00739544,  0.26646598,  2.35711506],
       [ 0.8695541 ,  1.87667051, -0.20973303, -0.33992901],
       [-0.85931278, -1.01149761,  0.17988434,  0.87809089],
       [-1.1252923 , -1.59833655, -0.57048986, -0.94893896],
       [ 0.10486298,  0.13916698,  0.13659352, -0.42693043],
       [-1.25828206,  0.

Naive bayes model

In [None]:
naive_bayes_model = GaussianNB()
naive_bayes_model.fit(X_train, y_train)
y_pred_nb = naive_bayes_model.predict(X_test)

Perceptron model

In [None]:
# Using sklearn's Perceptron
perceptron_model = Perceptron(max_iter=1000, tol=1e-3, random_state=42)
perceptron_model.fit(X_train, y_train)
y_pred_perceptron = perceptron_model.predict(X_test)

In [None]:
# Evaluation metrics
def evaluate_model(y_test, y_pred):
    return {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1_score': f1_score(y_test, y_pred, average='weighted')
    }

nb_metrics = evaluate_model(y_test, y_pred_nb)
perceptron_metrics = evaluate_model(y_test, y_pred_perceptron)

print("Naive Bayes Metrics:", nb_metrics)
print("Perceptron Metrics:", perceptron_metrics)

Naive Bayes Metrics: {'accuracy': 0.7597402597402597, 'precision': 0.7588095238095239, 'recall': 0.7597402597402597, 'f1_score': 0.7592418185290805}
Perceptron Metrics: {'accuracy': 0.6883116883116883, 'precision': 0.7151099367288066, 'recall': 0.6883116883116883, 'f1_score': 0.6944008372579802}


Custom perceptron

In [None]:
class CustomPerceptron:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = 0

    def fit(self, X, y):
        # Initialize weights and bias
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)

        # Convert y from {0, 1} to {-1, 1} for compatibility with the Perceptron algorithm
        y_ = np.where(y <= 0, -1, 1)

        # Training loop
        for _ in range(self.n_iterations):
            for idx, x_i in enumerate(X):
                # Calculate linear output
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = np.sign(linear_output)

                # Update weights and bias if prediction is incorrect
                if y_predicted != y_[idx]:
                    self.weights += self.learning_rate * y_[idx] * x_i
                    self.bias += self.learning_rate * y_[idx]

    def predict(self, X):
        # Calculate the linear output and make predictions
        linear_output = np.dot(X, self.weights) + self.bias
        return np.where(linear_output >= 0, 1, 0)


In [None]:
# Assuming X_train and y_train are defined
Cperceptron = CustomPerceptron(learning_rate=0.01, n_iterations=1000)
Cperceptron.fit(X_train, y_train)

In [None]:
# Predict on test data
y_pred_Cperceptron =Cperceptron.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example evaluation
accuracy = accuracy_score(y_test, y_pred_Cperceptron)
precision = precision_score(y_test, y_pred_Cperceptron)
recall = recall_score(y_test, y_pred_Cperceptron)
f1 = f1_score(y_test, y_pred_Cperceptron)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Accuracy: 0.7012987012987013
Precision: 0.5671641791044776
Recall: 0.6909090909090909
F1 Score: 0.6229508196721312


In [None]:
scaler.fit(X_train1)
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)


In [None]:
with open('naive_bayes_model.pkl', 'wb') as f:
    pickle.dump(naive_bayes_model, f)

with open('perceptron_model.pkl', 'wb') as f:
    pickle.dump(perceptron_model, f)

with open('Cperceptron.pkl', 'wb') as f:
    pickle.dump(Cperceptron, f)