<a href="https://colab.research.google.com/github/amiralirh/AI_Proj4032/blob/main/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Download the dataset from Google Drive by file ID via the gdown CLI
# (the recorded output of this cell shows it being saved as diabetes.csv).
!gdown 1FXdsX4VkJEO97xG3LUZsEz1zbn8Sef6t

Downloading...
From: https://drive.google.com/uc?id=1FXdsX4VkJEO97xG3LUZsEz1zbn8Sef6t
To: /content/diabetes.csv
  0% 0.00/23.9k [00:00<?, ?B/s]100% 23.9k/23.9k [00:00<00:00, 61.2MB/s]


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Step 1: Data Cleaning and Handling Zero Values
# =======================================================
# Read the raw CSV into a DataFrame
df = pd.read_csv('diabetes.csv')

# Measurements for which a literal 0 cannot be a real reading and is
# therefore treated as a placeholder for "missing"
columns_with_zeros = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']

# First pass: for every such column, take the median over its non-zero entries.
# Each median depends only on its own column, so computing them all up front
# gives the same values as computing them one at a time.
nonzero_medians = {
    name: df.loc[df[name] != 0, name].median()
    for name in columns_with_zeros
}

# Second pass: overwrite the zeros in each column with that column's median
for column, median_val in nonzero_medians.items():
    df[column] = df[column].replace(0, median_val)

In [None]:
# Print a structural summary of the cleaned frame: index range, per-column
# non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [None]:
# Step 2: Data Preparation and Normalization
# =======================================================
# Define features (X) and target variable (y)
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Split data into training and testing sets (80% train, 20% test) BEFORE scaling.
# Splitting first keeps the held-out rows out of the scaler's mean/std estimates;
# fitting the scaler on the full dataset would leak test-set statistics into the
# training features. The split is stratified on y and seeded, so the same rows
# land in each partition as when splitting the pre-scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize features using StandardScaler: learn the statistics from the training
# rows only, then apply that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training-set statistics; kept so that any
# later cell referring to X_scaled continues to work.
X_scaled = scaler.transform(X)

In [None]:
# Step 3: MLP Model Implementation and Evaluation
# =======================================================
# Hyperparameters: a single hidden layer of 100 neurons with ReLU activation,
# at most 500 training iterations, and a fixed seed for reproducibility
mlp_params = {
    'hidden_layer_sizes': (100,),
    'max_iter': 500,
    'random_state': 42,
    'activation': 'relu',
}

# Build the classifier and train it on the training data in one step
# (fit() returns the fitted estimator itself)
model = MLPClassifier(**mlp_params).fit(X_train, y_train)

# Class predictions for the held-out test data
y_pred = model.predict(X_test)

In [None]:
# Score the test-set predictions against the true labels
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Assemble the report line by line and emit it in a single print call;
# every metric is shown with four decimal places
report_lines = [
    "MLP Model Evaluation Results:",
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1-Score: {f1:.4f}",
]
print("\n".join(report_lines))

MLP Model Evaluation Results:
Accuracy: 0.7468
Precision: 0.6744
Recall: 0.5370
F1-Score: 0.5979
