# 1. Kidney Disease Model

In [4]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib


# Load dataset
kidney_df = pd.read_csv("Preprocessed_Data/Kidney_Preprocessed.csv")
kidney_df.drop(['Unnamed: 0'],inplace=True,axis = 1)

# Split features and target
X_kidney = kidney_df.drop(columns=["classification_notckd"])
y_kidney = kidney_df["classification_notckd"]

# # Handle imbalance using SMOTE
# smote = SMOTE(random_state=42)
# X_resampled, y_resampled = smote.fit_resample(X_kidney, y_kidney)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_kidney, y_kidney, test_size=0.2, random_state=42, stratify=y_kidney
)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Define Models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Track best model
best_model = None
best_score = 0
best_name = ""


# Train & Evaluate
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred_test = model.predict(X_test)

    # ===== Performance Matrix =====
    report = classification_report(y_test, y_pred_test)
    print(f"\nREPORT OF THE {name} Model:\n", report)

    score = accuracy_score(y_test, y_pred_test)
    print(f"Score = {score}")

    # Save the best model
    if score > best_score:
        best_score = score
        best_model = model
        best_name = name

# Save the Best Model
if best_model:
    joblib.dump(best_model, "Kidney_model.pkl")
    print(f"\nBest Model ({best_name}) saved as 'Kidney_model.pkl' with accuracy {best_score:.4f}")

# Save the Scaler
joblib.dump(scaler, "Kidney_scaler.pkl")



REPORT OF THE Logistic Regression Model:
               precision    recall  f1-score   support

       False       1.00      0.98      0.99        50
        True       0.97      1.00      0.98        30

    accuracy                           0.99        80
   macro avg       0.98      0.99      0.99        80
weighted avg       0.99      0.99      0.99        80

Score = 0.9875

REPORT OF THE Random Forest Model:
               precision    recall  f1-score   support

       False       0.94      1.00      0.97        50
        True       1.00      0.90      0.95        30

    accuracy                           0.96        80
   macro avg       0.97      0.95      0.96        80
weighted avg       0.96      0.96      0.96        80

Score = 0.9625

REPORT OF THE XGBoost Model:
               precision    recall  f1-score   support

       False       0.94      1.00      0.97        50
        True       1.00      0.90      0.95        30

    accuracy                           0.

Parameters: { "use_label_encoder" } are not used.



['Kidney_scaler.pkl']

# 2. Liver Disease Model

In [5]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib


# Load dataset
liver_df = pd.read_csv("Preprocessed_Data/Liver_Preprocessed.csv")
liver_df.drop(['Unnamed: 0'],inplace=True,axis = 1)

# Split features and target
X_liver = liver_df.drop(columns=["Dataset"])
y_liver = liver_df["Dataset"]

# Handle imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_liver, y_liver)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Define Models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Track best model
best_model = None
best_score = 0
best_name = ""


# Train & Evaluate
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred_test = model.predict(X_test)

    # ===== Performance Matrix =====
    report = classification_report(y_test, y_pred_test)
    print(f"\nREPORT OF THE {name} Model:\n", report)

    score = accuracy_score(y_test, y_pred_test)
    print(f"Score = {score}")

    # Save the best model
    if score > best_score:
        best_score = score
        best_model = model
        best_name = name

# Save the Best Model
if best_model:
    joblib.dump(best_model, "liver_model.pkl")
    print(f"\nBest Model ({best_name}) saved as 'liver_model.pkl' with accuracy {best_score:.4f}")
    
# Save the Scaler
joblib.dump(scaler, "Liver_scaler.pkl")



REPORT OF THE Logistic Regression Model:
               precision    recall  f1-score   support

           0       0.72      0.83      0.77        84
           1       0.80      0.67      0.73        83

    accuracy                           0.75       167
   macro avg       0.76      0.75      0.75       167
weighted avg       0.76      0.75      0.75       167

Score = 0.7544910179640718

REPORT OF THE Random Forest Model:
               precision    recall  f1-score   support

           0       0.78      0.87      0.82        84
           1       0.85      0.75      0.79        83

    accuracy                           0.81       167
   macro avg       0.81      0.81      0.81       167
weighted avg       0.81      0.81      0.81       167

Score = 0.8083832335329342

REPORT OF THE XGBoost Model:
               precision    recall  f1-score   support

           0       0.82      0.85      0.83        84
           1       0.84      0.81      0.82        83

    accuracy     

Parameters: { "use_label_encoder" } are not used.



['Liver_scaler.pkl']

# 3. Parkinson Model

In [6]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib


# Load dataset
df = pd.read_csv("Preprocessed_Data/Parkinson_preprocessed.csv")
df.drop(['Unnamed: 0'],inplace=True,axis = 1)

# Split features and target
X = df.drop(columns=["status"])
y = df["status"]

# # Handle imbalance using SMOTE
# smote = SMOTE(random_state=42)
# X_resampled, y_resampled = smote.fit_resample(X_liver, y_liver)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Define Models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Track best model
best_model = None
best_score = 0
best_name = ""


# Train & Evaluate
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred_test = model.predict(X_test)

    # ===== Performance Matrix =====
    report = classification_report(y_test, y_pred_test)
    print(f"\nREPORT OF THE {name} Model:\n", report)

    score = accuracy_score(y_test, y_pred_test)
    print(f"Score = {score}")

    # Save the best model
    if score > best_score:
        best_score = score
        best_model = model
        best_name = name

# Save the Best Model
if best_model:
    joblib.dump(best_model, "Parkinson_model.pkl")
    print(f"\nBest Model ({best_name}) saved as 'Parkinson_model.pkl' with accuracy {best_score:.4f}")
    
# Save the Scaler
joblib.dump(scaler, "Parkinson_scaler.pkl")



REPORT OF THE Logistic Regression Model:
               precision    recall  f1-score   support

           0       0.89      0.80      0.84        10
           1       0.93      0.97      0.95        29

    accuracy                           0.92        39
   macro avg       0.91      0.88      0.90        39
weighted avg       0.92      0.92      0.92        39

Score = 0.9230769230769231

REPORT OF THE Random Forest Model:
               precision    recall  f1-score   support

           0       0.89      0.80      0.84        10
           1       0.93      0.97      0.95        29

    accuracy                           0.92        39
   macro avg       0.91      0.88      0.90        39
weighted avg       0.92      0.92      0.92        39

Score = 0.9230769230769231

REPORT OF THE XGBoost Model:
               precision    recall  f1-score   support

           0       0.89      0.80      0.84        10
           1       0.93      0.97      0.95        29

    accuracy     

Parameters: { "use_label_encoder" } are not used.



['Parkinson_scaler.pkl']