In [14]:
# Activity 1: Train Multiple ML Algorithms

# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# ML Models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


In [None]:
# Step 2: Load dataset
df = pd.read_csv('../data/liver_dataset_cleaned.csv')  # Adjust path if needed

# Split features and labels
X = df.drop('Target', axis=1)
y = df['Target']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)


In [16]:
# Step 3: Initialize and Train Models

models = {
    'Logistic Regression': LogisticRegression(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Random Forest': RandomForestClassifier(),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    pre = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    results.append({
        'Model': name,
        'Accuracy': acc,
        'Precision': pre,
        'Recall': rec,
        'F1 Score': f1
    })

    print(f"📌 {name}")
    print(classification_report(y_test, y_pred))
    print("-" * 50)


📌 Logistic Regression
              precision    recall  f1-score   support

           0       0.67      0.18      0.29        33
           1       0.75      0.96      0.84        83

    accuracy                           0.74       116
   macro avg       0.71      0.57      0.56       116
weighted avg       0.72      0.74      0.68       116

--------------------------------------------------
📌 K-Nearest Neighbors
              precision    recall  f1-score   support

           0       0.43      0.30      0.36        33
           1       0.75      0.84      0.80        83

    accuracy                           0.69       116
   macro avg       0.59      0.57      0.58       116
weighted avg       0.66      0.69      0.67       116

--------------------------------------------------
📌 Random Forest
              precision    recall  f1-score   support

           0       0.45      0.15      0.23        33
           1       0.73      0.93      0.82        83

    accuracy        

Parameters: { "use_label_encoder" } are not used.



📌 XGBoost
              precision    recall  f1-score   support

           0       0.64      0.27      0.38        33
           1       0.76      0.94      0.84        83

    accuracy                           0.75       116
   macro avg       0.70      0.61      0.61       116
weighted avg       0.73      0.75      0.71       116

--------------------------------------------------


In [17]:
# Step 4: Compare Results
results_df = pd.DataFrame(results)
results_df.sort_values(by='F1 Score', ascending=False)


Unnamed: 0,Model,Accuracy,Precision,Recall,F1 Score
3,XGBoost,0.75,0.764706,0.939759,0.843243
0,Logistic Regression,0.741379,0.747664,0.963855,0.842105
2,Random Forest,0.706897,0.733333,0.927711,0.819149
1,K-Nearest Neighbors,0.689655,0.752688,0.843373,0.795455
