In [1]:
# ==========================================
# Anemia Classification Model
# ==========================================

import os

import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ------------------------------------------------------------------
# Configuration: every path and tunable used below, in one place.
# Values are the same ones the script has always used.
# ------------------------------------------------------------------
DATA_PATH = "../data/raw/anemia.csv"
MODEL_PATH = "../models/anemia_model.pkl"
SCALER_PATH = "../models/scaler_anemia.pkl"
TARGET_COL = "Result"
TEST_SIZE = 0.25
SEED = 42
MAX_ITER = 200

# Load anemia dataset
df = pd.read_csv(DATA_PATH)

# A notebook cell only auto-renders its *last* expression, so bare
# `df.head()` / `df.isnull().sum()` lines in the middle of a cell are
# evaluated and thrown away. Print them explicitly so the checks are visible.
print(df.head())

null_counts = df.isnull().sum()
print(null_counts)

# Fail fast with a readable message instead of letting missing values reach
# the estimator, where they would surface as a less specific error.
if null_counts.any():
    raise ValueError(
        f"{DATA_PATH} contains missing values:\n{null_counts[null_counts > 0]}"
    )

# Features: every column except the target; the target is the label vector.
X = df.drop(TARGET_COL, axis=1)
y = df[TARGET_COL]

# Hold out a fixed-seed test split so the reported metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Scaling: fit on the training split only, then apply the same transform to
# the test split so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# Model
model = LogisticRegression(max_iter=MAX_ITER)
model.fit(X_train_s, y_train)

# Evaluate on the held-out split.
pred = model.predict(X_test_s)

print("Accuracy:", accuracy_score(y_test, pred))
print(classification_report(y_test, pred))

# Save. The scaler is persisted next to the model because inputs must be
# transformed with this exact fitted scaler before calling model.predict.
# Create the target directory first so a fresh checkout does not fail here.
for artifact_path in (MODEL_PATH, SCALER_PATH):
    os.makedirs(os.path.dirname(artifact_path), exist_ok=True)

joblib.dump(model, MODEL_PATH)
joblib.dump(scaler, SCALER_PATH)

print("Anemia model saved!")


Accuracy: 0.9915730337078652
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       207
           1       0.98      1.00      0.99       149

    accuracy                           0.99       356
   macro avg       0.99      0.99      0.99       356
weighted avg       0.99      0.99      0.99       356

Anemia model saved!
