In [3]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [10]:
# Step 2: Load Dataset
DATA_PATH = "ILPD.csv"
COLUMN_NAMES = ['Age', 'Gender', 'TB', 'DB', 'Alkphos', 'Sgpt', 'Sgot',
                'TP', 'ALB', 'A/G Ratio', 'Selector']

# Peek at the first cell to decide whether the file starts with a header row.
# A plain read_csv() always treats the first line as a header, so a headerless
# file would silently lose its first record once the columns are renamed.
# NOTE(review): assumes the first column is numeric (Age) in every data row.
first_cell = pd.read_csv(DATA_PATH, header=None, nrows=1).iloc[0, 0]
has_header = pd.isna(pd.to_numeric(first_cell, errors='coerce'))

df = pd.read_csv(DATA_PATH, header=0 if has_header else None)

# Apply the short column names. pandas raises ValueError here when the file
# does not have exactly as many columns as COLUMN_NAMES, which surfaces a
# wrong or malformed input file early.
df.columns = COLUMN_NAMES

# Convert 'A/G Ratio' to numeric; entries that cannot be parsed become NaN
df['A/G Ratio'] = pd.to_numeric(df['A/G Ratio'], errors='coerce')

# Fill NaNs in 'A/G Ratio' with the column mean. Plain assignment is used
# rather than inplace=True, which pandas discourages.
# NOTE(review): the mean is computed over all rows, i.e. before the later
# train/test split, so held-out rows contribute to the imputed value.
df['A/G Ratio'] = df['A/G Ratio'].fillna(df['A/G Ratio'].mean())


In [11]:
# Step 3: Preprocess
# Encode Gender as integer codes. The fitted encoder is kept in a variable
# (instead of being thrown away) so the label -> code mapping can be read from
# gender_encoder.classes_ and reused to encode new records the same way.
gender_encoder = LabelEncoder()
df['Gender'] = gender_encoder.fit_transform(df['Gender'])

# Convert 'Selector' to binary target (1 = Liver disease, 0 = No disease).
# Vectorized comparison: every value other than 1 (including NaN) becomes 0.
df['Selector'] = df['Selector'].eq(1).astype('int64')


In [12]:
# Step 4: Split features and target
X = df.drop('Selector', axis=1)
y = df['Selector']

# Step 5: Train-Test Split
# Split the raw (unscaled) features first so that the held-out rows play no
# part in fitting the scaler below.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 6: Normalize features
# Fit the scaler on the training rows only, then apply that same transform to
# both partitions. Fitting on the full feature matrix would leak test-set
# means and variances into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix, kept under its original name for any
# code that still refers to it. It uses the training-only scaler fitted above.
X_scaled = scaler.transform(X)


In [14]:
# Step 7: Train Random Forest Model
# 100 trees with a fixed seed; fit() returns the estimator itself, so the
# construction and training can be chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 8: Predict and Evaluate
# Score the held-out partition, then report overall accuracy and the
# per-class precision / recall / F1 table.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("✅ Accuracy:", test_accuracy)
print("✅ Classification Report:\n", report_text)


✅ Accuracy: 0.7350427350427351
✅ Classification Report:
               precision    recall  f1-score   support

           0       0.48      0.37      0.42        30
           1       0.80      0.86      0.83        87

    accuracy                           0.74       117
   macro avg       0.64      0.61      0.62       117
weighted avg       0.72      0.74      0.72       117



In [15]:
# Step 9: Save the model and scaler
# Both fitted objects are written with joblib to the current working
# directory: the classifier first, then the scaler.
for fitted_object, target_file in ((model, 'rf_model.pkl'), (scaler, 'scaler.pkl')):
    joblib.dump(fitted_object, target_file)

print("✅ Model and Scaler saved as 'rf_model.pkl' and 'scaler.pkl'")


✅ Model and Scaler saved as 'rf_model.pkl' and 'scaler.pkl'


In [18]:
# Show where relative paths resolve, i.e. the directory "ILPD.csv" is read
# from and the .pkl files are written to. `os` is imported with the other
# imports at the top of the notebook.
print("Current working directory:", os.getcwd())



Current working directory: f:\project
