# Importing Libraries

In [19]:
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Loading Dataset

In [20]:
# Read the raw records into a DataFrame. The path is relative, so the CSV
# must sit in the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='liver_cirrhosis.csv')

# Data Processing

In [21]:
# Add an age-in-years feature by dividing the raw 'Age' column by 365.
# NOTE(review): this presumes 'Age' is recorded in days — confirm against the
# dataset description.
data['Age_Years'] = data['Age'].div(365)

In [22]:
# Target vector: the 'Stage' label.
y = data['Stage']

# Feature frame: every remaining column except the target itself and the raw
# 'Age' column, which the derived 'Age_Years' column replaces.
X = data.drop(['Stage', 'Age'], axis=1)

In [23]:
# Feature names grouped by the preprocessing each group receives further down.
# The spellings (including 'Tryglicerides') are the column names used
# throughout this notebook and must be kept as-is.

# Continuous measurements -> scaled.
numerical_cols = [
    'Bilirubin',
    'Cholesterol',
    'Albumin',
    'Copper',
    'Alk_Phos',
    'SGOT',
    'Tryglicerides',
    'Platelets',
    'Prothrombin',
    'Age_Years',
]

# Categorical columns handled by the "binary" encoder.
binary_cols = [
    'Sex',
    'Ascites',
    'Hepatomegaly',
    'Spiders',
]

# Categorical columns handled by the multi-class one-hot encoder.
multi_class_cols = [
    'Edema',
    'Status',
    'Drug',
]

In [24]:
# One single-step Pipeline per feature group; each is plugged into the
# ColumnTransformer defined in the next cell.

# Numeric features: standardise each column with StandardScaler.
numerical_transformer = Pipeline([('scaler', StandardScaler())])

# "Binary" categoricals: drop='if_binary' makes the encoder emit a single 0/1
# indicator for any column that has exactly two categories; a column with a
# different number of categories keeps all of its indicator columns.
binary_transformer = Pipeline([
    ('binary_encoder', OneHotEncoder(drop='if_binary')),
])

# Multi-level categoricals: one-hot encode and drop the first category of each
# column, so k categories become k-1 indicators (avoids a redundant column).
multi_class_transformer = Pipeline([
    ('onehot', OneHotEncoder(drop='first')),
])

In [25]:
# Route each column group to its transformer. Columns are selected by name.
# remainder='drop' is the library default, spelled out here on purpose: any
# column of X that is not named in one of the three lists is discarded before
# the classifier sees the data.
preprocessor = ColumnTransformer(
    [
        ('num', numerical_transformer, numerical_cols),
        ('bin', binary_transformer, binary_cols),
        ('multi', multi_class_transformer, multi_class_cols),
    ],
    remainder='drop',
)

In [26]:
# End-to-end estimator: preprocessing followed by a random forest. Keeping both
# in one Pipeline means the exact same transformations are applied at fit time
# and at predict time. random_state pins the forest's randomness so reruns are
# reproducible.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])

# Splitting the Dataset into Training and Testing Sets

In [27]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fitting the Model

In [28]:
# Fit the preprocessing steps and the classifier on the training split only.
# Left as the cell's last expression so the fitted pipeline is displayed.
model.fit(X=X_train, y=y_train)

# Making Predictions and Computing the Accuracy Score

In [29]:
# Score the held-out split with the fitted pipeline.
y_pred = model.predict(X_test)

# Overall fraction of correctly classified test rows.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-class precision / recall / F1 and support.
report_text = classification_report(y_test, y_pred)
print("Classification Report:\n", report_text)


Accuracy: 0.9548
Classification Report:
               precision    recall  f1-score   support

           1       0.97      0.94      0.95      1657
           2       0.93      0.96      0.95      1697
           3       0.97      0.96      0.97      1646

    accuracy                           0.95      5000
   macro avg       0.96      0.95      0.95      5000
weighted avg       0.96      0.95      0.95      5000



# Saving the Trained Pipeline for Use on New Data

In [30]:
# Persist the whole fitted pipeline (preprocessing + classifier) to a single
# file so new rows can be scored later without re-fitting. `joblib` is imported
# in the notebook's import cell. joblib.dump returns the list of file names it
# wrote, which the notebook displays as this cell's output.
joblib.dump(model, 'liver_cirrhosis_stage_detector_pipeline.pkl')


['liver_cirrhosis_stage_detector_pipeline.pkl']

# Using the Saved Pipeline to Make Predictions

In [32]:
# Reload the persisted pipeline so this cell exercises the saved artefact
# rather than whatever `model` object happens to be alive in the kernel.
# (`joblib` and `pd` come from the notebook's import cell.)
# NOTE: joblib.load unpickles arbitrary Python objects — only load files you
# trust. This file is the one written by the cell above.
stage_detector = joblib.load('liver_cirrhosis_stage_detector_pipeline.pkl')

# One patient record. The keys must be spelled exactly like the training
# column names, because the pipeline's ColumnTransformer selects columns by
# name. 'Age_Years' is given directly in years, matching the derived training
# feature.
user_input = {
    'Bilirubin': 1.4,
    'Cholesterol': 250,
    'Albumin': 3.5,
    'Copper': 120,
    'Alk_Phos': 150,
    'SGOT': 45,
    'Tryglicerides': 120,
    'Platelets': 250,
    'Prothrombin': 12,
    'Age_Years': 52,
    'Sex': 'F',
    'Ascites': 'N',
    'Hepatomegaly': 'Y',
    'Spiders': 'N',
    'Edema': 'S',
    'Status': 'C',
    'Drug': 'Placebo'
}

# Wrap the record in a one-row DataFrame: the pipeline expects tabular input
# with named columns.
user_df = pd.DataFrame([user_input])

predicted_stage = stage_detector.predict(user_df)
# Optional: class probabilities; the columns follow `stage_detector.classes_`.
predicted_proba = stage_detector.predict_proba(user_df)

print(f"Predicted Stage of Liver Cirrhosis: {predicted_stage[0]}")
print(f"Prediction Probabilities (by Stage): {predicted_proba[0]}")



Predicted Stage of Liver Cirrhosis: 3
Prediction Probabilities (by Stage): [0.06 0.16 0.78]
