In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib

# Train a random-forest classifier on the liver-disease data and persist it.
# Steps: load CSV -> drop unlabeled rows -> encode target -> encode 'sex'
#        -> split -> impute with training means -> fit -> save artifacts.

# Load the dataset
liver_data = pd.read_csv("Liver_Disease_data.csv")

# Rows without a target label cannot be used for supervised training
liver_data = liver_data.dropna(subset=['category'])

# Encode the target variable (Convert 'no_disease', 'hepatitis' to numbers).
# label_encoder.classes_ holds the original names, indexed by encoded value.
label_encoder = LabelEncoder()
liver_data['category'] = label_encoder.fit_transform(liver_data['category'])

# Separate features (X) and target (y)
X = liver_data.drop(columns=['category'])
y = liver_data['category']

# Convert categorical 'sex' column into numerical values ('m' -> 0, 'f' -> 1).
# Any other value becomes NaN and is imputed below.
X['sex'] = X['sex'].map({'m': 0, 'f': 1})

# Split BEFORE imputing so statistics of the held-out rows never influence
# the training data (avoids train/test leakage).
# NOTE: X itself is left un-imputed; only X_train / X_test are filled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fill remaining missing values with column means learned from the training split only
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Train the model
# NOTE: X_test / y_test are held out but not evaluated in this cell.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the model, plus the fitted label encoder so integer predictions can be
# mapped back to category names (label_encoder.inverse_transform) by consumers.
joblib.dump(model, "liver_disease_model.pkl")
joblib.dump(label_encoder, "liver_disease_label_encoder.pkl")
print("Model saved as liver_disease_model.pkl")


Model saved as liver_disease_model.pkl


In [3]:
import os

# Report the directory the kernel is running in; relative file paths
# used by the notebook resolve against this location.
current_dir = os.getcwd()
print(current_dir)

C:\Users\fUJITSU\Desktop\AD_Traing\flask
