In [12]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Load the raw records. The path is relative, so it is resolved against the
# current working directory of the kernel.
df = pd.read_csv(filepath_or_buffer="healthcare_dataset.csv")

In [6]:
# Remove the 'Name' and 'Doctor' columns; no later cell reads them.
# errors='ignore' keeps this cell idempotent: because the result is assigned
# back to `df`, a second run would otherwise raise KeyError on the columns
# that the first run already removed.
df = df.drop(columns=['Name', 'Doctor'], errors='ignore')
df.head(5)

Unnamed: 0,Age,Gender,Blood Type,Medical Condition,Billing Amount,Admission Type,Medication,Test Results,Length of Stay
0,30,Male,B-,Cancer,18856.281306,Urgent,Paracetamol,Normal,2
1,62,Male,A+,Obesity,33643.327287,Emergency,Ibuprofen,Inconclusive,6
2,76,Female,A-,Obesity,27955.096079,Emergency,Aspirin,Normal,15
3,28,Female,O+,Diabetes,37909.78241,Elective,Ibuprofen,Abnormal,30
4,43,Female,AB+,Cancer,14238.317814,Urgent,Penicillin,Abnormal,20


In [7]:
# Encode every categorical column so that `df` becomes fully numeric.
#
# This cell consumes the raw string columns (the one-hot source columns are
# dropped at the end), so re-run it only after re-running the load cell.

# Integer-coded columns. LabelEncoder assigns codes in sorted class order
# (e.g. Female -> 0, Male -> 1). The fitted encoders are kept in this dict so
# the codes can be mapped back with `inverse_transform` later.
label_encoders = {
    'Gender': LabelEncoder(),
    'Admission Type': LabelEncoder(),
    'Test Results': LabelEncoder(),
}

# One-hot encoded columns. `sparse_output=False` returns a dense array that
# can be wrapped directly in a DataFrame.
one_hot_encoders = {
    'Blood Type': OneHotEncoder(sparse_output=False),  # Use sparse_output instead of sparse
    'Medication': OneHotEncoder(sparse_output=False),
    'Medical Condition': OneHotEncoder(sparse_output=False)
}

# Apply Label Encoding
for column, le in label_encoders.items():
    df[column] = le.fit_transform(df[column])

# Apply One-Hot Encoding
ohe_frames = []
for column, ohe in one_hot_encoders.items():
    ohe_frames.append(
        pd.DataFrame(
            ohe.fit_transform(df[[column]]),
            columns=ohe.get_feature_names_out([column]),
            # Reuse df's index: pd.concat(axis=1) aligns on index labels, so a
            # fresh RangeIndex would misalign rows (and introduce NaNs) if df
            # ever carries a non-default index, e.g. after filtering rows.
            index=df.index,
        )
    )

# Concatenate once instead of once per column. The final column order is
# unchanged: the remaining original columns, followed by each one-hot group
# in dict order.
df = pd.concat([df.drop(columns=list(one_hot_encoders)), *ohe_frames], axis=1)

In [14]:
# Preview the first five rows of the encoded frame.
df.head(n=5)

Unnamed: 0,Age,Gender,Billing Amount,Admission Type,Test Results,Length of Stay,Blood Type_A+,Blood Type_A-,Blood Type_AB+,Blood Type_AB-,...,Medication_Ibuprofen,Medication_Lipitor,Medication_Paracetamol,Medication_Penicillin,Medical Condition_Arthritis,Medical Condition_Asthma,Medical Condition_Cancer,Medical Condition_Diabetes,Medical Condition_Hypertension,Medical Condition_Obesity
0,30,1,18856.281306,2,2,2,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,62,1,33643.327287,1,1,6,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,76,0,27955.096079,1,2,15,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,28,0,37909.78241,0,0,30,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,43,0,14238.317814,2,0,20,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0


In [8]:
# Split the frame into the label column (y) and every other column (X).
y = df['Admission Type']
X = df.drop(columns=['Admission Type'])

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
# NOTE(review): a random row-wise split assumes rows are independent. If the
# CSV contains duplicated records they can land on both sides of the split and
# inflate the reported scores — TODO confirm with `df.duplicated().sum()`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Standardise the features. The scaler's statistics are learned from the
# training rows only and then applied unchanged to the held-out rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Decision Tree
# Fit a single decision tree on the scaled training features and report its
# accuracy and per-class precision/recall on the held-out set.
# random_state is fixed (same seed as the train/test split) because the tree
# builder permutes features when searching for splits; without a seed the
# fitted tree, and therefore the printed scores, can differ between runs.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train_scaled, y_train)
y_pred_dt = dt.predict(X_test_scaled)
print("Decision Tree")
print(f"Accuracy: {accuracy_score(y_test, y_pred_dt)}")
print(classification_report(y_test, y_pred_dt))

Decision Tree
Accuracy: 0.8817117117117117
              precision    recall  f1-score   support

           0       0.88      0.89      0.88      7401
           1       0.88      0.88      0.88      7311
           2       0.89      0.88      0.89      7488

    accuracy                           0.88     22200
   macro avg       0.88      0.88      0.88     22200
weighted avg       0.88      0.88      0.88     22200



In [13]:
# Initialize and train the model
# random_state is fixed (same seed as the train/test split) so the bootstrap
# samples and per-split feature subsets — and hence the printed scores — are
# reproducible on a fresh Restart & Run All.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)

# Evaluate the model on the held-out set: overall accuracy plus per-class
# precision, recall and F1.
print("Random Forest Classifier")
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf)}")
print(classification_report(y_test, y_pred_rf))


Random Forest Classifier
Accuracy: 0.8890990990990991
              precision    recall  f1-score   support

           0       0.89      0.89      0.89      7401
           1       0.89      0.88      0.89      7311
           2       0.89      0.90      0.89      7488

    accuracy                           0.89     22200
   macro avg       0.89      0.89      0.89     22200
weighted avg       0.89      0.89      0.89     22200

