In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [4]:
# Load the dataset

data = pd.read_csv('indian_liver_patient.csv')

In [5]:
data.shape

(583, 11)

In [6]:
data.head

<bound method NDFrame.head of      Age  Gender  Total_Bilirubin  Direct_Bilirubin  Alkaline_Phosphotase  \
0     65  Female              0.7               0.1                   187   
1     62    Male             10.9               5.5                   699   
2     62    Male              7.3               4.1                   490   
3     58    Male              1.0               0.4                   182   
4     72    Male              3.9               2.0                   195   
..   ...     ...              ...               ...                   ...   
578   60    Male              0.5               0.1                   500   
579   40    Male              0.6               0.1                    98   
580   52    Male              0.8               0.2                   245   
581   31    Male              1.3               0.5                   184   
582   38    Male              1.0               0.3                   216   

     Alamine_Aminotransferase  Aspartate_Amin

In [14]:
# Select features and target
features = [
    'Age', 
    'Total_Bilirubin', 
    'Direct_Bilirubin', 
    'Alkaline_Phosphotase',
    'Alamine_Aminotransferase', 
    'Total_Protiens', 
    'Albumin'
]
X = data[features]
#y = data['Dataset']

# Adjust target labels from 1 and 2 to 0 and 1
y = data['Dataset'] - 1

In [15]:
X.shape

(583, 7)

In [16]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [39]:
# Random Forest Classifier
rf_model = RandomForestClassifier(random_state=42, max_depth=30, n_estimators=100, oob_score=True, n_jobs=-1)
rf_model.fit(X_train, y_train)

# Predictions
rf_predictions = rf_model.predict(X_test)

# Accuracy
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f'Random Forest Accuracy: {rf_accuracy:.2f}')

Random Forest Accuracy: 0.73


In [61]:
# AdaBoost Classifier
ab_model = AdaBoostClassifier(random_state=42, n_estimators=50, learning_rate=1)

ab_model.fit(X_train, y_train)


In [41]:
# Predictions
ab_predictions = ab_model.predict(X_test)

# Accuracy
ab_accuracy = accuracy_score(y_test, ab_predictions)
print(f'AdaBoost Accuracy: {ab_accuracy:.2f}')

AdaBoost Accuracy: 0.72


In [62]:
from sklearn.tree import DecisionTreeClassifier

# Instantiate the classifier class
tree_clf = DecisionTreeClassifier()

# Grow a Decision Tree
tree_clf.fit(X_train, y_train)
y_pred=tree_clf.predict(X_test)

In [63]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
print(accuracy_score(y_test, y_pred))

[[60 27]
 [13 17]]
              precision    recall  f1-score   support

           0       0.82      0.69      0.75        87
           1       0.39      0.57      0.46        30

    accuracy                           0.66       117
   macro avg       0.60      0.63      0.60       117
weighted avg       0.71      0.66      0.68       117

0.6581196581196581


In [64]:
# initialize the base classifier
base_cls = DecisionTreeClassifier()
# no. of base classifier
num_trees = 500

from sklearn.ensemble import BaggingClassifier
# bagging classifier
model = BaggingClassifier(base_estimator = base_cls,
                          n_estimators = num_trees)
model.fit(X_train, y_train)



In [65]:
y_np=model.predict(X_test)

In [66]:
#Visualizing Report:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

print(confusion_matrix(y_test,y_np))
print(classification_report(y_test,y_np))
print(accuracy_score(y_test, y_np))

[[73 14]
 [18 12]]
              precision    recall  f1-score   support

           0       0.80      0.84      0.82        87
           1       0.46      0.40      0.43        30

    accuracy                           0.73       117
   macro avg       0.63      0.62      0.62       117
weighted avg       0.71      0.73      0.72       117

0.7264957264957265


In [72]:
# Import the AdaBoost classifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
bb=SVC()

# Create adaboost classifer object
abc = AdaBoostClassifier(base_estimator=bb, n_estimators=10, random_state=0, algorithm='SAMME')

# Train Adaboost Classifer
model1 = abc.fit(X_train, y_train)


#Predict the response for test dataset
y_pred = model1.predict(X_test)



In [73]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
print(classification_report(y_test, y_pred))
print()
print("Confusion Matrix: \n", confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.74      1.00      0.85        87
           1       0.00      0.00      0.00        30

    accuracy                           0.74       117
   macro avg       0.37      0.50      0.43       117
weighted avg       0.55      0.74      0.63       117


Confusion Matrix: 
 [[87  0]
 [30  0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [75]:
# Decision Tree as base estimator
dt_base = DecisionTreeClassifier()

# Random Forest
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f'Random Forest Accuracy: {rf_accuracy:.2f}')

#AdaBoost with Decision Tree

dt_base = DecisionTreeClassifier(max_depth=1)
ab_model = AdaBoostClassifier(
    base_estimator=dt_base,
    algorithm='SAMME',
    random_state=42
)
ab_model.fit(X_train, y_train)
ab_predictions = ab_model.predict(X_test)
ab_accuracy = accuracy_score(y_test, ab_predictions)
print(f'AdaBoost Accuracy: {ab_accuracy:.2f}')

Random Forest Accuracy: 0.73
AdaBoost Accuracy: 0.75


