In [4]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error as MSE

### Import and define data

In [5]:
df = pd.read_csv('indian_liver_patient_preprocessed.csv', index_col = 0)

In [6]:
df.head(2)

Unnamed: 0,Age_std,Total_Bilirubin_std,Direct_Bilirubin_std,Alkaline_Phosphotase_std,Alamine_Aminotransferase_std,Aspartate_Aminotransferase_std,Total_Protiens_std,Albumin_std,Albumin_and_Globulin_Ratio_std,Is_male_std,Liver_disease
0,1.247403,-0.42032,-0.495414,-0.42887,-0.355832,-0.319111,0.293722,0.203446,-0.14739,0,1
1,1.062306,1.218936,1.423518,1.675083,-0.093573,-0.035962,0.939655,0.077462,-0.648461,1,1


In [7]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

### Split into train and test sets

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

### Instantiate classifiers

In [9]:
dt = DecisionTreeClassifier(random_state=1)
dt_2 = DecisionTreeClassifier(random_state=1)
bc = BaggingClassifier(base_estimator=dt, n_estimators=50, random_state=1)

### Fit and predict, calculate accuracy score

In [10]:
dt_2.fit(X_train, y_train)
y_pred_2 = dt_2.predict(X_test)
acc_test_2 = accuracy_score(y_test, y_pred_2)
print('Test set accuracy of bc: {:.2f}'.format(acc_test_2)) 

Test set accuracy of bc: 0.69


In [11]:
bc.fit(X_train, y_train)
y_pred = bc.predict(X_test)
acc_test = accuracy_score(y_test, y_pred)
print('Test set accuracy of bc: {:.2f}'.format(acc_test)) 

Test set accuracy of bc: 0.72


Thanks to the introduction of the bagging method, the results were improved 

### Calculating RMSE

In [18]:
rmse_test = MSE(y_test, y_pred_2) ** (1/2)
round(rmse_test, 3)

0.557

In [19]:
rmse_test = MSE(y_test, y_pred) ** (1/2)
round(rmse_test, 3)

0.525

In [20]:
print('Test set RMSE of rf: {:.2f}'.format(rmse_test))

Test set RMSE of rf: 0.53
