In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import VotingClassifier

### Import and define data

In [2]:
df = pd.read_csv('indian_liver_patient_preprocessed.csv', index_col = 0)

In [3]:
df.head(3)

Unnamed: 0,Age_std,Total_Bilirubin_std,Direct_Bilirubin_std,Alkaline_Phosphotase_std,Alamine_Aminotransferase_std,Aspartate_Aminotransferase_std,Total_Protiens_std,Albumin_std,Albumin_and_Globulin_Ratio_std,Is_male_std,Liver_disease
0,1.247403,-0.42032,-0.495414,-0.42887,-0.355832,-0.319111,0.293722,0.203446,-0.14739,0,1
1,1.062306,1.218936,1.423518,1.675083,-0.093573,-0.035962,0.939655,0.077462,-0.648461,1,1
2,1.062306,0.640375,0.926017,0.816243,-0.115428,-0.146459,0.478274,0.203446,-0.178707,1,1


In [4]:
df.shape

(579, 11)

In [5]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

### Split into train and test sets

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

### Instantiate a classifiers

In [7]:
lr = LogisticRegression()
knn = KNN(n_neighbors=27)
dt = DecisionTreeClassifier(min_samples_leaf=0.13)

In [8]:
classifiers = [('Logistic Regression', lr),
               ('K Nearest Neighbours', knn),
               ('Claddification Tree', dt)]

### Loop over clfs - fit and predict

In [9]:
for clf_name, clf in classifiers:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print('{:s} : {:.3f}'.format(clf_name, accuracy))

Logistic Regression : 0.776
K Nearest Neighbours : 0.678
Claddification Tree : 0.730


### Instantiate a VotingClassifier

In [10]:
vc = VotingClassifier(estimators=classifiers)   

In [11]:
vc.fit(X_train, y_train)   
y_pred = vc.predict(X_test)

In [12]:
accuracy = accuracy_score(y_test, y_pred)
print('Voting Classifier: {:.3f}'.format(accuracy))

Voting Classifier: 0.753
