In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [3]:
# Read the kyphosis dataset (CSV expected in the working directory) and
# preview its first five rows.
df = pd.read_csv(filepath_or_buffer="kyphosis.csv")
df.head(n=5)

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15


In [4]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(81, 4)

In [5]:
# Separate the predictor columns from the target column.
feature_cols = ['Age', 'Number', 'Start']
X = df[feature_cols]
y = df['Kyphosis']

In [6]:
# Convert the string class labels into integer codes. The fitted encoder is
# kept so integer predictions can be mapped back to the original labels.
le_encoder = LabelEncoder()
y = le_encoder.fit_transform(y=y)

In [7]:
sc = StandardScaler()
X = sc.fit_transform(X)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.15, random_state=20)

## Voting Ensemble Approach

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier

In [10]:
VotingClassifier?

Init signature:
VotingClassifier(
    estimators,
    *,
    voting='hard',
    weights=None,
    n_jobs=None,
    flatten_transform=True,
    verbose=False,
)
Docstring:
Soft Voting/Majority Rule classifier for unfitted estimators.

Read more in the :ref:`User Guide <voting_classifier>`.

.. versionadded:: 0.17

Parameters
----------
estimators : list of (str, estimator) tuples
    Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones
    of those original estimators that will be stored in the class attribute
    ``self.estimators_``. An estimator can be set to ``'drop'`` using
    :meth:`set_params`.

In [11]:
# Base learners for the ensemble, each left at its library default settings.
lr_model, svc, knn = LogisticRegression(), SVC(), KNeighborsClassifier()

In [12]:
# (name, estimator) pairs in the list-of-tuples form VotingClassifier expects.
estimators = list(zip(('lr', 'svc', 'knn'), (lr_model, svc, knn)))

In [13]:
# Majority-vote ensemble over the base learners; 'hard' is the library default
# and is spelled out here for readability.
vc = VotingClassifier(estimators=estimators, voting='hard')
vc.fit(X=X_train, y=y_train)

In [14]:
# Ensemble class predictions (integer-encoded) for the held-out rows.
vc.predict(X=X_test)

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

In [15]:
# Mean accuracy of the voting ensemble on the held-out rows.
vc.score(X=X_test, y=y_test)

0.6923076923076923

In [16]:
# Mean accuracy on the training rows, for comparison with the held-out score.
vc.score(X=X_train, y=y_train)

0.8529411764705882

In [17]:
# Predictions from the fitted logistic-regression member of the ensemble alone.
vc.named_estimators_.lr.predict(X_test)

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

## Bagging Approach

In [18]:
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, ExtraTreesClassifier

In [19]:
# Random forest of 20 trees, seeded so the fit is repeatable.
rfc = RandomForestClassifier(
    n_estimators=20,
    random_state=2,
)

In [20]:
# Train the forest on the training partition.
rfc.fit(X=X_train, y=y_train)

In [21]:
# Mean accuracy of the random forest on the held-out rows.
rfc.score(X=X_test, y=y_test)

0.8461538461538461

In [22]:
# Bagging ensemble of 20 k-nearest-neighbour classifiers. The bootstrap
# resampling is random, so a fixed seed is supplied (matching the seed used
# for the random forest) to make the reported score reproducible.
bc = BaggingClassifier(estimator=knn, n_estimators=20, random_state=2)

In [23]:
# Train the bagging ensemble on the training partition.
bc.fit(X=X_train, y=y_train)


In [24]:
# Mean accuracy of the bagging ensemble on the held-out rows.
bc.score(X=X_test, y=y_test)

0.7692307692307693

In [38]:
# Extremely-randomised trees ensemble of 20 trees. Split thresholds are drawn
# at random, so a fixed seed is supplied (matching the seed used for the
# random forest) to make the fit and its score reproducible.
etc = ExtraTreesClassifier(n_estimators=20, random_state=2)
etc.fit(X_train, y_train)

In [39]:
# Mean accuracy of the extra-trees ensemble on the held-out rows.
etc.score(X=X_test, y=y_test)

0.9230769230769231

## Boosting Approach

In [25]:
from sklearn.ensemble import AdaBoostClassifier

In [26]:
# AdaBoost with 20 boosting rounds over the default base estimator. The seed
# is forwarded to each base estimator, keeping the fit reproducible and
# consistent with the seeded random forest and train/test split.
abc = AdaBoostClassifier(n_estimators=20, random_state=2)

In [27]:
# Train the boosted ensemble on the training partition.
abc.fit(X=X_train, y=y_train)

In [28]:
# Mean accuracy of the boosted ensemble on the held-out rows.
abc.score(X=X_test, y=y_test)

0.8461538461538461

In [29]:
# Fit the stand-alone logistic regression (VotingClassifier only fitted a clone).
lr_model.fit(X=X_train, y=y_train)

In [30]:
# Per-row class probabilities; column 1 is the probability of the class
# encoded as 1.
class_probs = lr_model.predict_proba(X_test)
proba1 = class_probs[:, 1]

In [31]:
# Class predictions from the model's built-in decision rule.
y_pred = lr_model.predict(X=X_test)

In [32]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6923076923076923

In [33]:
# Custom decision threshold: a row is labelled 1 when its class-1 probability
# is at least this value.
# NOTE(review): 0.3 appears to have been picked by looking at held-out
# accuracy; if so, the resulting score is optimistic — confirm on a separate
# validation split.
POSITIVE_THRESHOLD = 0.3

# Vectorised comparison instead of a Python-level loop over the array.
y_pred = (proba1 >= POSITIVE_THRESHOLD).astype(int)

In [34]:
# Accuracy of the current `y_pred` against the held-out labels.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7692307692307693

In [35]:
# Map the integer predictions back to the original string class labels.
le_encoder.inverse_transform(y=y_pred)

array(['present', 'absent', 'absent', 'absent', 'absent', 'present',
       'absent', 'absent', 'absent', 'present', 'present', 'absent',
       'absent'], dtype=object)