# Advanced Machine Learning 

In [54]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
pd.options.display.max_columns = 50


# Load dataset (iris), split into X_train, y_train, X_test, y_test!
# Write your code here 

iris = datasets.load_iris()
X = iris.data[:, :]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [48]:
pd.Series(y_train).value_counts()

2    37
1    37
0    31
dtype: int64

## 1. Voting 

In [38]:
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

# Hint: 
# clf_voting = VotingClassifier( estimators=[('label1', clf_1),
# ('label2', clf_2),
# ('labelN', clf_N)]) 

# Create the individual models
clf_knn = KNeighborsClassifier()
clf_dt = DecisionTreeClassifier()
clf_lr = LogisticRegression()

# Create voting classifier
clf_voting = VotingClassifier(estimators=[
('knn', clf_knn),
('dt', clf_dt),
('lr', clf_lr)])

# Fit it to the training set and predict
clf_voting.fit(X_train, y_train)
y_pred = clf_voting.predict(X_test)

# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 1.000


In [39]:
clf_knn.fit(X_train, y_train)

y_pred = clf_knn.predict(X_test)

# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 1.000


In [40]:
clf_dt.fit(X_train, y_train)

y_pred = clf_dt.predict(X_test)

# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 1.000


In [41]:
clf_lr.fit(X_train, y_train)

y_pred = clf_lr.predict(X_test)

# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.978


## 2. Averaging 

In [49]:
# Template for averaging Classifier 

from sklearn.ensemble import VotingClassifier

clf_voting = VotingClassifier(estimators=[
('knn', clf_knn),
('dt', clf_dt),
('lr', clf_lr)], voting="soft")

# Fit it to the training set and predict
clf_voting.fit(X_train, y_train)
y_pred = clf_voting.predict(X_test)

# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 1.000


In [None]:
# Initiate the individual models 

# Write your code here! 

# Create averaging classifier

# Write your code here! 

## 3. Bagging

In [62]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

bc = datasets.load_breast_cancer()

X = bc.data[:, :]
y = bc.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

bagging = BaggingClassifier(KNeighborsClassifier(),
                           max_samples=0.5, max_features=0.5)

In [71]:
pd.Series(y).value_counts(normalize=True)

1    0.627417
0    0.372583
dtype: float64

In [63]:
# Fit it to the training set and predict
bagging.fit(X_train, y_train)
y_pred = bagging.predict(X_test)


# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.942


In [67]:
# Write your code here if you use RandomForest, compare with above!

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()

# Fit it to the training set and predict
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)


# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.965


## 4. Boosting
Source: https://scikit-learn.org/stable/modules/ensemble.html

In [73]:
from sklearn.ensemble import AdaBoostClassifier
clf_ada = AdaBoostClassifier(
DecisionTreeClassifier(),
n_estimators=75,
learning_rate=0.2
)

# base_estimator
# Default: Decision Tree (max_depth=1)
# n_estimators
# Default: 50
# learning_rate
# Default: 1.0
# Trade-off between n_estimators and
# learning_rate

# Fit it to the training set and predict
clf_ada.fit(X_train, y_train)
y_pred = clf_ada.predict(X_test)


# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.936


### Create AdaBoost Classifier for iris dataset!

In [None]:
# Write your code here!

In [75]:
from sklearn.ensemble import AdaBoostRegressor

clf_ada = AdaBoostRegressor(
    DecisionTreeClassifier(),
    n_estimators=75,
    learning_rate=0.2
)

# base_estimator
# Default: Decision Tree (max_depth=3)
# loss
# linear (default)
# square
# exponential

In [86]:
# Fit it to the training set and predict
clf_ada.fit(X_train, y_train)
y_pred = clf_ada.predict(X_test)


# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

### Create GradientBoostingClassifier for iris Dataset!

In [80]:
!pip install catboost
!pip install lightgbm
!pip install catboost

Collecting catboost
  Using cached https://files.pythonhosted.org/packages/be/1e/69b342a7630d1d84cdf29cda07916081b0a45092fb1fea41a6a16589fe1c/catboost-0.18.1-cp36-none-win_amd64.whl
Collecting graphviz
  Using cached https://files.pythonhosted.org/packages/f5/74/dbed754c0abd63768d3a7a7b472da35b08ac442cf87d73d5850a6f32391e/graphviz-0.13.2-py2.py3-none-any.whl
Installing collected packages: graphviz, catboost
Successfully installed catboost-0.18.1 graphviz-0.13.2


## 5. Stacking  

In [88]:
from mlxtend.classifier import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [92]:
# Instantiate the 1st-layer classifiers
clf1 = KNeighborsClassifier(5)
clf2 = DecisionTreeClassifier()
clf3 = LogisticRegression()

# Instantiate the 2nd-layer classifier
clf_meta = LogisticRegression()

# Build the Stacking classifier
clf_stack = StackingClassifier(
    classifiers=[clf1, clf2, clf3],
    meta_classifier=clf_meta,
    use_probas=False,
    use_features_in_secondary=False
)

# Use the fit and predict methods
clf_stack.fit(X_train, y_train)
y_pred = clf_stack.predict(X_test)

# Get the accuracy score
acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.924
