In [14]:
from sklearn import datasets 
import pandas as pd
import numpy as np 

# Training sample drawn from the noisy two-moons generator. A fixed
# random_state makes the sample, and every score derived from it,
# reproducible after a kernel restart.
X, y = datasets.make_moons(n_samples=1000, noise=0.4, random_state=42)

feature_names = ["Feature #0", "Features #1"]
target_name = "class"

# Put the two feature columns and the label column side by side in one
# labelled frame, then split it back into a feature frame and a target series.
moons = pd.DataFrame(np.concatenate([X, y[:, np.newaxis]], axis=1),
                     columns=feature_names + [target_name])
data_moons, target_moons = moons[feature_names], moons[target_name]

In [15]:
from sklearn.ensemble import RandomForestClassifier 
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from sklearn.ensemble import VotingClassifier 

# Three different base classifiers and a hard-voting ensemble built on them.
# The forest gets a fixed random_state so that its fit is reproducible.
random_forest_clf = RandomForestClassifier(random_state=42)
log_reg = LogisticRegression()
svm_clf = SVC()
# NOTE(review): the label 'smv' looks like a typo for 'svm'. It is left
# untouched because it is the name under which the ensemble registers the
# estimator, so changing it would change a runtime key.
voting_class = VotingClassifier(
    estimators=[('random_forest', random_forest_clf), ('lg', log_reg), ('smv', svm_clf)],
    voting='hard'
)


In [21]:
# Fit the three base classifiers on the current X, y so each can be scored
# on its own, then fit the ensemble. The ensemble fit stays the cell's final
# expression so the notebook still displays the fitted estimator.
for base_clf in (random_forest_clf, log_reg, svm_clf):
    base_clf.fit(X, y)
voting_class.fit(X, y)

In [20]:
# Creating the test case: a fresh sample from the same generator, replacing
# X and y. The fixed seed makes the evaluation reproducible. It must never be
# a seed that is also used to draw training data, otherwise the "test" set
# would be identical to the training set.
X, y = datasets.make_moons(n_samples=1000, noise=0.4, random_state=43)

feature_names = ["Feature #0", "Features #1"]
target_name = "class"

# Labelled frame with the two feature columns followed by the label column.
moons = pd.DataFrame(np.concatenate([X, y[:, np.newaxis]], axis=1),
                     columns=feature_names + [target_name])
data_moons, target_moons = moons[feature_names], moons[target_name]

In [19]:
from sklearn.metrics import accuracy_score 

# Print the accuracy of each base classifier and of the ensemble on the
# current X, y.
for model in (random_forest_clf, log_reg, svm_clf, voting_class):
    y_pred = model.predict(X)
    # accuracy_score is documented as (y_true, y_pred): ground truth first.
    score = accuracy_score(y, y_pred)
    print(model.__class__.__name__, score)

RandomForestClassifier 0.852
LogisticRegression 0.835
SVC 0.863
VotingClassifier 0.854


In [32]:
## Trying it again but with soft voting
# The SVC is rebuilt with probability=True for the soft-voting ensemble.
# Its probability estimates involve random shuffling, so it is seeded to keep
# the results reproducible.
svm_clf = SVC(probability=True, random_state=42)
voting_class = VotingClassifier(
    estimators=[('random_forest', random_forest_clf), ('lg', log_reg), ('smv', svm_clf)],
    voting='soft'
)

# New training sample for this experiment (replaces X and y), seeded for
# reproducibility. Do not reuse this seed for data the models are scored on.
X, y = datasets.make_moons(n_samples=1000, noise=0.4, random_state=44)

feature_names = ["Feature #0", "Features #1"]
target_name = "class"

moons = pd.DataFrame(np.concatenate([X, y[:, np.newaxis]], axis=1),
                     columns=feature_names + [target_name])
data_moons, target_moons = moons[feature_names], moons[target_name]

# Refit every base classifier on the new sample, then the ensemble.
random_forest_clf.fit(X, y)
log_reg.fit(X, y)
svm_clf.fit(X, y)
voting_class.fit(X, y)

In [30]:
# Fresh sample that replaces X and y; in file order the next cell scores the
# models on it. The fixed seed makes the evaluation reproducible. It must not
# be a seed that is also used for training data, otherwise the models would
# be scored on the very sample they were fitted on.
X, y = datasets.make_moons(n_samples=1000, noise=0.4, random_state=45)

feature_names = ["Feature #0", "Features #1"]
target_name = "class"

# Labelled frame with the two feature columns followed by the label column.
moons = pd.DataFrame(np.concatenate([X, y[:, np.newaxis]], axis=1),
                     columns=feature_names + [target_name])
data_moons, target_moons = moons[feature_names], moons[target_name]

In [34]:
# Print the accuracy of each base classifier and of the ensemble on the
# current X, y.
# NOTE(review): the saved output of this cell reports RandomForestClassifier
# 1.0, and the execution counts show the data cell (In [30]) ran before the
# training cell (In [32]). The models were therefore presumably scored on the
# data they were trained on. Re-run the notebook top to bottom before
# trusting these numbers.
for model in (random_forest_clf, log_reg, svm_clf, voting_class):
    y_pred = model.predict(X)
    # accuracy_score is documented as (y_true, y_pred): ground truth first.
    score = accuracy_score(y, y_pred)
    print(model.__class__.__name__, score)

RandomForestClassifier 1.0
LogisticRegression 0.837
SVC 0.857
VotingClassifier 0.894


In [36]:
# Creating train and test cases

feature_names = ["Feature #0", "Features #1"]
target_name = "class"


def _sample_moons(random_state):
    """Draw one two-moons sample and return it as ``(X, y, frame)``.

    ``X`` and ``y`` are the raw arrays returned by ``make_moons``; ``frame``
    is a DataFrame holding the feature columns followed by the label column,
    named after ``feature_names`` and ``target_name``.
    """
    X, y = datasets.make_moons(n_samples=1000, noise=0.4, random_state=random_state)
    frame = pd.DataFrame(np.concatenate([X, y[:, np.newaxis]], axis=1),
                         columns=feature_names + [target_name])
    return X, y, frame


# Two different seeds: both samples are reproducible, and the test sample is
# guaranteed not to be the training sample.
X, y, moons = _sample_moons(random_state=0)
X_train, y_train = moons[feature_names], moons[target_name]

X, y, moons = _sample_moons(random_state=1)
X_test, y_test = moons[feature_names], moons[target_name]

In [38]:
# Using Bagging and Pasting 

from sklearn.ensemble import BaggingClassifier 
from sklearn.tree import DecisionTreeClassifier

# Train the same 500-tree ensemble twice. The two runs differ only in the
# bootstrap flag: True draws each tree's samples with replacement (bagging),
# False draws them without replacement (pasting).
for label, with_replacement in (("With Replacement", True),
                                ("Without Replacement", False)):
    bag_clf = BaggingClassifier(
        estimator=DecisionTreeClassifier(),
        n_estimators=500,
        max_samples=100,
        n_jobs=1,
        bootstrap=with_replacement,
        random_state=42,  # fixed seed so both scores are reproducible
    )
    bag_clf.fit(X_train, y_train)
    predict = bag_clf.predict(X_test)
    # accuracy_score is documented as (y_true, y_pred): ground truth first.
    print(label, accuracy_score(y_test, predict))

With Replacement 0.875
Without Replacement 0.873


In [40]:
# Out-of-bag evaluation: compare the ensemble's out-of-bag score with its
# accuracy on the separate test set.
bag_clf = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    n_jobs=1,
    bootstrap=True,   # out-of-bag scoring is only available with bootstrap sampling
    oob_score=True,
    random_state=42,  # fixed seed so both numbers are reproducible
)

bag_clf.fit(X_train, y_train)
print('Out of bag score', bag_clf.oob_score_)
predict = bag_clf.predict(X_test)
# accuracy_score is documented as (y_true, y_pred): ground truth first.
print('accuracy score', accuracy_score(y_test, predict))

Out of bag score 0.836
accuracy score 0.872


In [41]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 500 trees, each limited to 16 leaf nodes, using all
# available cores. The fixed random_state makes the reported accuracy
# reproducible.
rfc = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1,
                             random_state=42)
rfc.fit(X_train, y_train)

predict = rfc.predict(X_test)
# accuracy_score is documented as (y_true, y_pred): ground truth first.
print("accuracy score", accuracy_score(y_test, predict))

accuracy score 0.871
