In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

X, y = make_moons(n_samples=500, noise = .3, random_state=42)

log_clf = LogisticRegression(solver="lbfgs", random_state=42)
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
svc_clf = SVC(gamma="scale", random_state=42, probability=True)

voting_clf = VotingClassifier(
    estimators=[('log',log_clf),
                ('rf',rf_clf),
                ('svc',svc_clf)],
    voting='soft'
)

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=.2, random_state=42)

voting_clf.fit(X_train,y_train)


VotingClassifier(estimators=[('log', LogisticRegression(random_state=42)),
                             ('rf', RandomForestClassifier(random_state=42)),
                             ('svc', SVC(probability=True, random_state=42))],
                 voting='soft')

In [2]:
from sklearn.metrics import accuracy_score

for clf in (log_clf,rf_clf,svc_clf,voting_clf):
    clf.fit(X_train,y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test,y_pred))

LogisticRegression 0.85
RandomForestClassifier 0.88
SVC 0.87
VotingClassifier 0.89


In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500, max_samples=100,
    bootstrap=True, n_jobs=-1
)

bag_clf.fit(X_train,y_train)
accuracy_score(y_test, bag_clf.predict(X_test))

0.91

In [4]:
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1, max_leaf_nodes=16)
rnd_clf.fit(X_train,y_train)
accuracy_score(y_test, rnd_clf.predict(X_test))


0.89

In [5]:
from sklearn.ensemble import ExtraTreesClassifier

rnd_clf = ExtraTreesClassifier(n_estimators=500, n_jobs=-1, max_leaf_nodes=16)
rnd_clf.fit(X_train,y_train)
accuracy_score(y_test, rnd_clf.predict(X_test))

0.9

In [6]:
rnd_clf.feature_importances_

array([0.4344176, 0.5655824])

In [7]:
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200,algorithm="SAMME.R",
    learning_rate=.5
)

ada_clf.fit(X_train, y_train)

accuracy_score(y_test, ada_clf.predict(X_test))

0.89

In [8]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=300, learning_rate=1)

gbrt.fit(X_train, y_train)

np.sqrt(mean_squared_error(y_test, gbrt.predict(X_test)))

0.41554250285712335

In [9]:
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120, learning_rate=1)

gbrt.fit(X_train, y_train)

errors = [mean_squared_error(y_test,y_pred) for y_pred in gbrt.staged_predict(X_test)]

best_n_estimator = np.argmin(errors) + 1

best_n_estimator

5

In [10]:
gbrt = GradientBoostingRegressor(max_depth=2,warm_start=True)

min_val_error = float("inf")
error_going_up = 0

for n_estimators in range(1,120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train,y_train)
    error = mean_squared_error(y_test,gbrt.predict(X_test))
    if error < min_val_error:
        min_val_error = error
        error_going_up = 0
    else:
        error_going_up += 1
        if error_going_up >= 5:
            break

In [11]:
n_estimators

58

In [12]:
import xgboost

In [18]:
xgb_reg = xgboost.XGBRegressor()
xgb_reg.fit(X_train,y_train, eval_set=[(X_test,y_test)],
    early_stopping_rounds=2)
y_pred = xgb_reg.predict(X_test)
np.sqrt(mean_squared_error(y_test,y_pred))

[0]	validation_0-rmse:0.40037
[1]	validation_0-rmse:0.34405
[2]	validation_0-rmse:0.31488
[3]	validation_0-rmse:0.29773
[4]	validation_0-rmse:0.29084
[5]	validation_0-rmse:0.29154
[6]	validation_0-rmse:0.28875
[7]	validation_0-rmse:0.28993
[8]	validation_0-rmse:0.29389




0.28874741160390127

In [20]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784', version=1, as_frame=False)
# mnist.target = mnist.target.astype(np.uint8)

In [29]:
from sklearn.model_selection import train_test_split

X_train_and_val, X_test, y_train_and_val, y_test = train_test_split(mnist.data,mnist.target, test_size=10_000) 

X_train, X_val, y_train, y_val = train_test_split(X_train_and_val, y_train_and_val, test_size=10_000)

In [31]:
X_train.shape

(50000, 784)

In [32]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC

rnd_clf = RandomForestClassifier()
ext_clf = ExtraTreesClassifier()
svm_clf = SVC()

rnd_clf.fit(X_train,y_train)
ext_clf.fit(X_train,y_train)
svm_clf.fit(X_train,y_train)

SVC()

In [33]:
print(
    accuracy_score(y_val,rnd_clf.predict(X_val)),
    accuracy_score(y_val,ext_clf.predict(X_val)),
    accuracy_score(y_val,svm_clf.predict(X_val))
)

0.9689 0.9714 0.9791


In [35]:
svm_clf.predict(X_val)

array(['6', '1', '5', ..., '4', '7', '7'], dtype=object)

In [36]:
r = rnd_clf.predict(X_val)
e = ext_clf.predict(X_val)
s = svm_clf.predict(X_val)



In [55]:
y_pred_val = np.c_[r,e,s].max(axis=1)

In [57]:
accuracy_score(y_val, y_pred_val)

0.9714

In [49]:
k[:5]

array([['6', '6', '6'],
       ['2', '2', '1'],
       ['5', '5', '5'],
       ['8', '8', '8'],
       ['0', '0', '0']], dtype=object)

In [44]:
r[:5]

array(['6', '2', '5', '8', '0'], dtype=object)

In [45]:
e[:5]

array(['6', '2', '5', '8', '0'], dtype=object)

In [46]:
s[:5]

array(['6', '1', '5', '8', '0'], dtype=object)

In [50]:
k[:5]

array([['6', '6', '6'],
       ['2', '2', '1'],
       ['5', '5', '5'],
       ['8', '8', '8'],
       ['0', '0', '0']], dtype=object)

In [56]:
y_pred_val

array(['6', '2', '5', ..., '4', '7', '7'], dtype=object)

In [58]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [59]:
random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)
extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
svm_clf = LinearSVC(max_iter=100, tol=20, random_state=42)
mlp_clf = MLPClassifier(random_state=42)

In [96]:
estimators = [random_forest_clf, extra_trees_clf, svm_clf, mlp_clf]

for estimator in estimators:
    print("Training:\t", estimator)
    estimator.fit(X_train, y_train)

Training:	 RandomForestClassifier(random_state=42)
Training:	 ExtraTreesClassifier(random_state=42)
Training:	 LinearSVC(max_iter=100, random_state=42, tol=20)
Training:	 MLPClassifier(random_state=42)


In [97]:
scores = [estimator.score(X_val, y_val) for estimator in estimators]

scores

[0.9687, 0.9722, 0.8297, 0.9666]

In [64]:
v = [k for k in zip(('random','extra','svc','mlp'),estimators)]

In [65]:
from sklearn.ensemble import VotingClassifier

voting = VotingClassifier (v)


In [66]:
voting

VotingClassifier(estimators=[('random',
                              RandomForestClassifier(random_state=42)),
                             ('extra', ExtraTreesClassifier(random_state=42)),
                             ('svc',
                              LinearSVC(max_iter=100, random_state=42, tol=20)),
                             ('mlp', MLPClassifier(random_state=42))])

In [67]:
voting.fit(X_train, y_train)

VotingClassifier(estimators=[('random',
                              RandomForestClassifier(random_state=42)),
                             ('extra', ExtraTreesClassifier(random_state=42)),
                             ('svc',
                              LinearSVC(max_iter=100, random_state=42, tol=20)),
                             ('mlp', MLPClassifier(random_state=42))])

In [69]:
voting.score(X_val,y_val)

0.9708

In [75]:
del voting.estimators_[2]

In [79]:
voting.set_params(svc=None)

VotingClassifier(estimators=[('random',
                              RandomForestClassifier(random_state=42)),
                             ('extra', ExtraTreesClassifier(random_state=42)),
                             ('svc', None),
                             ('mlp', MLPClassifier(random_state=42))])

In [80]:
voting.estimators

[('random', RandomForestClassifier(random_state=42)),
 ('extra', ExtraTreesClassifier(random_state=42)),
 ('svc', None),
 ('mlp', MLPClassifier(random_state=42))]

In [81]:
voting.estimators_

[RandomForestClassifier(random_state=42),
 ExtraTreesClassifier(random_state=42),
 MLPClassifier(random_state=42)]

In [85]:
voting.score(X_val, y_val)

0.9718

In [83]:
voting.voting = 'soft'
voting.score(X_val, y_val)

0.9718

In [87]:
X_val_predictions = np.empty((len(X_val), len(estimators)))



(10000, 4)

In [120]:
X_val_predictions = np.c_[[estimator.predict(X_val) for estimator in estimators]].T

In [121]:
X_val_predictions

array([['6', '6', '6', '6'],
       ['2', '2', '2', '7'],
       ['5', '5', '5', '5'],
       ...,
       ['4', '4', '1', '1'],
       ['7', '7', '9', '7'],
       ['7', '7', '7', '7']], dtype=object)

In [118]:
rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rnd_forest_blender.fit(X_val_predictions, y_val)

RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)

In [119]:
rnd_forest_blender.oob_score_

0.9731

In [123]:
X_test_predictions = np.c_[[estimator.predict(X_test) for estimator in estimators]].T
X_test_predictions

array([['2', '2', '2', '2'],
       ['0', '0', '0', '0'],
       ['2', '2', '4', '2'],
       ...,
       ['4', '4', '4', '4'],
       ['6', '6', '6', '6'],
       ['0', '0', '0', '0']], dtype=object)

In [124]:
accuracy_score(y_test, rnd_forest_blender.predict(X_test_predictions))

0.9688