In [34]:
from sklearn.datasets import make_moons, load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

In [2]:
moons_data = make_moons(n_samples=10000, noise=0.4)

In [3]:
X_train, X_test, y_train, y_test = train_test_split(moons_data[0], moons_data[1], 
                                                   test_size = 0.2, random_state = 42)

In [4]:
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

In [5]:
voting_clf = VotingClassifier(
            estimators=[('lr', log_clf), ('rf', svm_clf), ('svc', svm_clf)], 
            voting='hard')
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()), ('rf', SVC()),
                             ('svc', SVC())])

In [6]:
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.829
RandomForestClassifier 0.839
SVC 0.8605
VotingClassifier 0.8605


In [7]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, 
                           max_samples = 100, bootstrap=True, n_jobs = -1)

In [8]:
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1)

In [9]:
y_pred = bag_clf.predict(X_test)

In [10]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, 
                           oob_score = True, max_samples = 100, bootstrap=True, n_jobs = -1)

In [11]:
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1, oob_score=True)

In [12]:
bag_clf.oob_score_

0.859875

In [13]:
y_pred = bag_clf.predict(X_test)

In [14]:
accuracy_score(y_test, y_pred)

0.8605

In [15]:
bag_clf.oob_decision_function_

array([[0.07535642, 0.92464358],
       [0.02626263, 0.97373737],
       [0.04887984, 0.95112016],
       ...,
       [0.95731707, 0.04268293],
       [0.95362903, 0.04637097],
       [0.0583501 , 0.9416499 ]])

In [16]:
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes = 16, n_jobs=-1)

In [17]:
rnd_clf.fit(X_train, y_train)

RandomForestClassifier(max_leaf_nodes=16, n_estimators=500, n_jobs=-1)

In [18]:
y_pred = rnd_clf.predict(X_test)

In [19]:
accuracy_score(y_test, y_pred)

0.856

In [20]:
iris = load_iris()

In [21]:
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)

In [22]:
rnd_clf.fit(iris["data"], iris["target"])

RandomForestClassifier(n_estimators=500, n_jobs=-1)

In [23]:
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.09310263262009509
sepal width (cm) 0.02442416945293038
petal length (cm) 0.4479285234279348
petal width (cm) 0.4345446744990398


In [24]:
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200, algorithm="SAMME.R", learning_rate=0.5)

In [25]:
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200)

In [29]:
tree_reg1 = DecisionTreeRegressor(max_depth=2)
tree_reg1.fit(moons_data[0], moons_data[1])

DecisionTreeRegressor(max_depth=2)

In [31]:
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=0.1)

In [32]:
gbrt.fit(moons_data[0], moons_data[1])

GradientBoostingRegressor(max_depth=2, n_estimators=3)

In [33]:
gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True)

In [35]:
min_val_error = float("inf")
error_going_up = 0
for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_test)
    val_error = mean_squared_error(y_test, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        error_going_up = 0
    else:
        error_going_up += 1
        if error_going_up == 5:
            break