In [None]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

## Data maken m.b.v. moons

In [None]:
# Generate the noisy "moons" toy dataset; the fixed seed keeps reruns reproducible.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)

In [None]:
# Put both features and the class label in one frame so seaborn can colour by label.
df = pd.DataFrame(X).assign(label=y)

# Scatter column 0 against column 1, one hue per class, and label the returned axes.
sns.scatterplot(data=df, x=0, y=1, hue="label").set(
    xlabel="Feature x1",
    ylabel="Feature x2",
)
plt.show()

In [None]:
# Hold out part of the data for evaluation; no split size is given, so the
# library default applies, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

## Ensemble aanmaken

In [None]:
# Three different base learners, each seeded so that reruns give the same models.
base_estimators = [
    ("lr", LogisticRegression(random_state=42)),
    ("rf", RandomForestClassifier(random_state=42)),
    ("svc", SVC(random_state=42)),
]

# No `voting` argument is passed, so the ensemble uses the library's default mode.
voting_clf = VotingClassifier(estimators=base_estimators)
voting_clf.fit(X_train, y_train)

## Losse scores

In [None]:
# Test-set score of every fitted base estimator, one printed line per model.
for est_name, fitted_est in voting_clf.named_estimators_.items():
    test_score = fitted_est.score(X_test, y_test)
    print(est_name, "=", test_score)

In [None]:
# Show the first test sample; slicing (rather than indexing) keeps it as a one-row batch.
X_test[:1]

In [None]:
# Prediction of the whole ensemble for that same first test sample.
voting_clf.predict(X_test[:1])

In [None]:
# Show each fitted base estimator's own prediction for the first test sample.
# The result is keyed by estimator name so it is clear which model cast which vote
# (a bare list would leave the reader guessing the order).
{
    name: clf.predict(X_test[:1])
    for name, clf in voting_clf.named_estimators_.items()
}

## Hard voting

In [None]:
# Score of the ensemble on the held-out test set, to compare with the individual scores.
voting_clf.score(X_test, y_test)

## Soft voting
Hiervoor moet de SVC eerst zo worden ingesteld dat hij kansen (probabilities) per klasse teruggeeft in plaats van alleen het voorspelde label (0 of 1). Daarna moet het ensemble opnieuw worden getraind.

In [None]:
# Turn on probability estimates for the SVC held by the ensemble, using the
# nested "<estimator name>__<parameter>" syntax. The ensemble must be fitted
# again before this has any effect. The trailing ';' keeps the cell silent.
voting_clf.set_params(svc__probability=True);

In [None]:
# Switch the ensemble to soft voting, retrain it on the training data, and
# display its score on the test set. Each step returns the ensemble itself,
# so the three calls can be chained.
(
    voting_clf
    .set_params(voting="soft")
    .fit(X_train, y_train)
    .score(X_test, y_test)
)