In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine


In [None]:
# Load the wine dataset bundle and unpack its feature matrix and labels.
dataObj = load_wine()
X, y = dataObj.data, dataObj.target


In [None]:
# Create DataFrame with features
# Build the frame straight from dataObj instead of the notebook-level X:
# X is rebound further down to a two-feature array, so reading it here would
# make this cell fail (column-count mismatch) when it is re-run later.
dfori = pd.DataFrame(dataObj.data, columns=dataObj.feature_names)


In [None]:
# Add class column
# DataFrame.insert raises ValueError when the column already exists, so the
# call is guarded to keep this cell safe to re-run. The labels come from
# dataObj rather than y, because y is rebound to a filtered subset later on.
if "Class" not in dfori.columns:
    dfori.insert(loc=0, column="Class", value=dataObj.target)


In [None]:
# Preview the first five rows (label column followed by the features).
dfori.head(n=5)


In [None]:
# Keep only the rows labelled class 0 or class 1 (binary problem).
filt = dfori["Class"].isin([0, 1])
df = dfori[filt]


In [None]:
# Reduce the problem to two features so it can be drawn in a plane,
# and pull features / labels out as NumPy arrays.
feature_cols = ["alcohol", "malic_acid"]
X = df[feature_cols].to_numpy()
y = df["Class"].to_numpy()



In [None]:
# Hold out 40% of the samples for testing; stratify on y so both splits
# keep the same class proportions, and fix the seed for reproducibility.
split_parts = train_test_split(X, y, stratify=y, test_size=0.4, random_state=1)
X_train, X_test, y_train, y_test = split_parts



In [None]:
# Fit the scaler on the training split only, then apply the same
# transformation to both splits so no test statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)


In [None]:
# Base learners and the soft-voting ensemble built on top of them
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# Three individual models
lr = LogisticRegression(random_state=0, C=0.005)
dt = DecisionTreeClassifier(criterion="entropy", max_depth=2, random_state=0)
knn = KNeighborsClassifier(metric="minkowski", p=2, n_neighbors=1)

# (name, estimator) pairs in the form VotingClassifier expects
estimators = [
    ("lr", lr),
    ("dt", dt),
    ("knn", knn),
]

# voting="soft" is passed so the ensemble combines predicted probabilities
vc = VotingClassifier(voting="soft", estimators=estimators)



In [None]:
# Fit every classifier, score it on the test split, and tabulate the results
names = ["Logistic Regression", "Decision Tree", "K-Nearest Neighbor", "Voting Classifier"]
clfs = [lr, dt, knn, vc]
accs, misses = [], []

for clf in clfs:
    # Train on the standardized training split, predict the standardized test split
    y_pred = clf.fit(X_train_std, y_train).predict(X_test_std)
    accs.append(accuracy_score(y_test, y_pred))
    # Number of misclassified test samples
    misses.append(np.sum(y_test != y_pred))

# One row per classifier: display name, accuracy, misclassification count
dfResult = pd.DataFrame({"names": names, "ACC": accs, "miss": misses})
display(dfResult)



In [None]:
# Decision surface
# Draw each classifier's decision regions over the standardized feature plane
# and overlay the held-out test points, one subplot per classifier.
from itertools import product

all_clf = clfs
clf_labels = names

# Work on the *_std arrays directly instead of rebinding X_train / X_test,
# so the raw split stays intact and the earlier cells remain re-runnable.
# Grid bounds: extent of the test points padded by one unit on each side.
x_min = X_test_std[:, 0].min() - 1
x_max = X_test_std[:, 0].max() + 1
y_min = X_test_std[:, 1].min() - 1
y_max = X_test_std[:, 1].max() + 1

# Evaluation grid with a 0.1 step in both directions
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))

f, axarr = plt.subplots(nrows=2, ncols=2, sharex="col", sharey="row", figsize=(10, 8))

# product([0, 1], [0, 1]) yields the subplot positions in row-major order
for idx, clf, tt in zip(product([0, 1], [0, 1]), all_clf, clf_labels):
    ax = axarr[idx[0], idx[1]]

    # Fit on the standardized training split, then classify every grid point
    clf.fit(X_train_std, y_train)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    ax.contourf(xx, yy, Z, alpha=0.3)

    # Test samples: class 0 as blue triangles, class 1 as red circles
    ax.scatter(
        X_test_std[y_test == 0, 0],
        X_test_std[y_test == 0, 1],
        c="blue",
        marker="^",
        s=50,
    )
    ax.scatter(
        X_test_std[y_test == 1, 0],
        X_test_std[y_test == 1, 1],
        c="red",
        marker="o",
        s=50,
    )

    ax.set_title(tt)
    ax.set_xlabel("Alcohol")
    ax.set_ylabel("Malic Acid")

plt.tight_layout(pad=3.0)
plt.show()
