In [1]:
import pandas as pd

from sklearn.base import RegressorMixin
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)
from sklearn.linear_model import (
    LinearRegression,
    LogisticRegression,
)
from sklearn.preprocessing import OneHotEncoder
from datasets.vowel import X_train, y_train, X_test, y_test

In [2]:
def error_rate(estimator, X_train, y_train, X_test, y_test):
    """Fit ``estimator`` and report its misclassification rate per split.

    The estimator is fitted on ``(X_train, y_train)`` and then asked to
    predict on both the training and the test inputs.  The score for a split
    is the mean of the element-wise ``target != prediction`` comparison.

    When ``estimator`` is a ``RegressorMixin`` instance, the targets are
    one-hot encoded before fitting (the encoder is fitted on ``y_train`` and
    reused for ``y_test``), and both the encoded targets and the predictions
    are reduced with ``argmax(axis=1)`` before being compared.

    Returns:
        dict with the keys ``'Training'`` and ``'Test'`` (in that order),
        each mapped to the error rate on the corresponding split.
    """
    is_regressor = isinstance(estimator, RegressorMixin)

    if is_regressor:
        # ``to_frame`` gives the encoder the 2-D input it is called with.
        encoder = OneHotEncoder()
        y_train = encoder.fit_transform(y_train.to_frame()).toarray()
        y_test = encoder.transform(y_test.to_frame()).toarray()

    estimator.fit(X_train, y_train)

    def misclassified_fraction(features, targets):
        # Share of rows whose predicted label differs from the target.
        predicted = estimator.predict(features)
        if is_regressor:
            # Compare column indices of the largest entry in each row.
            targets = targets.argmax(axis=1)
            predicted = predicted.argmax(axis=1)
        return (targets != predicted).mean()

    return {
        'Training': misclassified_fraction(X_train, y_train),
        'Test': misclassified_fraction(X_test, y_test),
    }

In [3]:
# Show floats with two decimals.  The fully-qualified option name is used
# because the bare 'precision' pattern matches more than one option in pandas
# versions that also define 'styler.format.precision', raising OptionError.
pd.set_option('display.precision', 2)

# Row label -> estimator to evaluate; insertion order is the row order of the
# resulting table.
estimators = {
    'Linear regression': LinearRegression(),
    'Linear discriminant analysis': LinearDiscriminantAnalysis(),
    'Quadratic discriminant analysis': QuadraticDiscriminantAnalysis(),
    # C is set to the number of distinct labels in y_train.
    'Logistic regression': LogisticRegression(
        C=y_train.nunique(), max_iter=1000),
}

# One row of training/test error rates per estimator; the frame is the cell's
# last expression so the notebook renders it.
pd.DataFrame(
    data=[
        error_rate(estimator, X_train, y_train, X_test, y_test)
        for estimator in estimators.values()
    ],
    index=list(estimators),
)

Unnamed: 0,Training,Test
Linear regression,0.48,0.67
Linear discriminant analysis,0.32,0.56
Quadratic discriminant analysis,0.01,0.53
Logistic regression,0.23,0.51
