# Classification on Wine Quality Dataset - Model selection

## Imports

In [1]:
from tiblib import load_wine
from tiblib.model_selection import grid_cv_multiprior
from tiblib.preprocessing import Gaussianizer, StandardScaler, PCA
from tiblib.classification import LogisticRegression
from tiblib.classification import QuadraticLogisticRegression

## Logistic Regression

In [3]:

# Load the wine data; only the training split is passed to the grid search below.
X_train, X_test, y_train, y_test = load_wine()

# The classifier is handed over as a class (not an instance), together with
# the candidate values for its 'l' hyperparameter.
model = LogisticRegression
hyperparams = {'l': [0.1, 0.01, 0.001, 0.0001]}

# Preprocessing steps. The same scaler object is shared by every pipeline that uses it.
gaussianizer = Gaussianizer()  # instantiated but not part of any pipeline below
scaler = StandardScaler()
pca1, pca2 = PCA(n_dims=9), PCA(n_dims=5)
preprocessings = [[], [scaler], [scaler, pca1], [scaler, pca2]]

prefix = 'lr'
pis = [0.1, 0.5, 0.9]

# One grid search per preprocessing pipeline; each run gets its own results file.
for steps in preprocessings:
    # Build a tag from the string form of each step (e.g. "StandardScaler_PCA (d=9)"),
    # falling back to a fixed label for the empty pipeline.
    tag = '_'.join(map(str, steps)) if steps else 'no_preproc'
    print(tag) # Prints current preprocessings in string form
    out_path = f'results/results_{prefix}_{tag}.csv'
    grid_cv_multiprior(
        X_train,
        y_train,
        pis=pis,
        preprocessing=steps,
        classifier=model,
        hyperparams=hyperparams,
        filename=out_path,
    )

no_preproc
Showing results for pi = [0.1, 0.5, 0.9]
LogReg ($\lambda = 0.1$)		& 0.874	& 0.423	& 0.974	\\
LogReg ($\lambda = 0.01$)		& 0.844	& 0.400	& 0.816	\\
LogReg ($\lambda = 0.001$)		& 0.826	& 0.354	& 0.684	\\
LogReg ($\lambda = 0.0001$)		& 0.856	& 0.360	& 0.643	\\
StandardScaler
Showing results for pi = [0.1, 0.5, 0.9]
LogReg ($\lambda = 0.1$)		& 0.851	& 0.355	& 0.808	\\
LogReg ($\lambda = 0.01$)		& 0.845	& 0.351	& 0.666	\\
LogReg ($\lambda = 0.001$)		& 0.847	& 0.358	& 0.693	\\
LogReg ($\lambda = 0.0001$)		& 0.852	& 0.356	& 0.666	\\
StandardScaler_PCA (d=9)
Showing results for pi = [0.1, 0.5, 0.9]
LogReg ($\lambda = 0.1$)		& 0.842	& 0.356	& 0.779	\\
LogReg ($\lambda = 0.01$)		& 0.845	& 0.347	& 0.660	\\
LogReg ($\lambda = 0.001$)		& 0.854	& 0.361	& 0.670	\\
LogReg ($\lambda = 0.0001$)		& 0.840	& 0.352	& 0.661	\\
StandardScaler_PCA (d=5)
Showing results for pi = [0.1, 0.5, 0.9]
LogReg ($\lambda = 0.1$)		& 0.844	& 0.387	& 0.881	\\
LogReg ($\lambda = 0.01$)		& 0.856	& 0.383	& 0.847	\\

## Quadratic Logistic Regression

In [3]:
# Reload the data so this cell can run independently of the previous section.
# Only the training split is passed to the grid search below.
X_train, X_test, y_train, y_test = load_wine()

# The classifier is handed over as a class (not an instance), together with
# the candidate values for its 'l' hyperparameter.
model = QuadraticLogisticRegression
hyperparams = {'l':[1e-1, 1e-2, 1e-3, 1e-4]}

# Preprocessing steps. The same scaler object is shared by every pipeline that uses it.
gaussianizer = Gaussianizer()  # not used by any pipeline below; kept in case later cells need it
scaler = StandardScaler()
pca1 = PCA(n_dims=9)
pca2 = PCA(n_dims=5)
preprocessings = [
    [],
    [scaler],
    [scaler, pca1],
    [scaler, pca2]
]

# Use a prefix distinct from the logistic-regression section ('lr'). With the same
# prefix both sections target identical "results/results_lr_*.csv" paths, so the
# quadratic runs would collide with the linear model's result files.
prefix = 'qlr'
pis = [0.1, 0.5, 0.9]

# One grid search per preprocessing pipeline; each run gets its own results file.
for pr in preprocessings:
    # Tag the run with the string form of each step (e.g. "StandardScaler_PCA (d=9)"),
    # or a fixed label when no preprocessing is applied.
    if len(pr) > 0:
        filename = '_'.join([str(p) for p in pr])
    else:
        filename = 'no_preproc'
    print(filename) # Prints current preprocessings in string form
    grid_cv_multiprior(X_train, y_train, pis=pis,
            preprocessing=pr,
            classifier=model, hyperparams=hyperparams, filename=f'results/results_{prefix}_{filename}.csv')


no_preproc
Showing results for pi = [0.1, 0.5, 0.9]
QuadLogReg ($\lambda = 0.1$)		& 0.858	& 0.394	& 0.848	\\
QuadLogReg ($\lambda = 0.01$)		& 0.838	& 0.387	& 0.811	\\
QuadLogReg ($\lambda = 0.001$)		& 0.830	& 0.390	& 0.840	\\
QuadLogReg ($\lambda = 0.0001$)		& 0.865	& 0.375	& 0.842	\\
StandardScaler
Showing results for pi = [0.1, 0.5, 0.9]
QuadLogReg ($\lambda = 0.1$)		& 0.811	& 0.316	& 0.795	\\
QuadLogReg ($\lambda = 0.01$)		& 0.807	& 0.292	& 0.670	\\
QuadLogReg ($\lambda = 0.001$)		& 0.814	& 0.268	& 0.635	\\
QuadLogReg ($\lambda = 0.0001$)		& 0.739	& 0.287	& 0.645	\\
StandardScaler_PCA (d=9)
Showing results for pi = [0.1, 0.5, 0.9]
QuadLogReg ($\lambda = 0.1$)		& 0.843	& 0.318	& 0.796	\\
QuadLogReg ($\lambda = 0.01$)		& 0.792	& 0.299	& 0.706	\\
QuadLogReg ($\lambda = 0.001$)		& 0.804	& 0.284	& 0.669	\\
QuadLogReg ($\lambda = 0.0001$)		& 0.819	& 0.297	& 0.758	\\
StandardScaler_PCA (d=5)
Showing results for pi = [0.1, 0.5, 0.9]
QuadLogReg ($\lambda = 0.1$)		& 0.838	& 0.356	& 0.896	\\
Q