# Classification on Wine Quality Dataset

## Imports

In [1]:
import numpy as np
from tiblib import load_wine
from tiblib.model_selection import grid_cv_multiprior
from tiblib.preprocessing import Gaussianizer, StandardScaler, PCA
from tiblib.classification import GaussianClassifier

## Gaussian Classifier

In [2]:
# Grid search over GaussianClassifier settings, repeated for several
# preprocessing chains and scored at each prior in `pis`.
X_train, X_test, y_train, y_test = load_wine()

model = GaussianClassifier  # the class itself is handed over, not an instance
hyperparams = {
    'tied': [False, True],
    'naive': [False, True],
}
prefix = 'gc'
pis = [0.1, 0.5, 0.9]

# Preprocessing stages; the same instances are reused in several chains.
gaussianizer = Gaussianizer()
scaler = StandardScaler()
pca1 = PCA(n_dims=9)
pca2 = PCA(n_dims=5)
preprocessings = [
    [],
    [gaussianizer],
    [scaler],
    [scaler, pca1],
    [scaler, pca2],
    [gaussianizer, scaler, pca1],
    [gaussianizer, scaler, pca2],
]

for pr in preprocessings:
    # The run label is the '_'-joined str() of every stage in the chain;
    # the empty chain gets a fixed label instead.
    filename = '_'.join(map(str, pr)) if pr else 'no_preproc'
    print(filename)  # show which chain the rows printed next belong to
    grid_cv_multiprior(
        X_train, y_train,
        pis=pis,
        preprocessing=pr,
        classifier=model,
        hyperparams=hyperparams,
        filename=f'results/results_{prefix}_{filename}.csv',
    )

no_preproc
Showing results for pi = [0.1, 0.5, 0.9]
Full		& 0.797	& 0.313	& 0.839	\\
Naive		& 0.856	& 0.418	& 0.881	\\
Tied		& 0.818	& 0.338	& 0.741	\\
Naive, Tied		& 0.860	& 0.405	& 0.944	\\
Gaussianizer
Showing results for pi = [0.1, 0.5, 0.9]
Full		& 0.772	& 0.299	& 0.772	\\
Naive		& 0.863	& 0.445	& 0.863	\\
Tied		& 0.786	& 0.351	& 0.849	\\
Naive, Tied		& 0.866	& 0.443	& 0.945	\\
StandardScaler
Showing results for pi = [0.1, 0.5, 0.9]
Full		& 0.784	& 0.310	& 0.855	\\
Naive		& 0.867	& 0.419	& 0.926	\\
Tied		& 0.841	& 0.335	& 0.759	\\
Naive, Tied		& 0.868	& 0.409	& 0.947	\\
StandardScaler_PCA (d=9)
Showing results for pi = [0.1, 0.5, 0.9]
Full		& 0.829	& 0.317	& 0.810	\\
Naive		& 0.815	& 0.393	& 0.866	\\
Tied		& 0.835	& 0.342	& 0.746	\\
Naive, Tied		& 0.823	& 0.339	& 0.782	\\
StandardScaler_PCA (d=5)
Showing results for pi = [0.1, 0.5, 0.9]
Full		& 0.865	& 0.401	& 0.893	\\
Naive		& 0.862	& 0.434	& 0.898	\\
Tied		& 0.854	& 0.388	& 0.914	\\
Naive, Tied		& 0.854	& 0.387	& 0.923	\\
Gaussi

## Logistic Regression

In [3]:
from tiblib.classification import LogisticRegression

# Grid search over the LogisticRegression hyperparameter `l`, repeated for
# several preprocessing chains and scored at each prior in `pis`.
X_train, X_test, y_train, y_test = load_wine()

model = LogisticRegression  # the class itself is handed over, not an instance
hyperparams = {'l': [1e-1, 1e-2, 1e-3, 1e-4]}
prefix = 'lr'
pis = [0.1, 0.5, 0.9]

# Preprocessing stages. `gaussianizer` is created but is not part of any
# chain listed in this cell.
gaussianizer = Gaussianizer()
scaler = StandardScaler()
pca1 = PCA(n_dims=9)
pca2 = PCA(n_dims=5)
preprocessings = [
    [],
    [scaler],
    [scaler, pca1],
    [scaler, pca2],
]

for pr in preprocessings:
    # The run label is the '_'-joined str() of every stage in the chain;
    # the empty chain gets a fixed label instead.
    filename = '_'.join(map(str, pr)) if pr else 'no_preproc'
    print(filename)  # show which chain the rows printed next belong to
    grid_cv_multiprior(
        X_train, y_train,
        pis=pis,
        preprocessing=pr,
        classifier=model,
        hyperparams=hyperparams,
        filename=f'results/results_{prefix}_{filename}.csv',
    )

no_preproc
Showing results for pi = [0.1, 0.5, 0.9]
LogReg ($\lambda = 0.1$)		& 0.874	& 0.423	& 0.974	\\
LogReg ($\lambda = 0.01$)		& 0.844	& 0.400	& 0.816	\\
LogReg ($\lambda = 0.001$)		& 0.826	& 0.354	& 0.684	\\
LogReg ($\lambda = 0.0001$)		& 0.856	& 0.360	& 0.643	\\
StandardScaler
Showing results for pi = [0.1, 0.5, 0.9]
LogReg ($\lambda = 0.1$)		& 0.851	& 0.355	& 0.808	\\
LogReg ($\lambda = 0.01$)		& 0.845	& 0.351	& 0.666	\\
LogReg ($\lambda = 0.001$)		& 0.847	& 0.358	& 0.693	\\
LogReg ($\lambda = 0.0001$)		& 0.852	& 0.356	& 0.666	\\
StandardScaler_PCA (d=9)
Showing results for pi = [0.1, 0.5, 0.9]
LogReg ($\lambda = 0.1$)		& 0.842	& 0.356	& 0.779	\\
LogReg ($\lambda = 0.01$)		& 0.845	& 0.347	& 0.660	\\
LogReg ($\lambda = 0.001$)		& 0.854	& 0.361	& 0.670	\\
LogReg ($\lambda = 0.0001$)		& 0.840	& 0.352	& 0.661	\\
StandardScaler_PCA (d=5)
Showing results for pi = [0.1, 0.5, 0.9]
LogReg ($\lambda = 0.1$)		& 0.844	& 0.387	& 0.881	\\
LogReg ($\lambda = 0.01$)		& 0.856	& 0.383	& 0.847	\\

## Quadratic Logistic Regression

In [3]:
from tiblib.classification import QuadraticLogisticRegression

# Grid search over the QuadraticLogisticRegression hyperparameter `l`,
# repeated for several preprocessing chains and scored at each prior in `pis`.
X_train, X_test, y_train, y_test = load_wine()

model = QuadraticLogisticRegression  # the class itself is handed over, not an instance
hyperparams = {'l': [1e-1, 1e-2, 1e-3, 1e-4]}
# Must differ from the 'lr' prefix used by the LogisticRegression cell:
# with the same prefix both cells target identical
# results/results_lr_<chain>.csv paths and would clash.
prefix = 'qlr'
pis = [0.1, 0.5, 0.9]

# Preprocessing stages. `gaussianizer` is created but is not part of any
# chain listed in this cell.
gaussianizer = Gaussianizer()
scaler = StandardScaler()
pca1 = PCA(n_dims=9)
pca2 = PCA(n_dims=5)
preprocessings = [
    [],
    [scaler],
    [scaler, pca1],
    [scaler, pca2],
]

for pr in preprocessings:
    # The run label is the '_'-joined str() of every stage in the chain;
    # the empty chain gets a fixed label instead.
    filename = '_'.join(map(str, pr)) if pr else 'no_preproc'
    print(filename)  # show which chain the rows printed next belong to
    grid_cv_multiprior(
        X_train, y_train,
        pis=pis,
        preprocessing=pr,
        classifier=model,
        hyperparams=hyperparams,
        filename=f'results/results_{prefix}_{filename}.csv',
    )

no_preproc
Showing results for pi = [0.1, 0.5, 0.9]
QuadLogReg ($\lambda = 0.1$)		& 0.858	& 0.394	& 0.848	\\
QuadLogReg ($\lambda = 0.01$)		& 0.838	& 0.387	& 0.811	\\
QuadLogReg ($\lambda = 0.001$)		& 0.830	& 0.390	& 0.840	\\
QuadLogReg ($\lambda = 0.0001$)		& 0.865	& 0.375	& 0.842	\\
StandardScaler
Showing results for pi = [0.1, 0.5, 0.9]
QuadLogReg ($\lambda = 0.1$)		& 0.811	& 0.316	& 0.795	\\
QuadLogReg ($\lambda = 0.01$)		& 0.807	& 0.292	& 0.670	\\
QuadLogReg ($\lambda = 0.001$)		& 0.814	& 0.268	& 0.635	\\
QuadLogReg ($\lambda = 0.0001$)		& 0.739	& 0.287	& 0.645	\\
StandardScaler_PCA (d=9)
Showing results for pi = [0.1, 0.5, 0.9]
QuadLogReg ($\lambda = 0.1$)		& 0.843	& 0.318	& 0.796	\\
QuadLogReg ($\lambda = 0.01$)		& 0.792	& 0.299	& 0.706	\\
QuadLogReg ($\lambda = 0.001$)		& 0.804	& 0.284	& 0.669	\\
QuadLogReg ($\lambda = 0.0001$)		& 0.819	& 0.297	& 0.758	\\
StandardScaler_PCA (d=5)
Showing results for pi = [0.1, 0.5, 0.9]
QuadLogReg ($\lambda = 0.1$)		& 0.838	& 0.356	& 0.896	\\
Q

## GMM

In [4]:
from tiblib.classification import GaussianMixtureClassifier

# Grid search over GaussianMixtureClassifier settings, repeated for several
# preprocessing chains and scored at each prior in `pis`.
X_train, X_test, y_train, y_test = load_wine()

model = GaussianMixtureClassifier  # the class itself is handed over, not an instance
hyperparams = {
    'tied': [False, True],
    'diag': [False, True],
    'n_components': [4, 8, 16],
    'alpha': [0.1, 0.5, 1],
}
prefix = 'gmm'
pis = [0.1, 0.5, 0.9]

# Preprocessing stages. `pca1` and `pca2` are created but are not part of
# any chain listed in this cell.
gaussianizer = Gaussianizer()
scaler = StandardScaler()
pca1 = PCA(n_dims=9)
pca2 = PCA(n_dims=5)
preprocessings = [
    [],
    [gaussianizer],
    [scaler],
]

for pr in preprocessings:
    # The run label is the '_'-joined str() of every stage in the chain;
    # the empty chain gets a fixed label instead.
    filename = '_'.join(map(str, pr)) if pr else 'no_preproc'
    print(filename)  # show which chain the rows printed next belong to
    grid_cv_multiprior(
        X_train, y_train,
        pis=pis,
        preprocessing=pr,
        classifier=model,
        hyperparams=hyperparams,
        filename=f'results/results_{prefix}_{filename}.csv',
    )

no_preproc
Showing results for pi = [0.1, 0.5, 0.9]
GMM (4 components, $\alpha = 0.1$)		& 0.812	& 0.326	& 0.748	\\
GMM (4 components, $\alpha = 0.5$)		& 0.847	& 0.327	& 0.695	\\
GMM (4 components, $\alpha = 1$)		& 0.772	& 0.321	& 0.740	\\
GMM (8 components, $\alpha = 0.1$)		& 0.842	& 0.330	& 0.786	\\
GMM (8 components, $\alpha = 0.5$)		& 0.784	& 0.325	& 0.802	\\
GMM (8 components, $\alpha = 1$)		& 0.725	& 0.300	& 0.775	\\
GMM (16 components, $\alpha = 0.1$)		& 0.850	& 0.350	& 0.893	\\
GMM (16 components, $\alpha = 0.5$)		& 0.830	& 0.346	& 0.950	\\
GMM (16 components, $\alpha = 1$)		& 0.679	& 0.303	& 0.852	\\
GMM (Diag, 4 components, $\alpha = 0.1$)		& 0.801	& 0.400	& 0.847	\\
GMM (Diag, 4 components, $\alpha = 0.5$)		& 0.865	& 0.376	& 0.828	\\
GMM (Diag, 4 components, $\alpha = 1$)		& 0.837	& 0.316	& 0.781	\\
GMM (Diag, 8 components, $\alpha = 0.1$)		& 0.894	& 0.388	& 0.852	\\
GMM (Diag, 8 components, $\alpha = 0.5$)		& 0.851	& 0.375	& 0.839	\\
GMM (Diag, 8 components, $\alpha = 1$)		&

## SVC

In [3]:
from tiblib.classification import SVC

# Grid search over SVC with the 'linear' and 'poly' kernels, repeated for
# several preprocessing chains and scored at each prior in `pis`.
X_train, X_test, y_train, y_test = load_wine()

model = SVC  # the class itself is handed over, not an instance
hyperparams = {
    'C': [1e-1, 1, 10],
    'kernel': ['linear', 'poly'],
}
prefix = 'svm'
pis = [0.1, 0.5, 0.9]

# Preprocessing stages used by the chains below.
gaussianizer = Gaussianizer()
scaler = StandardScaler()
preprocessings = [
    [],
    [gaussianizer],
    [scaler],
]

for pr in preprocessings:
    # The run label is the '_'-joined str() of every stage in the chain;
    # the empty chain gets a fixed label instead.
    filename = '_'.join(map(str, pr)) if pr else 'no_preproc'
    print(filename)  # show which chain the rows printed next belong to
    grid_cv_multiprior(
        X_train, y_train,
        pis=pis,
        preprocessing=pr,
        classifier=model,
        hyperparams=hyperparams,
        filename=f'results/results_{prefix}_{filename}.csv',
    )

no_preproc
Showing results for pi = [0.1, 0.5, 0.9]
SVC (Linear, $C = 0.1$)		& 0.927	& 0.427	& 0.879	\\
SVC (Poly, $C = 0.1$)		& 1.000	& 0.990	& 1.065	\\
SVC (Linear, $C = 1$)		& 1.000	& 0.644	& 0.932	\\
SVC (Poly, $C = 1$)		& 1.000	& 0.980	& 1.019	\\
SVC (Linear, $C = 10$)		& 1.000	& 0.914	& 1.006	\\
SVC (Poly, $C = 10$)		& 1.000	& 0.984	& 1.050	\\
Gaussianizer
Showing results for pi = [0.1, 0.5, 0.9]
SVC (Linear, $C = 0.1$)		& 0.843	& 0.361	& 0.803	\\
SVC (Poly, $C = 0.1$)		& 1.000	& 0.983	& 1.081	\\
SVC (Linear, $C = 1$)		& 0.838	& 0.352	& 0.817	\\
SVC (Poly, $C = 1$)		& 0.998	& 0.949	& 1.112	\\
SVC (Linear, $C = 10$)		& 0.861	& 0.367	& 0.896	\\
SVC (Poly, $C = 10$)		& 1.000	& 0.993	& 1.050	\\
StandardScaler
Showing results for pi = [0.1, 0.5, 0.9]
SVC (Linear, $C = 0.1$)		& 0.848	& 0.340	& 0.689	\\
SVC (Poly, $C = 0.1$)		& 1.000	& 0.972	& 1.034	\\
SVC (Linear, $C = 1$)		& 0.851	& 0.348	& 0.695	\\
SVC (Poly, $C = 1$)		& 1.000	& 0.982	& 1.065	\\
SVC (Linear, $C = 10$)		& 0.869	& 0.36

In [4]:
import numpy as np
from tiblib.classification import SVC

# Grid search over SVC with the 'radial' kernel, repeated for several
# preprocessing chains and scored at each prior in `pis`.
X_train, X_test, y_train, y_test = load_wine()

model = SVC  # the class itself is handed over, not an instance
hyperparams = {
    'C': [1e-1, 1, 10],
    'kernel': ['radial'],
    # gamma candidates: e^-1, e^-2, e^-3
    'gamma': [1/np.e, 1/np.e**2, 1/np.e**3],
}
prefix = 'svm_rbf'
pis = [0.1, 0.5, 0.9]

# Preprocessing stages used by the chains below.
gaussianizer = Gaussianizer()
scaler = StandardScaler()
preprocessings = [
    [],
    [gaussianizer],
    [scaler],
]

for pr in preprocessings:
    # The run label is the '_'-joined str() of every stage in the chain;
    # the empty chain gets a fixed label instead.
    filename = '_'.join(map(str, pr)) if pr else 'no_preproc'
    print(filename)  # show which chain the rows printed next belong to
    grid_cv_multiprior(
        X_train, y_train,
        pis=pis,
        preprocessing=pr,
        classifier=model,
        hyperparams=hyperparams,
        filename=f'results/results_{prefix}_{filename}.csv',
    )

no_preproc
Showing results for pi = [0.1, 0.5, 0.9]
SVC (RBF, $C = 0.1$, $\gamma = 0.36787944117144233$)		& 0.792	& 0.525	& 0.922	\\
SVC (RBF, $C = 0.1$, $\gamma = 0.1353352832366127$)		& 0.867	& 0.453	& 0.795	\\
SVC (RBF, $C = 0.1$, $\gamma = 0.04978706836786395$)		& 0.962	& 0.508	& 0.934	\\
SVC (RBF, $C = 1$, $\gamma = 0.36787944117144233$)		& 0.649	& 0.463	& 0.718	\\
SVC (RBF, $C = 1$, $\gamma = 0.1353352832366127$)		& 0.684	& 0.450	& 0.718	\\
SVC (RBF, $C = 1$, $\gamma = 0.04978706836786395$)		& 0.775	& 0.406	& 0.846	\\
SVC (RBF, $C = 10$, $\gamma = 0.36787944117144233$)		& 0.672	& 0.473	& 0.712	\\
SVC (RBF, $C = 10$, $\gamma = 0.1353352832366127$)		& 0.710	& 0.426	& 0.746	\\
SVC (RBF, $C = 10$, $\gamma = 0.04978706836786395$)		& 0.736	& 0.387	& 0.851	\\
Gaussianizer
Showing results for pi = [0.1, 0.5, 0.9]
SVC (RBF, $C = 0.1$, $\gamma = 0.36787944117144233$)		& 0.754	& 0.289	& 0.730	\\
SVC (RBF, $C = 0.1$, $\gamma = 0.1353352832366127$)		& 0.715	& 0.286	& 0.697	\\
SVC (RBF, $C = 0

## Calibration

In [7]:
from tiblib.model_selection import Calibrate
from tiblib.classification import QuadraticLogisticRegression, GaussianClassifier
from tiblib.classification import GaussianMixtureClassifier, SVC, Pipeline
from tiblib.preprocessing import Gaussianizer, StandardScaler, PCA

# Calibrate eight preprocessing+classifier pipelines on the training split,
# print one table row per pipeline and store its scores under results/.
X_train, _, y_train, _ = load_wine()  # the two test-split outputs are discarded

# Preprocessing stages; the same instances are shared by several pipelines.
g, ss, pca = Gaussianizer(), StandardScaler(), PCA(n_dims=9)

# Classifier instances with the settings used for calibration.
gc = GaussianClassifier()
qrl = QuadraticLogisticRegression(l=1e-3)
svm1 = SVC(kernel='radial', gamma=1/np.e**2, C=10)
svm2 = SVC(kernel='radial', gamma=1/np.e, C=1)
gmm1 = GaussianMixtureClassifier(n_components=8)
gmm2 = GaussianMixtureClassifier(n_components=16, tied=True)

# (label, pipeline) pairs. The label appears in the printed row and in the
# names of the saved score files.
candidates = [
    ('gc1', Pipeline(g, gc)),
    ('gc2', Pipeline([ss, pca], gc)),
    ('qlr1', Pipeline([ss], qrl)),
    ('qlr2', Pipeline([ss, pca], qrl)),
    ('svm1', Pipeline(ss, svm1)),
    ('svm2', Pipeline(ss, svm2)),
    ('gmm1', Pipeline(ss, gmm1)),
    ('gmm2', Pipeline(g, gmm2)),
]
names = [label for label, _pipeline in candidates]
models = [pipeline for _label, pipeline in candidates]

for n, m in candidates:
    outcome = Calibrate(m, X_train, y_train)
    min_dcf, act_dcf, cal_dcf, scores, cal_scores = outcome
    print(f'{n} & {min_dcf:.3f} & {act_dcf:.3f} & {cal_dcf:.3f}')
    # np.save appends '.npy'; the fusion cells load cal_scores_<label>.npy.
    np.save(f'results/scores_{n}', scores)
    np.save(f'results/cal_scores_{n}', cal_scores)

gc1 & 0.311 & 0.330 & 0.325
gc2 & 0.327 & 0.354 & 0.337
qrl1 & 0.277 & 0.292 & 0.297
qrl2 & 0.301 & 0.307 & 0.304
svm1 & 0.243 & 0.249 & 0.246
svm2 & 0.231 & 0.233 & 0.238
gmm1 & 0.319 & 0.323 & 0.321
gmm2 & 0.334 & 0.358 & 0.338


## Fusion

In [28]:
from tiblib.model_selection import Fusion

# Fuse the saved cal_scores_* vectors of svm2, qlr1 and gmm1, print one
# table row and store the fused scores under results/.
X_train, _, y_train, _ = load_wine()  # only y_train is used below

model_names = ['svm2', 'qlr1', 'gmm1']
# One column per model: each loaded array is reshaped to a single column
# and the columns are joined side by side, in `model_names` order.
scores = np.concatenate(
    [np.load(f'results/cal_scores_{n}.npy').reshape(-1, 1) for n in model_names],
    axis=1,
)

min_dcf, act_dcf, fused_score = Fusion(scores, y_train)
# '.3f' fixes three decimals. A bare '.3' means three significant digits,
# which would print e.g. 0.2 as '0.2' and disagree with the '.3f' columns
# printed by the calibration cell.
print(f'QLR + SVM + GMM & {min_dcf:.3f} & {act_dcf:.3f} \\\\')
np.save(f'results/fusion_scores_{"_".join(model_names)}', fused_score)

QLR + SVM + GMM & 0.226 & 0.228 \\


In [29]:
from tiblib.model_selection import Fusion

# Fuse the saved cal_scores_* vectors of svm2 and qlr1, print one table row
# and store the fused scores under results/.
X_train, _, y_train, _ = load_wine()  # only y_train is used below

model_names = ['svm2', 'qlr1']
# One column per model: each loaded array is reshaped to a single column
# and the columns are joined side by side, in `model_names` order.
scores = np.concatenate(
    [np.load(f'results/cal_scores_{n}.npy').reshape(-1, 1) for n in model_names],
    axis=1,
)

min_dcf, act_dcf, fused_score = Fusion(scores, y_train)
# '.3f' fixes three decimals. A bare '.3' means three significant digits,
# which would print e.g. 0.2 as '0.2' and disagree with the '.3f' columns
# printed by the calibration cell.
print(f'QLR + SVM & {min_dcf:.3f} & {act_dcf:.3f} \\\\')
np.save(f'results/fusion_scores_{"_".join(model_names)}', fused_score)

QLR + SVM & 0.228 & 0.236 \\


In [30]:
from tiblib.model_selection import Fusion

# Fuse the saved cal_scores_* vectors of svm2 and gmm1, print one table row
# and store the fused scores under results/.
X_train, _, y_train, _ = load_wine()  # only y_train is used below

model_names = ['svm2', 'gmm1']
# One column per model: each loaded array is reshaped to a single column
# and the columns are joined side by side, in `model_names` order.
scores = np.concatenate(
    [np.load(f'results/cal_scores_{n}.npy').reshape(-1, 1) for n in model_names],
    axis=1,
)

min_dcf, act_dcf, fused_score = Fusion(scores, y_train)
# '.3f' fixes three decimals. A bare '.3' means three significant digits,
# which would print e.g. 0.2 as '0.2' and disagree with the '.3f' columns
# printed by the calibration cell.
print(f'SVM + GMM & {min_dcf:.3f} & {act_dcf:.3f} \\\\')
np.save(f'results/fusion_scores_{"_".join(model_names)}', fused_score)

SVM + GMM & 0.228 & 0.237 \\


In [38]:
from tiblib.model_selection import Fusion

# Fuse the saved cal_scores_* vectors of qlr1 and gmm1, print one table row
# and store the fused scores under results/.
X_train, _, y_train, _ = load_wine()  # only y_train is used below

model_names = ['qlr1', 'gmm1']
# One column per model: each loaded array is reshaped to a single column
# and the columns are joined side by side, in `model_names` order.
scores = np.concatenate(
    [np.load(f'results/cal_scores_{n}.npy').reshape(-1, 1) for n in model_names],
    axis=1,
)

min_dcf, act_dcf, fused_score = Fusion(scores, y_train)
# '.3f' fixes three decimals. A bare '.3' means three significant digits,
# which would print e.g. 0.2 as '0.2' and disagree with the '.3f' columns
# printed by the calibration cell.
print(f'QLR + GMM & {min_dcf:.3f} & {act_dcf:.3f} \\\\')
np.save(f'results/fusion_scores_{"_".join(model_names)}', fused_score)

QLR + GMM & 0.283 & 0.293 \\
