In [7]:
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.insert(0, './algorithms')

## Import Wine Dataset

In [8]:
# Read the semicolon-separated wine table. skip_header=1 drops the row of
# column names so that only the numeric records are kept.
data = np.genfromtxt('winequality-white.csv', delimiter=';', skip_header=1)
print(data.shape)

(4898, 12)


In [9]:
# Split the table column-wise: the first 11 columns are the model inputs and
# column 11 is the target.
y = data[:, 11]
X = data[:, :11]
print(X.shape, y.shape)

(4898, 11) (4898,)


In [10]:
def normalize(X):
    """Shift every row of ``X`` by the mean of all of its entries.

    ``flatten`` joins the rows end to end, so for 1-D rows (as in the wine
    table) the mean taken here is a single scalar computed over every value
    in ``X`` -- it is not one mean per column.

    Args:
        X: Iterable of rows; each row must support subtracting the mean.

    Returns:
        A Python list containing ``row - mean`` for each row of ``X``.
    """
    centre = flatten(X).mean(axis=0)
    shifted = []
    for row in X:
        shifted.append(row - centre)
    return shifted
def flatten(X):
    """Join the rows of ``X`` end to end into one NumPy array.

    Args:
        X: Iterable of iterables; the rows may differ in length.

    Returns:
        ``np.ndarray`` holding every element of every row, in row order.
    """
    return np.array([value for row in X for value in row])

## Define Model
I used logistic regression to classify the wines by their quality score (on a 1-10 scale). Logistic regression models class probabilities through a regression on the log-odds, so although it is a regression technique it is used for classification. It is especially useful when the target is categorical.

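NOTE: For multi-class classification, scikit-learn's `LogisticRegression` supports both a one-vs-rest scheme and a multinomial (softmax) scheme. With `multi_class='auto'` and the `lbfgs` solver, as used below, it fits a multinomial model; one-vs-rest is used only for binary targets or the `liblinear` solver.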


In [12]:
from sklearn.linear_model import LogisticRegression

In [13]:
# Re-bind X to its mean-shifted version (normalize returns a list of shifted
# rows). Later cells fit on this same X, so the name is reused on purpose.
X = normalize(X)
# No intercept term is fitted; every other hyper-parameter is left at the
# scikit-learn default.
model = LogisticRegression(fit_intercept=False)
# Kept as the cell's last expression so the notebook displays the fitted
# estimator.
model.fit(X, y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=False,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [14]:
import random as rand

N_SUBSETS = 100     # number of random wine subsets to draw
SUBSET_SIZE = 10    # wines per subset
SUBSET_SEED = 0     # fixed so that re-running the notebook draws the same subsets

# A dedicated, seeded generator makes the sampled subsets (and therefore every
# loss computed from them) reproducible without touching the global RNG state.
subset_rng = rand.Random(SUBSET_SEED)

# Materialise the rows once; rebuilding this list on every draw is wasted work.
rows = list(data)

# Each subset is SUBSET_SIZE distinct rows of the full table (features + target).
subsets = np.asarray(
    [subset_rng.sample(rows, SUBSET_SIZE) for _ in range(N_SUBSETS)]
)
subsets.shape

(100, 10, 12)

## Given Loss Function

The loss function as detailed in the paper is implemented below.

In [15]:
# Detailed in the paper
def svm_loss(preds, y, delta=0):
    """Hinge-style loss of ``preds`` relative to the top-rated item in ``y``.

    The reference item is the position holding the largest value in ``y``.
    Each predicted score contributes ``max(0, pred + delta - score_correct)``,
    where ``score_correct`` is the score predicted for the reference item, so
    a prediction is penalised only when it comes within ``delta`` of (or
    exceeds) the reference score.  The reference item's own term is included
    and equals ``max(0, delta)``.

    Args:
        preds: Sequence of predicted scores, one per item.
        y: Sequence of true scores aligned with ``preds``.
        delta: Margin added to every prediction before comparison.

    Returns:
        The summed hinge terms (``0`` when no prediction violates the margin).

    Raises:
        ValueError: If ``y`` is empty (raised by ``np.argmax``).
    """
    score_correct = preds[np.argmax(y)]

    # Compare the predicted scores themselves, not their positions: looping
    # over range(len(preds)) would feed the indices 0..n-1 into the hinge.
    return sum(max(0, pred + delta - score_correct) for pred in preds)

## Proposed Loss Function and Model

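The logistic regression is now fitted with an explicit L2 (ridge) penalty and `C=2`, and the loss is calculated in the same way. Note that scikit-learn's default is already `penalty='l2'` with `C=1.0` (see the estimator printed above), so this model differs from the first one only in its regularization strength.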

In [16]:
# Second classifier: same intercept-free setup, with the L2 penalty spelled out
# and C=2. fit() returns the estimator itself, so the call can be chained.
model2 = LogisticRegression(
    penalty='l2',
    C=2,
    fit_intercept=False,
).fit(X, y)

def my_loss(preds, ys, delta = 0):
    """Sum of hinge terms of ``preds`` against the top-rated item in ``ys``.

    The reference item is the position of the largest value in ``ys``; each
    prediction adds ``max(0, pred + delta - reference_score)`` to the total.

    Args:
        preds: Indexable sequence of predicted scores.
        ys: Array of true scores aligned with ``preds`` (must offer ``argmax``).
        delta: Margin added to every prediction before comparison.

    Returns:
        The accumulated loss (``0`` when no term is positive).
    """
    reference_score = preds[ys.argmax()]
    hinge_terms = (max(0, score + delta - reference_score) for score in preds)
    return sum(hinge_terms)

## Calculate Loss

In [17]:
loss = []
reg_loss = []
print("Computing losses...")

# Both models were fitted on features that normalize() had shifted by a single
# scalar: the mean of every raw feature value.  Recompute that scalar from the
# raw table so each subset receives the identical shift before prediction;
# predicting on unshifted features would not match what the models were
# trained on.  (Keep this in sync with normalize() if that function changes.)
feature_shift = flatten(data[:, :11]).mean(axis=0)

for subset in subsets:
    features = subset[:, :11] - feature_shift
    targets = subset[:, 11]

    preds = model.predict(features)
    my_preds = model2.predict(features)

    # One loss value per subset and per model, compared in the plot below.
    loss.append(svm_loss(preds, targets))
    reg_loss.append(my_loss(my_preds, targets))

Error: Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit.

## Visualize results

In [None]:
# Draw both per-subset loss curves on one set of axes for comparison.
fig, ax = plt.subplots(figsize=(20, 20))
for series, name in ((loss, "Given loss"), (reg_loss, "Regularized")):
    ax.plot(series, label=name)
ax.legend()
plt.show()

Error: Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit.