In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, confusion_matrix, classification_report

In [2]:
wine_data = pd.read_csv('./Datasets/wine-quality/data.csv')

wine_train = wine_data.sample(frac = 0.8, random_state = 200)
wine_validation = wine_data.drop(wine_train.index)

label_train = wine_train[wine_train.keys()[-1]]
label_validation = wine_validation[wine_validation.keys()[-1]]

# wine_train = (wine_train - wine_data.mean())/wine_data.std()
# wine_validation = (wine_validation - wine_data.mean())/wine_data.std()

wine_train

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4280,7.4,0.390,0.23,7.0,0.033,29.0,126.0,0.99400,3.14,0.42,10.5,5
357,7.4,0.250,0.36,13.2,0.067,53.0,178.0,0.99760,3.01,0.48,9.0,6
3402,7.4,0.155,0.34,2.3,0.045,73.5,214.0,0.99340,3.18,0.61,9.9,7
1623,5.7,0.180,0.22,4.2,0.042,25.0,111.0,0.99400,3.35,0.39,9.4,5
3906,8.9,0.260,0.33,8.1,0.024,47.0,202.0,0.99558,3.13,0.46,10.8,6
2893,6.0,0.250,0.28,7.7,0.053,37.0,132.0,0.99489,3.06,0.50,9.4,6
3647,8.4,0.200,0.38,11.8,0.055,51.0,170.0,1.00040,3.34,0.82,8.9,6
149,6.3,0.210,0.28,1.5,0.051,46.0,142.0,0.99280,3.23,0.42,10.1,6
815,6.6,0.230,0.24,3.9,0.045,36.0,138.0,0.99220,3.15,0.64,11.3,7
3010,6.0,0.240,0.27,1.9,0.048,40.0,170.0,0.99380,3.64,0.54,10.0,7


In [3]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

In [4]:
def logistic(X, label, beta):
    num_of_iters = 300
    beta.shape = (num_of_classes+1, 1)
    Y = np.array(label, dtype = pd.Series)
    Y.shape = (len(Y), 1)
    Y = Y.astype(float)
    alpha = 0.001
    for _ in range(num_of_iters):
        z = X @ beta
        hypo = sigmoid(z)
        loss = np.subtract(hypo, Y)
        gradient = (alpha/len(Y)) * (X.T @ loss)
        beta -= gradient
    beta.shape = (num_of_classes+1, )
    return beta

In [5]:
def prediction(test_data):
    result_of_all = sigmoid(test_data @ classifiers.T)
#     print(result_of_all)
    predicted = []
    for index, row in result_of_all.iterrows():
        predicted.append(row.idxmax())
        break
    return predicted

In [6]:
num_of_classes = 11
x = wine_train.iloc[:, :-1].values
ones = np.ones(len(wine_train))
X = np.concatenate((ones[:, np.newaxis], x), axis = 1)
classifiers = np.zeros(shape = (num_of_classes, len(wine_data.keys())))
for classifier in range(num_of_classes):
    label_classifier = (label_train == classifier).astype(int)
    classifiers[classifier, :] = logistic(X, label_classifier, classifiers[classifier, :])

pred = prediction(wine_validation)

print("Training accuracy:", str(100 * np.mean(pred == label_validation)) + "%")

# classifiers

Training accuracy: 45.91836734693878%


# One vs One

In [7]:
def logistic_ovo(X, Y, beta):
    Y.shape = (len(Y), 1)
    beta.shape = (num_of_classes+1, 1)
    num_of_iters = 500
#     print(X.shape, Y.shape, beta.shape)
    alpha = 0.001
    for _ in range(num_of_iters):
        z = X @ beta
        hypo = sigmoid(z)
        loss = np.subtract(hypo, Y)
        gradient = (alpha/len(Y)) * (X.T @ loss)
        beta -= gradient
    beta.shape = (num_of_classes+1, )
    return beta

In [8]:
def predict_ovo(test_data):
    pred = []
    for index, row in test_data.iterrows():
        row_wise_pred = [0]* num_of_classes
        row = row.values
        for c in range(len(classifiers)):
            classifiers[c].shape = (num_of_classes+1, 1)
            row.shape = (num_of_classes+1, 1)
#             print(row.shape, classifier.T.shape)
            x = sigmoid(row.T @ classifiers[c])
            if x < 0.5:
                row_wise_pred[pair_list[c][0]] += 1
            else:
                row_wise_pred[pair_list[c][1]] += 1
        pred.append(np.argmax(row_wise_pred))
    return pred

In [9]:
wine_train = (wine_train - wine_data.mean())/wine_data.std()
wine_validation = (wine_validation - wine_data.mean())/wine_data.std()

num_of_classes = 11
x = wine_train.iloc[:, :-1].values
ones = np.ones(len(wine_train))
X = np.concatenate((ones[:, np.newaxis], x), axis = 1)

unique_classes = label_train.unique()
pair_list = []
for i in range(len(unique_classes)):
    for j in range(i+1, len(unique_classes)):
        pair_list.append([unique_classes[i], unique_classes[j]])
num_of_classifiers = len(pair_list)
classifiers = np.zeros(shape = (num_of_classifiers, len(wine_data.keys())))
# pair_list
for classifier in range(num_of_classifiers):
    xtemp1 = X[label_train == pair_list[classifier][0]]
    xtemp2 = X[label_train == pair_list[classifier][1]]
    ytemp1 = label_train[label_train == pair_list[classifier][0]]
    ytemp2 = label_train[label_train == pair_list[classifier][1]]
    temp_X = np.concatenate((xtemp1, xtemp2))
    temp_Y = np.concatenate((ytemp1, ytemp2))
    classifiers[classifier, :] = logistic_ovo(temp_X, temp_Y, classifiers[classifier, :])

predicted_ovo = predict_ovo(wine_validation)
print("Training accuracy:", str(100 * np.mean(predicted_ovo == label_validation)) + "%")


Training accuracy: 11.11111111111111%
