In [1]:
import numpy as np
from classifiers import *

# Load Dataset

In [17]:
train_path = 'datasets/mnist_train.csv'
test_path = 'datasets/mnist_test.csv'
y_col = 0

In [18]:
def get_features_and_labels(dataset, y_col):
    y = dataset[:, y_col:y_col+1]
    selector = [col for col in range(dataset.shape[1]) if col != y_col]
    X = dataset[:, selector]
    return X, y

In [81]:
## Import data and get sets
train_set = np.genfromtxt(train_path, delimiter=',')
test_set = np.genfromtxt(test_path, delimiter=',')

train_X, train_y = get_features_and_labels(train_set, y_col)
test_X, test_y = get_features_and_labels(test_set, y_col)

In [82]:
# Optional: Truncate datasets/mess around with them as needed
n = 1000
train_X, train_y = train_X[0:n, :], train_y[0:n, :]
test_X, test_y = test_X[0:n, :], test_y[0:n, :]

#Normalize Data
train_X, train_y = train_X - np.mean(train_X) , train_y
test_X, test_y = test_X - np.mean(test_X) , test_y

In [87]:
n = test_y.size
train_y = train_y.astype(int)
test_y = test_y.astype(int)

# Run Quantile Regression 
Learning $3 \lg k$ quantiles

In [88]:
def weighted_absolute_loss(u, y, alpha):
    y = np.array(y.T[0]) ## y is given as a column matrix, but predictions are not
    zs = np.zeros_like(y)
    return np.mean((1 - alpha) * np.maximum((u - y), zs) + alpha * np.maximum((y - u), zs))

In [89]:
def mode(array):
    most = max(list(map(array.count, array)))
    return list(set(filter(lambda x: array.count(x) == most, array)))

In [90]:
ATs = []
AT_predictions = []
s = int(3 * np.log2(np.unique(train_y).size)) # Number of quantiles
for i in range(1, s):
    a = i/s

    clf6 = QuantileAT(gamma=a, alpha=1., kernel_type='linear', kernel_param=1, loss_function='logistic')
    clf6.fit(train_X, train_y)
    ATpredictions = clf6.predict(test_X)
    print('Weighted Loss of QuantileAT, gamma=' + str(a) + ' %s' %
          weighted_absolute_loss(ATpredictions, test_y, a))
    print('0-1 Loss of QuantileAT, gamma=' + str(a) + ' %s' %
          metrics.zero_one_loss(ATpredictions, test_y))
    ATs.append(clf6)
    AT_predictions.append(ATpredictions)

  y = column_or_1d(y, warn=True)


Weighted Loss of LogisticQuantileAT, gamma=0.1111111111111111 0.4457777777777777
0-1 Loss of LogisticQuantileAT, gamma=0.1111111111111111 0.802
Weighted Loss of LogisticQuantileAT, gamma=0.2222222222222222 0.6322222222222221
0-1 Loss of LogisticQuantileAT, gamma=0.2222222222222222 0.78
Weighted Loss of LogisticQuantileAT, gamma=0.3333333333333333 0.7686666666666667
0-1 Loss of LogisticQuantileAT, gamma=0.3333333333333333 0.7969999999999999
Weighted Loss of LogisticQuantileAT, gamma=0.4444444444444444 0.8154444444444444
0-1 Loss of LogisticQuantileAT, gamma=0.4444444444444444 0.794
Weighted Loss of LogisticQuantileAT, gamma=0.5555555555555556 0.8216666666666668
0-1 Loss of LogisticQuantileAT, gamma=0.5555555555555556 0.8109999999999999
Weighted Loss of LogisticQuantileAT, gamma=0.6666666666666666 0.7786666666666667
0-1 Loss of LogisticQuantileAT, gamma=0.6666666666666666 0.839
Weighted Loss of LogisticQuantileAT, gamma=0.7777777777777778 0.665
0-1 Loss of LogisticQuantileAT, gamma=0.777

In [91]:
AT_preds = np.zeros((n, s))
for i in range(1, s):
    AT_preds[:, i] = AT_predictions[i - 1]
AT_preds = AT_preds.astype(int)
quantile_predictions = np.zeros((n, 1))

for i in range(1, n):
    quantiles = AT_preds[i, :]
    quantile_predictions[i, :] = np.random.choice(mode(quantiles.tolist()))
    
metrics.zero_one_loss(quantile_predictions, test_y)

0.808

In [92]:
ATs = []
AT_predictions = []
s = int(3 * np.log2(np.unique(train_y).size)) # Number of quantiles
for i in range(1, s):
    a = i/s

    clf6 = QuantileAT(gamma=a, alpha=1., kernel_type='rbf', kernel_param=1, loss_function='logistic')
    clf6.fit(train_X, train_y)
    ATpredictions = clf6.predict(test_X)
    print('Weighted Loss of QuantileAT, gamma=' + str(a) + ' %s' %
          weighted_absolute_loss(ATpredictions, test_y, a))
    print('0-1 Loss of QuantileAT, gamma=' + str(a) + ' %s' %
          metrics.zero_one_loss(ATpredictions, test_y))
    ATs.append(clf6)
    AT_predictions.append(ATpredictions)

  y = column_or_1d(y, warn=True)


Weighted Loss of LogisticQuantileAT, gamma=0.1111111111111111 0.4546666666666666
0-1 Loss of LogisticQuantileAT, gamma=0.1111111111111111 0.874
Weighted Loss of LogisticQuantileAT, gamma=0.2222222222222222 0.8131111111111111
0-1 Loss of LogisticQuantileAT, gamma=0.2222222222222222 0.884
Weighted Loss of LogisticQuantileAT, gamma=0.3333333333333333 1.0716666666666665
0-1 Loss of LogisticQuantileAT, gamma=0.3333333333333333 0.884
Weighted Loss of LogisticQuantileAT, gamma=0.4444444444444444 1.2127777777777777
0-1 Loss of LogisticQuantileAT, gamma=0.4444444444444444 0.893
Weighted Loss of LogisticQuantileAT, gamma=0.5555555555555556 1.3025555555555557
0-1 Loss of LogisticQuantileAT, gamma=0.5555555555555556 0.913
Weighted Loss of LogisticQuantileAT, gamma=0.6666666666666666 1.1523333333333332
0-1 Loss of LogisticQuantileAT, gamma=0.6666666666666666 0.913
Weighted Loss of LogisticQuantileAT, gamma=0.7777777777777778 0.871
0-1 Loss of LogisticQuantileAT, gamma=0.7777777777777778 0.901
Weigh

In [93]:
AT_preds = np.zeros((n, s))
for i in range(1, s):
    AT_preds[:, i] = AT_predictions[i - 1]
AT_preds = AT_preds.astype(int)
quantile_predictions = np.zeros((n, 1))

for i in range(1, n):
    quantiles = AT_preds[i, :]
    quantile_predictions[i, :] = np.random.choice(mode(quantiles.tolist()))
    
metrics.zero_one_loss(quantile_predictions, test_y)

0.902

# Run Multiclass Algorithms (for Comparison)

In [50]:
# Standard Logisitic regression
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial').fit(train_X, train_y)
metrics.zero_one_loss(clf.predict(test_X), test_y)

  y = column_or_1d(y, warn=True)


0.18100000000000005