In [1]:
import numpy as np
import numpy.linalg as lia
import pandas as pd
import matplotlib as plt

In [2]:
from sklearn import datasets
# Load scikit-learn's handwritten-digits dataset; later cells read and modify
# its `.data` (feature rows) and read its `.target` (labels).
digits = datasets.load_digits()

In [3]:
# Sanity check: the number of feature rows and the number of labels should agree.
for sample_count in (len(digits.data), digits.target.size):
    print(sample_count)

1797
1797


In [4]:
from sklearn.datasets import fetch_openml
# Fetch version 1 of the OpenML dataset named 'wine'; later cells read and
# modify its `.data` and read its `.target`.
# NOTE(review): depending on the installed scikit-learn version this may return
# pandas objects rather than NumPy arrays - confirm the cells below handle that.
wine = fetch_openml(name='wine', version=1)

In [5]:
# Sanity check: the number of feature rows and the number of labels should agree.
for sample_count in (len(wine.data), wine.target.size):
    print(sample_count)

178
178


In [6]:
# Normalization of the digits and wine data: every feature (column) is divided
# by that feature's maximum value.


def _normalize_by_feature_max(data):
    """Return a float array equal to ``data`` with each column divided by its maximum.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Anything ``np.asarray`` can turn into a 2-D numeric array.

    Returns
    -------
    numpy.ndarray
        New array of the same shape; the input is not modified.

    Notes
    -----
    A column whose maximum is exactly 0 is divided by 1 instead, so it is
    returned unchanged rather than becoming NaN through a 0/0 division.
    """
    values = np.asarray(data, dtype=float)
    column_max = values.max(axis=0)
    safe_divisor = np.where(column_max == 0, 1.0, column_max)
    return values / safe_divisor


# Both datasets are scaled per feature (column-wise) so they are treated
# consistently. Re-running this cell is harmless: after scaling, every non-zero
# column already has a maximum of 1.
digits.data = _normalize_by_feature_max(digits.data)
wine.data = _normalize_by_feature_max(wine.data)

In [7]:
# 5-fold cross validation for the digits and wine datasets

NUMBER_OF_FOLDS = 5
FOLD_SHUFFLE_SEED = 0


def _k_fold_split(x, y, n_folds=NUMBER_OF_FOLDS, seed=FOLD_SHUFFLE_SEED):
    """Split ``x``/``y`` into ``n_folds`` training/validation pairs.

    Rows are assigned to folds by index, so every row is validated exactly once
    and appears in the training set of every other fold. The row order is
    shuffled first with a private, seeded generator (the global NumPy random
    state is untouched) so that a dataset stored sorted by label does not put
    a single class into one validation fold; the split is reproducible.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
    y : array-like of shape (n_samples,)
    n_folds : int
        Number of folds; must be at least 2.
    seed : int
        Seed for the shuffle.

    Returns
    -------
    tuple of four lists, each of length ``n_folds``:
        ``(x_training_sets, y_training_sets, x_validation_sets, y_validation_sets)``.
        Each x entry is a list of feature lists; each y entry is a list of labels.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 2.
    """
    if n_folds < 2:
        raise ValueError("n_folds must be at least 2")

    features = np.asarray(x)
    labels = np.asarray(y)
    shuffled = np.random.RandomState(seed).permutation(len(features))
    # array_split tolerates sizes that are not a multiple of n_folds: the first
    # few folds simply receive one extra row.
    fold_indices = np.array_split(shuffled, n_folds)

    x_training_sets, y_training_sets = [], []
    x_validation_sets, y_validation_sets = [], []
    for fold_number, validation_idx in enumerate(fold_indices):
        training_idx = np.concatenate(
            [idx for other, idx in enumerate(fold_indices) if other != fold_number]
        )
        x_training_sets.append(features[training_idx].tolist())
        y_training_sets.append(list(labels[training_idx]))
        x_validation_sets.append(features[validation_idx].tolist())
        y_validation_sets.append(list(labels[validation_idx]))

    return x_training_sets, y_training_sets, x_validation_sets, y_validation_sets


# Nominal 80/20 sizes, kept for reference; individual folds may differ from
# these by one row when the dataset size is not a multiple of the fold count.
digitsTrainingSetSize = int(np.ceil(0.8 * len(digits.data)))
digitsValidationSetSize = int(len(digits.data) - digitsTrainingSetSize)

(xDigitsTrainingSets,
 yDigitsTrainingSets,
 xDigitsValidationSets,
 yDigitsValidationSets) = _k_fold_split(digits.data, digits.target)

wineTrainingSetSize = int(np.ceil(0.8 * len(wine.data)))
wineValidationSetSize = int(len(wine.data) - wineTrainingSetSize)

(xWineTrainingSets,
 yWineTrainingSets,
 xWineValidationSets,
 yWineValidationSets) = _k_fold_split(wine.data, wine.target)

In [8]:
# One-hot encoding of the targets of both datasets. Each fold's list of labels
# is replaced, in place, by a list of one-hot float lists.

numberOfDigitsTargets = 10
numberOfWineTargets = 3


def _one_hot(hot_position, width):
    """Return a list of ``width`` floats: 0.0 everywhere except 1.0 at ``hot_position``."""
    vector = np.zeros(width)
    vector[hot_position] = 1
    return vector.tolist()


# Digits: the label itself is used as the hot position.
for label_folds in (yDigitsTrainingSets, yDigitsValidationSets):
    label_folds[:] = [
        [_one_hot(label, numberOfDigitsTargets) for label in fold_labels]
        for fold_labels in label_folds
    ]

# Wine: the label is converted with int() and shifted down by one first.
for label_folds in (yWineTrainingSets, yWineValidationSets):
    label_folds[:] = [
        [_one_hot(int(label) - 1, numberOfWineTargets) for label in fold_labels]
        for fold_labels in label_folds
    ]


In [9]:
def getRandomIndices(arr, batch_size):
    """Draw ``batch_size`` distinct random positions into ``arr``.

    Parameters
    ----------
    arr : sized collection
        Only ``len(arr)`` is used.
    batch_size : int
        Number of distinct indices to draw.

    Returns
    -------
    list of int, or None
        ``batch_size`` distinct indices in ``range(len(arr))`` in random order;
        an empty list when ``batch_size`` is not positive. When ``batch_size``
        exceeds ``len(arr)`` an error message is printed and ``None`` is
        returned, so callers must check for ``None``.
    """
    population = len(arr)

    if batch_size > population:
        print("Error: batch size larger than size of dataset.")
        return None

    if batch_size <= 0:
        return []

    # Sample without replacement in one call instead of redrawing on every
    # collision, which degrades badly as batch_size approaches len(arr).
    # ceil keeps the historical behaviour for a fractional batch_size.
    draw_count = int(np.ceil(batch_size))
    return np.random.choice(population, size=draw_count, replace=False).tolist()

In [10]:
# gradient descent class
 
class GradientDescent:
    """Mini-batch gradient descent with momentum over a list of weight vectors.

    Parameters
    ----------
    batch_size : int
        Passed unchanged to the gradient function on every step.
    learning_rate : float
        Step size applied to the update direction.
    momentum : float
        Weight of the previous update direction; ``1 - momentum`` weights the
        current gradient.
    max_iters : int
        Maximum number of update steps performed by ``run``.
    epsilon : float
        ``run`` stops early once the overall gradient norm is at or below this.
    """

    def __init__(self, batch_size, learning_rate=0.5, momentum=0.9, max_iters=100, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.batch_size = batch_size
        self.max_iters = max_iters
        self.epsilon = epsilon
        # Most recent update direction per weight vector; (re)filled by run().
        self.deltas = []

    def run(self, gradient_fn, x, y, w):
        """Minimise by repeatedly stepping against ``gradient_fn``.

        Parameters
        ----------
        gradient_fn : callable
            Called as ``gradient_fn(x, y, w, batch_size)``; must return one
            gradient per entry of ``w``.
        x, y :
            Passed through to ``gradient_fn`` untouched.
        w : list of numpy.ndarray
            Initial weight vectors. The list is updated in place (each entry is
            rebound to a new array) and also returned.

        Returns
        -------
        list of numpy.ndarray
            The updated weights (the same list object as ``w``).
        """
        self.deltas = []
        for _ in range(self.max_iters):
            gradients = gradient_fn(x, y, w, self.batch_size)
            first_step = not self.deltas

            squared_norm = 0.0
            for c in range(len(w)):
                grad_c = np.asarray(gradients[c], dtype=float)
                squared_norm += float(np.sum(np.square(grad_c)))

                if first_step:
                    # No history yet: the first step uses the raw gradient.
                    delta_w = grad_c
                    self.deltas.append(delta_w)
                else:
                    # Blend the previous update direction (not the previous
                    # weights) with the current gradient.
                    delta_w = self.momentum * self.deltas[c] + (1 - self.momentum) * grad_c
                    self.deltas[c] = delta_w

                w[c] = w[c] - self.learning_rate * delta_w

            # Converged: the gradient that produced this step was already negligible.
            if np.sqrt(squared_norm) <= self.epsilon:
                break
        return w

In [11]:
# logistic regression

class LogisticRegression:
    """Multi-class (softmax) logistic regression fitted by an external optimizer.

    Parameters
    ----------
    add_bias : bool
        When true, a constant column of ones is appended to the features in
        both ``fit`` and ``predict`` so each class learns an intercept (stored
        as the last entry of its weight vector).
    """

    def __init__(self, add_bias=True):
        self.add_bias = add_bias

    @staticmethod
    def _softmax(logits):
        """Row-wise softmax of a 2-D array of logits."""
        # Subtracting the row maximum leaves the result unchanged mathematically
        # but keeps exp() from overflowing to inf (and inf/inf from giving NaN).
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exponentials = np.exp(shifted)
        return exponentials / np.sum(exponentials, axis=1, keepdims=True)

    def _design_matrix(self, x):
        """Return ``x`` as a 2-D float array, with a ones column if ``add_bias``."""
        features = np.asarray(x, dtype=float)
        if self.add_bias:
            features = np.column_stack([features, np.ones(features.shape[0])])
        return features

    def fit(self, x, y, optimizer):
        """Fit one weight vector per class.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples, n_classes)
            One-hot encoded targets.
        optimizer : object
            Must provide ``run(gradient_fn, x, y, w)`` returning the fitted
            weights; ``gradient_fn`` is called as
            ``gradient_fn(x, y, w, batch_size)``.

        Returns
        -------
        LogisticRegression
            ``self``, with the fitted weights stored in ``self.w``.
        """
        features = self._design_matrix(x)
        targets = np.asarray(y, dtype=float)

        def gradient(x, y, w, batch_size):
            """Gradient of the softmax cross-entropy on one random mini-batch."""
            indices = getRandomIndices(x, batch_size)
            if indices is None:
                raise ValueError("batch size larger than size of dataset")
            rows = np.asarray(indices, dtype=int)

            batch_x = x[rows]
            batch_y = y[rows]
            probabilities = self._softmax(batch_x @ np.asarray(w, dtype=float).T)

            # (yh - y) x, summed (not averaged) over the mini-batch.
            # TODO: may change, see slide 27 of logistic slideshow
            per_class = (probabilities - batch_y).T @ batch_x
            return list(per_class)

        # One zero-initialised weight vector per class.
        w0 = [np.zeros(features.shape[1]) for _ in range(targets.shape[1])]

        self.w = optimizer.run(gradient, features, targets, w0)
        return self

    def predict(self, x):
        """Return class probabilities for every row of ``x``.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)

        Returns
        -------
        list of list of float
            One inner list per sample, holding the softmax probability of each
            class in the order of ``self.w``. An empty ``x`` gives ``[]``.
        """
        if len(x) == 0:
            return []
        features = self._design_matrix(x)
        weights = np.asarray(self.w, dtype=float)
        return self._softmax(features @ weights.T).tolist()

In [None]:
def accurate(a, b):
    """Return whether ``a`` and ``b`` have their largest entry at the same position."""
    position_in_a = np.argmax(a)
    position_in_b = np.argmax(b)
    return position_in_a == position_in_b

def cost(yh, y):
    """Cross-entropy term ``-y * log(yh)`` for one class of one sample.

    Parameters
    ----------
    yh : float or array-like
        Predicted probability (expected in [0, 1]).
    y : float or array-like
        Target indicator for the same class (1 for the true class, else 0).

    Returns
    -------
    float or numpy.ndarray
        0 for a perfect prediction of the true class, growing as ``yh`` shrinks.
    """
    # Clip away exact zeros so a confident wrong prediction yields a large
    # finite cost instead of inf.
    probability = np.clip(yh, np.finfo(float).tiny, 1.0)
    return -y * np.log(probability)

# TODO: grid-search to find lowest cost combination of model hyper-parameters

batch_size = 30
learning_rate = 0.01
momentum = 0.2


def _score_predictions(model, x_samples, y_samples):
    """Return ``(correct, cost_sum, sample_count)`` for ``model`` on one sample set.

    ``correct`` counts samples for which ``accurate`` holds; ``cost_sum`` adds
    ``cost`` evaluated on the predicted probability of each sample's true class.
    """
    predictions = model.predict(x_samples)
    correct = 0
    cost_sum = 0.0
    for yh_x, y_x in zip(predictions, y_samples):
        if accurate(yh_x, y_x):
            correct += 1
        c = np.argmax(y_x)
        cost_sum += cost(yh_x[c], y_x[c])
    return correct, cost_sum, len(predictions)


def _cross_validate(x_training_sets, y_training_sets, x_validation_sets, y_validation_sets):
    """Fit a fresh model on every fold and average its scores over all folds.

    Uses the module-level ``batch_size``, ``learning_rate`` and ``momentum``.

    Returns
    -------
    tuple of four floats
        ``(training_accuracy, training_cost, validation_accuracy, validation_cost)``.
        Each value is a mean per evaluated sample: the totals are divided by the
        number of samples actually scored, so all four figures are on the same
        scale regardless of fold sizes.
    """
    training_totals = np.zeros(3)    # correct, summed cost, sample count
    validation_totals = np.zeros(3)

    for fold_index, x_training in enumerate(x_training_sets):
        y_training = y_training_sets[fold_index]

        optimizer = GradientDescent(batch_size, learning_rate, momentum)
        model = LogisticRegression(False)
        model.fit(x_training, y_training, optimizer)

        training_totals += _score_predictions(model, x_training, y_training)
        validation_totals += _score_predictions(
            model, x_validation_sets[fold_index], y_validation_sets[fold_index]
        )

    # max(..., 1) avoids a division by zero if no samples were scored.
    training_count = max(training_totals[2], 1)
    validation_count = max(validation_totals[2], 1)
    return (
        training_totals[0] / training_count,
        training_totals[1] / training_count,
        validation_totals[0] / validation_count,
        validation_totals[1] / validation_count,
    )


(digits_training_accuracy,
 digits_training_cost,
 digits_validation_accuracy,
 digits_validation_cost) = _cross_validate(
    xDigitsTrainingSets, yDigitsTrainingSets, xDigitsValidationSets, yDigitsValidationSets
)

(wine_training_accuracy,
 wine_training_cost,
 wine_validation_accuracy,
 wine_validation_cost) = _cross_validate(
    xWineTrainingSets, yWineTrainingSets, xWineValidationSets, yWineValidationSets
)

print("Model hyper-parameters:")
print("\tMini-batch size:", batch_size)
print("\tLearning rate:", learning_rate)
print("\tMomentum:", momentum)
print("Digits training accuracy:", digits_training_accuracy)
print("Digits training cost:", digits_training_cost)
print("Digits validation accuracy:", digits_validation_accuracy)
print("Digits validation cost:", digits_validation_cost)
print("Wine training accuracy:", wine_training_accuracy)
print("Wine training cost:", wine_training_cost)
print("Wine validation accuracy:", wine_validation_accuracy)
print("Wine validation cost:", wine_validation_cost)


In [None]:
# TODO: comparison against another classifier (e.g. KNN)