In [1]:
import numpy as np
import numpy.linalg as lia
import pandas as pd
import matplotlib as plt

In [2]:
from sklearn import datasets
digits = datasets.load_digits()

In [3]:
print(len(digits.data))
print(digits.target.size)

1797
1797


In [4]:
from sklearn.datasets import fetch_openml
wine = fetch_openml(name='wine', version=1)

In [5]:
print(len(wine.data))
print(wine.target.size)

178
178


In [6]:
#normalising wine data
wine_data_norm = []
for col in wine.data.T:
    col_norm = col/np.amax(col)
    wine_data_norm.append(col_norm)
    
wine.data = np.asarray(wine_data_norm).T

In [7]:
# 5-fold cross validation for digits dataset

digitsTrainingSetSize = int(np.ceil(0.8 * len(digits.data)))
digitsValidationSetSize = int(len(digits.data) - digitsTrainingSetSize)

xDigitsTrainingSets = []
yDigitsTrainingSets = []
xDigitsValidationSets = []
yDigitsValidationSets = []

for foldIndex in range(5):

    xValidationSet = []
    yValidationSet = []

    for index, data in enumerate(digits.data[foldIndex*digitsValidationSetSize:((foldIndex*digitsValidationSetSize)+digitsValidationSetSize)]):
        xValidationSet.append(data.tolist())
        yValidationSet.append(digits.target[index])
    
    xTrainingSet = []
    yTrainingSet = []

    for index, data in enumerate(digits.data.tolist()):
        if data not in xValidationSet:
            xTrainingSet.append(data)
            yTrainingSet.append(digits.target[index])
            
    xDigitsTrainingSets.append(xTrainingSet)
    yDigitsTrainingSets.append(yTrainingSet)
    xDigitsValidationSets.append(xValidationSet)
    yDigitsValidationSets.append(yValidationSet)
    
# 5-fold cross validation for wine dataset

wineTrainingSetSize = int(np.ceil(0.8 * len(wine.data)))
wineValidationSetSize = int(len(wine.data) - wineTrainingSetSize)

xWineTrainingSets = []
yWineTrainingSets = []
xWineValidationSets = []
yWineValidationSets = []

for foldIndex in range(5):

    xValidationSet = []
    yValidationSet = []

    for index, data in enumerate(wine.data[foldIndex*wineValidationSetSize:((foldIndex*wineValidationSetSize)+wineValidationSetSize)]):
        xValidationSet.append(data.tolist())
        yValidationSet.append(wine.target[index])
    
    xTrainingSet = []
    yTrainingSet = []

    for index, data in enumerate(wine.data.tolist()):
        if data not in xValidationSet:
            xTrainingSet.append(data)
            yTrainingSet.append(wine.target[index])
            
    xWineTrainingSets.append(xTrainingSet)
    yWineTrainingSets.append(yTrainingSet)
    xWineValidationSets.append(xValidationSet)
    yWineValidationSets.append(yValidationSet)
    

In [8]:
# one-hot encoding of y for digits dataset

numberOfDigitsTargets = 10
numberOfWineTargets = 3

for index, fold in enumerate(yDigitsTrainingSets):
    encodedFold = []
    for i, y in enumerate(fold):
        encoding = np.zeros(numberOfDigitsTargets)
        encoding[y] = 1
        encodedFold.append(encoding.tolist())
    yDigitsTrainingSets[index] = encodedFold
    
for index, fold in enumerate(yDigitsValidationSets):
    encodedFold = []
    for i, y in enumerate(fold):
        encoding = np.zeros(numberOfDigitsTargets)
        encoding[y] = 1
        encodedFold.append(encoding.tolist())
    yDigitsValidationSets[index] = encodedFold

# one-hot encoding of y for wine dataset

for index, fold in enumerate(yWineTrainingSets):
    encodedFold = []
    for i, y in enumerate(fold):
        encoding = np.zeros(numberOfWineTargets)
        encoding[int(y)-1] = 1
        encodedFold.append(encoding.tolist())
    yWineTrainingSets[index] = encodedFold
    
for index, fold in enumerate(yWineValidationSets):
    encodedFold = []
    for i, y in enumerate(fold):
        encoding = np.zeros(numberOfWineTargets)
        encoding[int(y)-1] = 1
        encodedFold.append(encoding.tolist())
    yWineValidationSets[index] = encodedFold


In [9]:
def getRandomIndices(arr, batch_size):
    indices = []
    
    if batch_size > len(arr):
        print("Error: batch size larger than size of dataset.")
        return
    
    while batch_size > 0:
        x = np.floor(np.random.random() * len(arr))
        if x not in indices:
            indices.append(int(x))
            batch_size -= 1
    
    return indices

In [10]:
# gradient descent class
 
class GradientDescent:
    
    def __init__(self, batch_size, learning_rate=0.5, momentum=0.9, max_iters=10, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.batch_size = batch_size
        self.max_iters = max_iters
        self.epsilon = epsilon
            
    def run(self, gradient_fn, x, y, w):
        # TODO: use epsilon
        
        grad = np.inf
        t = 1
        
        while np.linalg.norm(grad) > self.epsilon and t < self.max_iters:
            # TODO: implement momentum here
            
            print("gradient descent step:", t)
            
            gradients = gradient_fn(x, y, w, self.batch_size)
            
            for c in range(len(y[0])):
                w[c] = w[c] - self.learning_rate * gradients[c]
                print("w for class: ", w[c])
            
            print("###################################################")
            
            t += 1
        
        return w

In [11]:
# logistic regression

class LogisticRegression:
    def __init__(self, add_bias=True):
        self.add_bias = add_bias
        pass
            
    def fit(self, x, y, optimizer):
        # TODO: add bias
        
        def gradient(x, y, w, batch_size):
            gradients = np.zeros(len(w)).tolist()

            indices = getRandomIndices(x, batch_size)
            
            print("indices for batch:", indices)

            for index in indices:
                a = np.asarray(x[index])
                b = np.asarray(y[index])
                
                print("x:", a.astype(int))
                print("y:", b)
                
                # do max normalization on input for
                # numerical stability during softmax
                
#                 max_x = np.amax(a)
#                 a = a - max_x

                print("x:", a)
                for c in range(len(b)):
                    w_x =  w[c] @ a
                    num = np.exp(w_x)

                    den = 0
                    for i in range(len(b)):
                        w_x =  w[i] @ a
                        den += np.exp(w_x)

                    yh_c = num/den

                    y_c = b[c]
                    cost_c = np.dot(yh_c - y_c, a)
                    
                    gradients[c] += cost_c
                    
                    print("class:", c)
                    print("softmax numerator:", num)
                    print("softmax denominator:", den)
                    print("y hat for class:", yh_c)
                    print("y actual for class:", y_c)
                    print("x gradient:", cost_c)
                    print("new gradient for class:", gradients[c])

            return gradients
        
#        w0 = np.random.rand(len(y[0]),len(x[0])).tolist()
        
        w0 = []
        for c in range(len(y[0])):
            w0.append(np.zeros(len(x[0])))
            
        self.w = optimizer.run(gradient, x, y, w0)
    
    def predict(self, x):
        # TODO: not tested yet, so not sure if it works
        
        if self.add_bias:
            x = np.column_stack([x,np.ones(N)])
        yh = x@self.w
        return yh

In [12]:
gradientDescentModel = GradientDescent(2)
logisticRegressionModel = LogisticRegression(False)

In [13]:
xtest = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [1, 1, 2, 2]])
ytest = np.asarray([[0, 1, 0], [1, 0, 1], [0, 0, 1]])

logisticRegressionModel.fit(xWineTrainingSets[0], yWineTrainingSets[0], gradientDescentModel)

#logisticRegressionModel.fit(xtest, ytest, gradientDescentModel)



#xWineTrainingSets = []
#yWineTrainingSets = []
#xWineValidationSets = []
#yWineValidationSets = []

gradient descent step: 1
indices for batch: [53, 90]
x: [0 0 0 0 0 0 0 0 0 0 0 0 0]
y: [0. 1. 0.]
x: [0.78489548 0.35517241 0.76160991 0.72       0.51851852 0.50257732
 0.33267717 0.72727273 0.37709497 0.21538462 0.58479532 0.6875
 0.4047619 ]
class: 0
softmax numerator: 1.0
softmax denominator: 3.0
y hat for class: 0.3333333333333333
y actual for class: 0.0
x gradient: [0.26163183 0.1183908  0.25386997 0.24       0.17283951 0.16752577
 0.11089239 0.24242424 0.12569832 0.07179487 0.19493177 0.22916667
 0.13492063]
new gradient for class: [0.26163183 0.1183908  0.25386997 0.24       0.17283951 0.16752577
 0.11089239 0.24242424 0.12569832 0.07179487 0.19493177 0.22916667
 0.13492063]
class: 1
softmax numerator: 1.0
softmax denominator: 3.0
y hat for class: 0.3333333333333333
y actual for class: 1.0
x gradient: [-0.52326365 -0.23678161 -0.50773994 -0.48       -0.34567901 -0.33505155
 -0.22178478 -0.48484848 -0.25139665 -0.14358974 -0.38986355 -0.45833333
 -0.26984127]
new gradient for cla