In [1]:
import numpy as np

In [2]:
# Load the saved feature matrices and label vectors for both splits
# (same read order as before: test files first, then train files).
test_data, test_labels = np.load('test_features.npy'), np.load('test_labels.npy')
train_data, train_labels = np.load('train_features.npy'), np.load('train_labels.npy')

In [3]:
# Show the dimensions of each loaded array, one per line: train features,
# train labels, test features, test labels.
print(*(arr.shape for arr in (train_data, train_labels, test_data, test_labels)), sep='\n')

(1561, 2)
(1561,)
(424, 2)
(424,)


In [4]:
# List the distinct class labels present in the training set; the prediction
# step maps raw scores onto these same values.
print(np.unique(train_labels))

[-1  1]


In [8]:
def sigmoid(s):
    """Numerically stable logistic function ``1 / (1 + exp(-s))``.

    Parameters
    ----------
    s : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Element-wise sigmoid of ``s``; a scalar input yields a scalar, an
        array input yields an array of the same shape.
    """
    s = np.asarray(s)
    # exp(-|s|) always lies in (0, 1], so it can never overflow. The naive
    # form exp(-s) overflows (with a RuntimeWarning) for large negative s.
    decay = np.exp(-np.abs(s))
    # s >= 0:  1 / (1 + exp(-s))
    # s <  0:  exp(s) / (1 + exp(s))   (the same value, rewritten)
    result = np.where(s >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # arrays with one or more dimensions unchanged.
    return result[()]


def log_regression_with_sgd(labels, data, eta, epochs):
    """Fit logistic-regression weights from randomly sampled gradient terms.

    Each epoch draws ``len(labels)`` row indices uniformly at random, with
    replacement, from the global ``np.random`` state. For every drawn row it
    accumulates ``y_i * x_i * sigmoid(-y_i * w^T x_i)`` at the current
    weights, and at the end of the epoch performs one update::

        w <- w - eta * ( -(1 / N) * accumulated_sum )

    Parameters
    ----------
    labels : array-like of shape (N,) or (N, 1)
        Class labels (the notebook uses -1 / +1).
    data : array-like of shape (N, d)
        Feature matrix, one row per sample. Any bias column must already be
        part of ``data``; one weight is learned per column.
    eta : float
        Learning rate.
    epochs : int
        Number of epochs, i.e. number of weight updates.

    Returns
    -------
    list
        A one-element list holding the final weights as a ``(d, 1)`` array.

    Raises
    ------
    ValueError
        If ``labels`` is empty (there is nothing to sample from).
    """
    labels = np.asarray(labels)
    data = np.asarray(data)
    n_samples = len(labels)
    if n_samples == 0:
        raise ValueError("cannot fit logistic regression on an empty dataset")

    # One weight per feature column instead of a hard-coded three.
    n_features = data.shape[1]
    weights = np.zeros((n_features, 1))

    for _epoch in range(epochs):
        gradient_sum = np.zeros(n_features)
        for _draw in range(n_samples):
            i = np.random.randint(0, n_samples)
            y_i = labels[i]
            x_i = data[i, :]
            # weights.T @ x_i has shape (1,); it broadcasts against x_i below.
            margin = y_i * (weights.T @ x_i)
            gradient_sum += (y_i * x_i) * sigmoid(-margin)
        # Average once per epoch; only the final sum is needed for the update.
        gradient = -(1 / n_samples) * gradient_sum
        weights = weights - (eta * gradient).reshape(n_features, 1)

    return [weights]

def predict_data_sgd(data, weights_star):
    """Return the raw linear scores ``data @ weights_star``.

    No sigmoid or thresholding is applied here; the caller decides how to
    turn the scores into class labels.
    """
    scores = data @ weights_star
    return scores

def accuracy(labels, data, weights_star):
    """Fraction of rows in ``data`` whose predicted label matches ``labels``.

    Scores come from ``predict_data_sgd``; a score greater than 0 is predicted
    as class 1, anything else as class -1.

    Parameters
    ----------
    labels : array-like of shape (N,) or (N, 1)
        True labels, expected to be -1 / +1.
    data : array-like of shape (N, d)
        Feature matrix.
    weights_star : array-like of shape (d,) or (d, 1)
        Weight vector.

    Returns
    -------
    float
        Number of correct predictions divided by ``len(labels)``.

    Raises
    ------
    ValueError
        If the number of scores differs from the number of labels.
    ZeroDivisionError
        If ``labels`` is empty.
    """
    # Flatten both sides before comparing. With a column weight vector the
    # scores have shape (N, 1); comparing them to 1-D labels would broadcast
    # to an (N, N) matrix and silently produce a wrong count.
    true_labels = np.ravel(labels)
    scores = np.ravel(predict_data_sgd(data, weights_star))
    if scores.size != true_labels.size:
        raise ValueError(
            f"got {scores.size} predictions for {true_labels.size} labels"
        )
    predicted_labels = np.where(scores > 0, 1, -1)
    n_correct = np.count_nonzero(true_labels == predicted_labels)
    return n_correct / len(true_labels)



def accuracies(train_labels, train_data, test_labels, test_data, eta, epochs):
    """Train on the training split and score both splits.

    Parameters
    ----------
    train_labels, train_data
        Labels and feature matrix used to fit the weights.
    test_labels, test_data
        Labels and feature matrix of the held-out split.
    eta : float
        Learning rate passed to ``log_regression_with_sgd``.
    epochs : int
        Number of epochs passed to ``log_regression_with_sgd``.

    Returns
    -------
    tuple
        ``(training_accuracy, test_accuracy)`` as returned by ``accuracy``.
    """
    fitted = log_regression_with_sgd(train_labels, train_data, eta, epochs)
    # The trainer returns its weights wrapped in a list; collapse that to a
    # single column vector. -1 lets NumPy infer the number of weights, so this
    # is not tied to exactly three of them.
    weights_star = np.array(fitted).reshape(-1, 1)
    training_accuracy = accuracy(train_labels, train_data, weights_star)
    test_accuracy = accuracy(test_labels, test_data, weights_star)
    return training_accuracy, test_accuracy




In [9]:
# The first weight acts as the bias, so prepend a constant column of ones to
# each feature matrix to pair with it.
update_train_data = np.append(np.ones((len(train_data), 1)), train_data, axis=1)
update_test_data = np.append(np.ones((len(test_data), 1)), test_data, axis=1)

# Turn the label vectors into column vectors, matching the (N, 1) scores that
# `data @ weights` yields for a column weight vector. -1 lets NumPy infer the
# row count from the array instead of hard-coding this dataset's sizes.
train_labels = train_labels.reshape(-1, 1)
test_labels = test_labels.reshape(-1, 1)

In [10]:
# The learning rate and the number of epochs are hand-picked hyperparameters;
# this run uses eta = 0.15 and 2000 epochs.
# NOTE(review): no RNG seed is set in the visible cells, so the reported
# accuracies are expected to vary from run to run.
train_accuracy, test_accuracy = accuracies(
    train_labels,
    update_train_data,
    test_labels,
    update_test_data,
    eta=0.15,
    epochs=2000,
)

print(f"Train accuracy: {train_accuracy}")
print(f"Test accuracy: {test_accuracy}\n")

Train accuracy: 0.9769378603459321
Test accuracy: 0.9528301886792453

