# Reconstructed from a whitespace-mangled git patch to
# Review-Analyzer/Review_Analyzer.py. Only the units the patch adds in
# full are reproduced; the average_perceptron hunk is truncated in the
# patch and is not reproduced here.
from string import punctuation, digits
import random
from math import sqrt

import numpy as np


#pragma: coderesponse template
def pegasos_single_step_update(
        feature_vector,
        label,
        L,
        eta,
        current_theta,
        current_theta_0):
    """Apply one Pegasos update step to the classifier parameters.

    Args:
        feature_vector - A numpy array describing a single data point.
        label - The correct classification (+1/-1) of the feature vector.
        L - The lambda (regularization) value used to update the parameters.
        eta - Learning rate for this update.
        current_theta - The current theta before this update.
        current_theta_0 - The current theta_0 before this update.

    Returns: A tuple (theta, theta_0) with the parameter values after the
    update has completed.
    """
    # Hinge condition: the point is inside the margin (or misclassified)
    # when label * (theta . x + theta_0) <= 1; only then does the data
    # point itself enter the update. Otherwise theta is merely shrunk by
    # the regularization factor and theta_0 is untouched.
    if label * (np.dot(current_theta, feature_vector) + current_theta_0) <= 1:
        new_theta = (1 - eta * L) * current_theta + eta * label * feature_vector
        new_theta_0 = current_theta_0 + eta * label
    else:
        new_theta = (1 - eta * L) * current_theta
        new_theta_0 = current_theta_0
    return new_theta, new_theta_0
#pragma: coderesponse end


#pragma: coderesponse template
def pegasos(feature_matrix, labels, T, L):
    """Run the Pegasos algorithm on the given data for T full passes.

    The learning rate is eta = 1/sqrt(t), where t counts updates performed
    so far (1..nT inclusive). As in the original implementation, t advances
    on every example, whether or not the margin condition fires.

    Args:
        feature_matrix - A numpy matrix; each row is one data point.
        labels - A numpy array; the kth element is the correct
            classification of the kth row of feature_matrix.
        T - Number of passes through the feature matrix.
        L - The lambda value used to update the Pegasos parameters.

    Returns: A tuple (theta, theta_0) found after T passes through the
    feature matrix.
    """
    theta = np.zeros((feature_matrix.shape[1],))
    theta_0 = 0
    t = 1  # global update counter: eta = 1/sqrt(t), t in 1..nT
    for _ in range(T):
        for i, feature_vector in enumerate(feature_matrix):
            eta = 1 / sqrt(t)
            # Reuse the single-step update rather than duplicating its math.
            theta, theta_0 = pegasos_single_step_update(
                feature_vector, labels[i], L, eta, theta, theta_0)
            t += 1
    return theta, theta_0
#pragma: coderesponse end