In [2]:
import math
import numpy as np

class LogReg:
    def __init__(self,dim, l_rate):
        dim = max(1, dim)
        self.dim = dim
        self.l_rate = l_rate # learning rate
        self.weights = np.zeros(dim+1)

    def init_weights(self):
        self.weights = np.zeros(self.dim + 1)
        
    def reshape_X(self, X):
        #number of examples
        if len(X.shape) > 1 and X.shape[0] >= 1:
            num_ex = X.shape[0]
            return np.c_[ np.ones(num_ex), X]
        else:
            cur_size = X.size
            return np.r_[1, X]

    def risk_score(self, X):
        #should return (n, 1)
        res_X = self.reshape_X(X)
        my_risk = np.dot(res_X,self.weights)
        return my_risk

    def sigmoid(self, X):
        #theta(s) = e^s/(1+e^s)
        cur_es = np.exp(risk_score(X))
        return np.divide(cur_es, np.add(1, cur_es))

    def gradient(self, X, y):
        #grad(E_in) = (-1/N)*sum(n=1;N){(y_n*x_n)/(1+e^(y_n*wT(t)*x_n))}
        res_X = self.reshape_X(X)
        cur_N = X.shape[0]
        cur_numer = np.multiply(y,res_X) #y_n*x_n by row, should be (n,dim+1)
        #should return (n,1)
        cur_denom = np.add(1, np.exp(np.multiply(y, self.risk_score(X))))
        #divide cur_numer row wise by cur_denom, should still be (n, dim+1)
        presum = np.divide(cur_numer, cur_denom)
        #sum by column
        cur_sum = np.sum(presum, axis = 0)
        #now normalize by (-1/N) and return
        cur_sum = np.divide(cur_sum, -1*cur_N)
        return cur_sum

    def sto_gradient(self, xn, yn):
        #stochastic gradient, should be only one example
        res_X = self.reshape_X(xn)
        cur_numer = np.multiply(yn, res_X)
        cur_denom = np.add(1, np.exp(np.multiply(yn, self.risk_score(xn))))
        return np.multiply(-1, np.divide(cur_numer, cur_denom))
    
    def update_weights(self, X, y):
        #w(t+1) = w(t) - l_rate * gradient
        cur_grad = self.gradient(X,y)
        self.weights = np.subtract(self.weights, np.multiply(self.l_rate, cur_grad))
    
    def sto_gd(self, X, y):
        # a run of stochastic gradient descent
        cur_num = X.shape[0]
        #get indices for every row/example in X and shuffle them
        cur_idxs = np.arange(cur_num)
        np.random.shuffle(cur_idxs)
        #now update weights one by one
        for cur_idx in cur_idxs:
            cur_grad = self.sto_gradient(X[cur_idx], y[cur_idx])
            self.weights = np.subtract(self.weights, np.multiply(self.l_rate, cur_grad))

    def ce_error(self, X, y):
        #cross-entropy error
        #e_in = (1/N) sum(n=1;N){ ln(1+e^(-yn*wT*xn))}
        res_X = self.reshape_X(X)
        cur_N = res_X.shape[0]
        cur_val = np.log(np.add(1, np.exp(np.multiply(np.multiply(-1,y), self.risk_score(X)))))
        #should be (n,1)
        return np.divide(np.sum(cur_val), cur_N)
        

        


In [23]:
class Line:
    def __init__(self, p1, p2):
        self.p1 = p1
        self.p2 = p2
        diff = np.subtract(p2, p1)
        if diff[0] <= 0.0001:
            self.slope = None
            self.is_vert = True
        else:
            self.slope = diff[1]/diff[0]
            self.is_vert = False
        if not self.is_vert:
            self.y_int = ((-1 * p1[1])/self.slope) + p1[0]

        
    def label(self,testpt):
        if self.is_vert == False:
            line_y = self.slope*testpt[0] + self.y_int
            diff = testpt[1] - line_y
        else:
            line_x = self.p1[0]
            diff = testpt[0] - line_x
        return np.sign(diff)
    
    def label_pts(self, ptset):
        label_list = np.array([])
        for pt in ptset:
            label = self.label(pt)
            label_list = np.concatenate((label_list, [label]))
        return label_list
    
logreg = LogReg(2, 0.01)         
LogR_EXP = 100 
LogR_N = 100 
LogR_WTHRESH = 0.01 

logr_epochs = np.array([]) 
logr_eout = np.array([])

for i in range(LogR_EXP):
    logreg.init_weights() 
    line_pts = np.random.uniform(-1, 1, (2,2))
    line = Line(line_pts[0], line_pts[1])
    train_pts = np.random.uniform(-1,1,(LogR_N,2))
    labels = line.label_pts(train_pts)
    
    epochs = 0 
    wdiff = 1
    while wdiff >= LogR_WTHRESH:
        epochs = epochs + 1
        w_t = logreg.weights 
        logreg.sto_gd(train_pts, labels) 
        w_p = logreg.weights 
        wdiff = np.linalg.norm(np.subtract(w_p, w_t)) 
        
    logr_epochs = np.concatenate((logr_epochs, [epochs])) 
    
    Eouts = np.random.uniform(-1,1, (LogR_N, 2))
    Eouts_labels = line.label_pts(Eouts)
    eout = logreg.ce_error(Eouts, Eouts_labels)
    
    logr_eout = np.concatenate((logr_eout,[eout]))

logr_epochs_avg = np.average(logr_epochs)
logr_eout_avg = np.average(logr_eout)

In [24]:
lr_epochs_avg

302.64

In [25]:
lr_eout_avg

0.08771792982246518