# Lab 3 Extending Logistic Regression

#### Optimization
###### Params:
* Batch Gradient Descent -> 'bgd'
* Stochastic Gradient Descent -> 'sgd'
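* Newton's Method -> 'newton'
* eta (learning rate) -> default = 0.01
* iterations (number of update steps) -> default = 20
* One-Step Hessian -> 'one_hessian'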

#### Regularization
###### Params:
* c (regularization strength) -> default = 0
* L1 (Lasso) -> 'lasso'
* L2 (Ridge) -> 'ridge'
* Both -> 'elastic_net'

In [3]:
class BinaryLogisticRegressionBase:
    """Hyper-parameter holder and prediction helpers for binary logistic regression.

    This class never assigns ``self.w_``; the weights must be provided by a
    subclass (or the caller) before ``predict_proba``/``predict`` are used.
    """

    # private:
    def __init__(self, optimization='bgd', eta=0.01, iterations=20, regularization='ridge', c=0):
        # optimizer / penalty selectors
        self.opt = optimization
        self.reg = regularization
        # numeric hyper-parameters
        self.eta = eta
        self.iters = iterations
        self.c = c
        # the weights are stored as self.w_ (sklearn naming convention)

    def __str__(self):
        return 'Base Binary Logistic Regression Object, Not Trainable'

    # convenience, private and static:
    @staticmethod
    def _sigmoid(theta):
        """Return 1 / (1 + exp(-theta)), evaluated by NumPy."""
        denominator = 1 + np.exp(-theta)
        return 1 / denominator

    @staticmethod
    def _add_bias(X):
        """Return ``X`` with a column of ones stacked on its left side."""
        ones_column = np.ones((X.shape[0], 1))
        return np.hstack((ones_column, X))

    # public:
    def predict_proba(self, X, add_bias=True):
        """Return the sigmoid of ``X @ self.w_``.

        When ``add_bias`` is true the ones column is prepended to ``X`` first;
        otherwise ``X`` is used exactly as given.
        """
        if add_bias:
            design = self._add_bias(X)
        else:
            design = X
        scores = design @ self.w_
        return self._sigmoid(scores)

    def predict(self, X):
        """Return a boolean array: True where the probability is strictly above 0.5."""
        probabilities = self.predict_proba(X)
        return probabilities > 0.5
    

In [4]:
class BinaryLogisticRegression(BinaryLogisticRegressionBase):
    """Trainable binary logistic regression with a selectable optimizer and penalty.

    ``self.opt`` chooses the update rule ('bgd', 'sgd', 'newton'/'newtons' or
    'one_hessian') and ``self.reg`` chooses the penalty ('ridge', 'lasso',
    'elastic_net' or None), weighted by ``self.c``.

    Every update rule returns an *ascent* direction on the penalized
    log-likelihood, because ``fit`` adds ``eta * direction`` to the weights.
    Row 0 of ``self.w_`` is the bias weight (``fit`` prepends the ones column)
    and is never penalized.
    """

    # private:
    def __str__(self):
        if hasattr(self, 'w_'):
            # the weights only exist once the object has been trained
            return 'Binary Logistic Regression Object with coefficients:\n' + str(self.w_)
        return 'Untrained Binary Logistic Regression Object'

    # optimization methods
    def _get_gradient(self, X, y):
        """Return the update direction for the optimizer named by ``self.opt``.

        Args:
            X: design matrix that already contains the bias column.
            y: 0/1 (or boolean) targets, one per row of ``X``.

        Raises:
            ValueError: if ``self.opt`` is not a known optimizer name.
        """
        optimizers = {
            'bgd': self._batch_gradient_descent,
            'sgd': self._stochastic_gradient_descent,
            'newton': self._newtons_method,
            'newtons': self._newtons_method,  # alias for 'newton'
            'one_hessian': self._one_step_hessian,
        }
        try:
            step = optimizers[self.opt]
        except KeyError:
            raise ValueError(
                'Unknown optimization {!r}; expected one of {}'.format(
                    self.opt, sorted(optimizers))
            ) from None
        return step(X, y)

    def _residuals(self, X, y):
        """Return ``(y - g, g)`` as 1-D arrays, where ``g`` is the predicted probability."""
        g = self.predict_proba(X, add_bias=False).ravel()
        ydiff = np.asarray(y, dtype=float).ravel() - g
        return ydiff, g

    def _batch_gradient_descent(self, X, y):
        """Mean log-likelihood gradient over all rows, minus the penalty term."""
        ydiff, _ = self._residuals(X, y)
        gradient = (X.T @ ydiff / len(ydiff)).reshape(self.w_.shape)
        return gradient - self.c * self._get_reg_gradient()

    def _stochastic_gradient_descent(self, X, y):
        """Log-likelihood gradient of one uniformly drawn row, minus the penalty term."""
        idx = np.random.randint(len(y))
        row = np.asarray(X)[idx]
        # flatten the weights so the dot product is a scalar, not a 1-element array
        prob = self._sigmoid(row @ self.w_.ravel())
        ydiff = float(np.asarray(y).ravel()[idx]) - prob

        gradient = (row * ydiff).reshape(self.w_.shape)
        return gradient - self.c * self._get_reg_gradient()

    def _newtons_method(self, X, y):
        """Newton-Raphson update direction.

        Delegates to ``_one_step_hessian``, which computes exactly that step;
        both names are kept so either optimizer key can be dispatched.
        """
        return self._one_step_hessian(X, y)

    def _one_step_hessian(self, X, y):
        """Return ``pinv(H) @ gradient`` for the penalized log-likelihood.

        ``H`` is the negated Hessian ``X^T diag(g * (1 - g)) X``; for penalties
        with an L2 part ('ridge', 'elastic_net') ``c`` is added on the diagonal
        of the non-bias weights. The L1 part contributes no curvature.
        """
        ydiff, g = self._residuals(X, y)
        # scale rows instead of building an n-by-n diagonal matrix
        hessian = X.T @ (X * (g * (1 - g))[:, np.newaxis])
        if self.reg in ('ridge', 'elastic_net'):
            l2_curvature = np.eye(hessian.shape[0])
            l2_curvature[0, 0] = 0  # bias weight is not penalized
            hessian = hessian + self.c * l2_curvature

        gradient = (X.T @ ydiff).reshape(self.w_.shape)
        gradient = gradient - self.c * self._get_reg_gradient()

        # pinv keeps the step defined even when the Hessian is singular
        return np.linalg.pinv(hessian) @ gradient

    # regularization methods
    def _get_reg_gradient(self):
        """Return the penalty gradient, shaped like ``self.w_`` with a zero bias row.

        'ridge' -> w, 'lasso' -> sign(w), 'elastic_net' -> w + sign(w),
        None -> zeros.

        Raises:
            ValueError: if ``self.reg`` is not a known penalty name.
        """
        if self.reg is None:
            penalty = np.zeros_like(self.w_)
        elif self.reg == 'ridge':
            penalty = self.w_.copy()  # copy: zeroing the bias row must not touch w_
        elif self.reg == 'lasso':
            penalty = np.sign(self.w_)
        elif self.reg == 'elastic_net':
            penalty = self.w_ + np.sign(self.w_)
        else:
            raise ValueError(
                "Unknown regularization {!r}; expected 'ridge', 'lasso', "
                "'elastic_net' or None".format(self.reg)
            )
        penalty[0] = 0  # bias weight is not penalized
        return penalty

    # public:
    def fit(self, X, y):
        """Learn ``self.w_`` from ``X`` (without bias column) and binary targets ``y``.

        The weights start at zero and receive ``self.iters`` updates of
        ``eta * update_direction``.

        Raises:
            ValueError: if ``X`` has no rows or ``y`` has a different length.
        """
        Xb = self._add_bias(X)  # add bias term
        num_samples, num_features = Xb.shape

        # boolean targets (e.g. from ``y == label``) become 0.0 / 1.0
        y = np.asarray(y, dtype=float).ravel()
        if num_samples == 0:
            raise ValueError('Cannot fit on an empty data set')
        if y.shape[0] != num_samples:
            raise ValueError(
                'X has {} rows but y has {} entries'.format(num_samples, y.shape[0])
            )

        self.w_ = np.zeros((num_features, 1))  # init weight vector to zeros

        # for as many as the max iterations
        for _ in range(self.iters):
            gradient = self._get_gradient(Xb, y)
            self.w_ += gradient * self.eta  # multiply by learning rate

### Logistic Regression Class

In [9]:
class LogisticRegression:
    """One-vs-rest multiclass logistic regression.

    ``fit`` trains one ``BinaryLogisticRegression`` per distinct value in ``y``
    (that value against all others); ``predict`` picks the class whose binary
    classifier reports the highest probability.
    """

    def __init__(self, optimization='bgd', eta=0.01, iterations=20, regularization='ridge', c=0):
        # all settings are forwarded unchanged to every binary classifier
        self.eta = eta
        self.iters = iterations
        self.opt = optimization
        self.reg = regularization
        self.c = c

    def __str__(self):
        if hasattr(self, 'w_'):
            # the weights only exist once the object has been trained
            return 'MultiClass Logistic Regression Object with coefficients:\n' + str(self.w_)
        return 'Untrained MultiClass Logistic Regression Object'

    def fit(self, X, y):
        """Train one binary classifier per unique value of ``y``.

        Sets ``unique_`` (sorted class values), ``classifiers_`` (one trained
        binary model per class, same order) and ``w_`` (one row of weights per
        class).
        """
        # an array is required so that ``y == yval`` compares element-wise
        y = np.asarray(y)
        self.unique_ = np.unique(y)  # get each unique class value
        self.classifiers_ = []  # will fill this list with binary classifiers

        for yval in self.unique_:  # for each unique value
            y_binary = (y == yval)  # create a binary problem
            # train the binary classifier for this class
            blr = BinaryLogisticRegression(self.opt, self.eta, self.iters, self.reg, self.c)
            blr.fit(X, y_binary)
            # add the trained classifier to the list
            self.classifiers_.append(blr)

        # stack the per-class weight columns side by side, then transpose so
        # each class occupies one row
        self.w_ = np.hstack([clf.w_ for clf in self.classifiers_]).T

    def predict_proba(self, X):
        """Return the per-class probabilities, one column per entry of ``classifiers_``."""
        # each classifier contributes its own probability output as a column block
        return np.hstack([clf.predict_proba(X) for clf in self.classifiers_])

    def predict(self, X):
        """Return, for every row of ``X``, the class value with the highest probability."""
        best_column = np.argmax(self.predict_proba(X), axis=1)  # argmax along each row
        # translate column positions back into the original class values
        return self.unique_[best_column]
    
    
# Build an untrained multiclass model (regularization strength left at its
# default) and show its string summary.
lr = LogisticRegression(
    optimization='bgd',
    eta=0.01,
    iterations=100,
    regularization='ridge',
)
print(str(lr))

Untrained MultiClass Logistic Regression Object
