# In [1]:
import numpy as np
from sklearn.base import BaseEstimator

class RidgeRegression(BaseEstimator):
    '''Ridge (L2-regularised) linear regression with an unpenalised intercept.

    The fitted coefficients live in ``beta_``: index 0 holds the intercept
    and the remaining entries hold one weight per feature column.

    Only the closed-form normal-equation solver (``opt_method='neq'``) is
    implemented in this class. Every other value, including the default
    ``'gd'``, makes ``fit`` raise.

    NOTE(review): ``score``, ``mae`` and ``scoreLevels`` are static methods
    that take ``(y, y_hat)``. ``score`` therefore does not follow the
    ``score(X, y)`` signature scikit-learn tooling usually expects; confirm
    before using this estimator with scikit-learn model-selection helpers.
    '''

    def __init__(self, opt_method='gd', alpha=1., eta=0.01, maxsteps=100, eps=1e-8):
        '''Store the hyper-parameters and reset the fitted state.

        Arguments
        ---------
        opt_method: Optimization method to choose for the cost function.
                    Documented values are 'gd' (Gradient Descent) and 'neq'
                    (normal equation); only 'neq' is handled by ``fit``.
        alpha:      Regularization proportionality factor. Larger values
                    correspond with stronger regularization.
        eta:        Fixed step length to take at each gradient descent
                    iteration. Stored only; not read by any method here.
        maxsteps:   Maximum number of Gradient Descent steps to take.
                    Stored only; not read by any method here.
        eps:        Epsilon, length of gradient to be reached with Gradient
                    Descent. Stored only; not read by any method here.
        '''
        # parameters
        self.alpha = alpha
        self.opt_method = opt_method
        self.maxsteps = maxsteps
        self.eps = eps
        self.eta = eta
        # attributes
        # model coefficients (None until a solver has run)
        self.beta_ = None
        # values of cost function along gradient descent iterations
        self.costs_ = []
        # Not read by any method of this class; kept because external code
        # may access it. Presumably bin edges for a per-level score -- TODO confirm.
        self.bins = [0, 5000, 10000, 20000, 100000]

    def fit(self, X, y):
        '''Fit the model with the solver selected by ``opt_method``.

        Returns the coefficient vector ``beta_`` (not ``self``).

        Raises ``ValueError`` when ``opt_method`` is anything other than
        'neq'.
        '''
        if self.opt_method == "neq":
            return self.normalequation(X, y)

        # 'gd' is a documented option but has no implementation in this
        # class, so it lands here together with unknown method names.
        raise ValueError("No available optimization method was chosen.")

    def normalequation(self, X, y):
        '''Compute the ridge coefficients in closed form.

        Solves ``(XX.T @ XX + alpha * P) @ beta = XX.T @ y`` where ``XX`` is
        ``X`` with a leading column of ones and ``P`` is the identity matrix
        with its first diagonal entry set to zero, so the intercept is not
        regularised.

        Arguments
        ---------
        X: 2-D array-like, one row per sample and one column per feature.
        y: Target values, one entry (or row) per sample.

        Returns the coefficient array, which is also stored in ``beta_``.

        Raises ``ValueError`` if ``X`` is not two-dimensional, and
        ``numpy.linalg.LinAlgError`` if the regularised system is singular.
        '''
        features = np.asarray(X, dtype=float)
        if features.ndim != 2:
            raise ValueError(
                "X must be a 2-D array of shape (n_samples, n_features)."
            )
        targets = np.asarray(y, dtype=float)

        # Design matrix: a leading column of ones carries the intercept.
        n_samples = features.shape[0]
        design = np.hstack((np.ones((n_samples, 1)), features))

        # Penalise every coefficient except the intercept.
        penalty = np.identity(design.shape[1])
        penalty[0, 0] = 0.0

        # Solve the linear system directly rather than forming an explicit
        # inverse; same solution, better numerical behaviour.
        gram = design.T @ design + self.alpha * penalty
        self.beta_ = np.linalg.solve(gram, design.T @ targets)

        return self.beta_

    @staticmethod
    def costfunction(beta, X, y, alpha):
        '''Return the value of the ridge regression cost function.

        The cost is the residual sum of squares plus ``alpha`` times the
        squared length of the feature weights:

            sum((y - beta[0] - X @ beta[1:])**2) + alpha * sum(beta[1:]**2)

        The intercept ``beta[0]`` is left out of the penalty so that this is
        the objective minimised by ``normalequation``.
        '''
        residuals = y - (beta[0] + np.dot(X, beta[1:]))
        weights = beta[1:]

        return np.sum(residuals ** 2) + alpha * np.sum(weights ** 2)

    def predict(self, X):
        '''Return the model's predictions for the rows of ``X``.

        ``X`` needs as many columns as ``beta_`` has feature weights (one
        fewer than its length).

        Raises ``RuntimeError`` if no coefficients have been computed yet.
        '''
        if self.beta_ is None:
            raise RuntimeError("Model coefficient haven't been calculated yet. Please call the specific functions to calculate the coefficients.")

        return self.beta_[0] + np.dot(X, self.beta_[1:])

    @staticmethod
    def score(y, y_hat):
        '''Return R^2 of the predictions ``y_hat`` against the targets ``y``.

        R^2 = 1 - (sum of squares of residuals / total sum of squares),
        where the total sum of squares is sum((y_i - mean(y))^2).
        '''
        ss_res = np.sum((y - y_hat) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)

        return 1 - (ss_res / ss_tot)

    @staticmethod
    def mae(y, y_hat):
        '''Return the sum of absolute errors divided by ``len(y)``.'''
        return (1 / len(y)) * np.sum(np.abs(y - y_hat))

    @staticmethod
    def scoreLevels(y, y_hat):
        '''Return the same mean absolute error as ``mae``.

        NOTE(review): the name suggests a per-level score, but the
        computation is identical to ``mae``, so it simply delegates.
        '''
        return RidgeRegression.mae(y, y_hat)