In [1]:
import numpy as np
from gradient_descent_solutionnnnnt import GradientDescent

__author__ = "Jared Thompson"

class LogisticRegression(object):

    def __init__(self, fit_intercept = True, scale = True, norm = "L2"):
        '''
        INPUT: GradientDescent, function, function, function
        OUTPUT: None
        Initialize class variables. Takes three functions:
        cost: the cost function to be minimized
        gradient: function to calculate the gradient of the cost function
        predict: function to calculate the predicted values (0 or 1) for
        the given data
        '''
        
        # * The use of some of these attributes are a matter of taste. 
        # You're going to want to store user selection of norm and fit_intercept.
        
        # Note that the cost_gradients are given below
        gradient_choices = {None: self.cost_gradient, "L1": self.cost_gradient_lasso, "L2": self.cost_gradient_ridge}

        self.alpha = None
        self.gamma = None
        self.coeffs = None
        self.num_iterations = 0
        self.fit_intercept = fit_intercept
        self.scale = scale
        self.normalize = False
        if norm:
            self.norm = norm
            self.normalize = True
        # All of the magic of regularization is done here. 
        # You pass this choice of function into the gradient descent as 
        # self.gradient
        self.gradient = gradient_choices[norm] 
        self.mu=0
        self.sigma=1
        #fit_intercept=True, normalize=False, gradient=None, mu=None, sigma=None
#        self.threshold=threshold
    def fit(self,  X, y, alpha=0.01, num_iterations=10000, gamma=0.):
        '''
        INPUT: 2 dimensional numpy array, numpy array, float, int, float
        OUTPUT: numpy array
        Main routine to train the model coefficients to the data
        the given coefficients.
        '''
        # * just as you did in gradient_descent, you'll need to store
        # the alpha and gamma variables and the 
        # number of interations you intend to run.
        self.alpha=alpha
        self.gamma=gamma
        self.num_iterations=num_iterations
        # You may also consider storing the dimensions of X.
        self.dimensions=X.shape
        
        
        # * You need to initialize the coefficients - I recommend random numbers.
        # It is at this point that you'll be determining the data structure of          
        # the coefficients. I recommend numpy vectors.
        self.coeffs = np.random.random(X.shape[1])
                
        # * Instantiate a gradientdescent instance
        gradient_descentInstance=GradientDescent(self.fit_intercept, self.normalize, self.gradient)
        # * Now .run() the gradient. Notice that you need to use the local copy        
        # of coefficients as an argument to .run(). The gradientdescent instance       
        # calculates and stores the new coefficients to itself, i.e.
        # gradient.coeffs
        inst=gradient_descentInstance.run(X,y,self.coeffs)
        
        # * save the newly calculated coefficients to self.coeffs.
        self.coeffs=inst.coeffs
        return gradient_descentInstance.newX
    def predict(self, X):
        '''
        INPUT: 2 dimensional numpy array, numpy array
        OUTPUT: numpy array
        Calculate the predicted values (0 or 1) for the given data with
        the given coefficients.
        '''
        # * You need to think of this as returning the value of the hypothesis          
        # function, but rounded using a threshold. One easy way to do this is to        
        # assume that the threshold is 0.5 and just use the np.around() function.         
        # Return a bool or binary valued array, one finding for each data point in X.
        
        if self.normalize:
            X = (X - self.mu) / self.sigma

        if self.fit_intercept:
            X = np.hstack((np.ones((X.shape[0],1)), X))
        
        number=self.hypothesis(X, self.coeffs)
        print(number)
        n=np.around(number)
        return n

    def hypothesis(self, X, coeffs):
        '''
        THIS IS WHERE I HAD THE MOST ISSUES
        
        
        INPUT: 2 dimensional numpy array, numpy array
        OUTPUT: numpy array of floats
        Calculate the predicted percentages (floats between 0 and 1)
        for the given data with the given coefficients.
        '''
        # * Here you're returning a vector of scalar value calculated from the 
        # hypothesis function h(X) = 1/1+e^{-BX}. Remember that each row of the 
        # feature matrix is a set of feature values for a single datapoint. The        
        # hypothesis function requires a dot product between the coefficient      
        # vector and each of those values to get a single estimate for that data     
        # point.
        if X.shape[1] != len(coeffs):
            print("AHHHHHH")
        else:
            return 1 / (1 + np.exp(- (X.dot(coeffs))))

    def cost_function(self, X, y, coeffs):
        '''
        INPUT: 2 dimensional numpy array, numpy array, numpy array
        OUTPUT: float (a scalar)
        Calculate the value of the cost function for the data with the
        given coefficients.
        '''
        # * you'll need to have the vector of values from the hypothesis function
        hypoth_vector = self.hypothesis(X, coeffs)
        
        y_left = y.T.dot(log(hypoth_vector))
        y_right = (1 - y).T.dot(np.log(1 - hypoth_vector))
        m=y.shape[0]
        
        cost=(1/m)*((y_right)+(y_left))
        return cost
        

        # * Return the cost function, 1/m*(y.h(x)+(1-y).(1-h(x)))

    def cost_lasso(self, X, y, coeffs):
        '''
        INPUT: 2 dimensional numpy array, numpy array, numpy array
        OUTPUT: float
        Calculate the value of the cost function with lasso regularization
        for the data with the given coefficients.
        '''
        # * you'll need to have the vector of values from the hypothesis function

        # * you'll return the cost function as above plus the appropriate formula
        # for the regularization
        cost=self.cost_function(X, y, coeffs)
        lasso=self.gamma * sum(abs(coeffs))
        cost_lass=cost+lasso
        return cost_lass

    def cost_ridge(self, X, y, coeffs):
        '''
        INPUT: 2 dimensional numpy array, numpy array, numpy array
        OUTPUT: float
        Calculate the value of the cost function with ridge regularization
        for the data with the given coefficients.
        '''
        # * you'll need to have the vector of values from the hypothesis function

        # * you'll return the cost function as above plus the appropriate formula
        # for the regularization
        cost = self.cost_function(X, y, coeffs)
        ridge = self.gamma * coeffs.dot(coeffs.T) / 2
        
        cost_ridgee=cost+ridge
        return cost_ridgee

    def cost_gradient(self, X, y, coeffs):
        '''
        INPUT: 2 dimensional numpy array, numpy array, numpy array
        OUTPUT: numpy array
        Calculate the gradient of the cost function at the given value
        for the coeffs.
        Return an array of the same size as the coeffs array.
        '''
        # * you'll need to have the vector of values from the hypothesis function
        # * return the formula that you learned in class X^T.(y-h)
        hyp = self.hypothesis(X, coeffs)
        cost_grad=X.T.dot(y - hyp)
        return cost_grad

    
    def cost_gradient_lasso(self, X, y, coeffs):
        '''
        INPUT: 2 dimensional numpy array, numpy array, numpy array
        OUTPUT: numpy array
        Calculate the gradient of the cost function with regularization
        at the given value for the coeffs.
        Return an array of the same size as the coeffs array.
        '''
        # * you'll need to have the vector of values from the hypothesis function
        # * you will also need to calculate the appropriate vector of 
        # coefficients for regularizationn (weights). Do the calculus yourself.

        # * make sure to insert an intercept

        # * return the formula above plus gamma*weights/n
        shape= y.shape[0]
        cost_grad=self.cost_gradient(X, y, coeffs)
        other = self.gamma * (np.sum(np.absolute(coeffs))) / (2 * shape)
        return cost_grad+other
        

    def cost_gradient_ridge(self, X, y, coeffs):
        '''
        INPUT: 2 dimensional numpy array, numpy array, numpy array
        OUTPUT: numpy array
        Calculate the gradient of the cost function with regularization
        at the given value for the coeffs.
        Return an array of the same size as the coeffs array.
        '''
        # * you'll need to have the vector of values from the hypothesis function
        hypoth_vector = self.hypothesis(X, coeffs)
                                                              
        shape= y.shape[0]
        cost_grad=self.cost_gradient(X, y, coeffs)
        other = self.gamma * (np.sum(coeffs**2)) / (2 * shape)
        return cost_grad+other
        # * you will also need to calculate the appropriate vector of 
        # coefficients for regularization(weights). Review the notes to see how          
        # simple this is.       

        # * make sure to insert an intercept
        
        # * return the formula above plus gamma*weights/n
                                                              
                                                    

In [2]:
data = np.genfromtxt('../data/grad.csv', delimiter=',')
y = data[:, -1]
X = data[:, 0:-1]


In [3]:
tester=LogisticRegression()
tester.fit(X,y)

ValueError: non-broadcastable output operand with shape (4,1) doesn't match the broadcast shape (4,401)

In [4]:
import pandas as pd
from sklearn.cross_validation import train_test_split
#fit_intercept, threshold, num_iterations, alpha, reglambda, print_details = True, 0.5, 10000, 0.01, 1, False
data = pd.read_csv("../data/grad.csv")
X = data[['gre', 'gpa', 'rank']].values
y = data['admit'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)



In [5]:
tester=LogisticRegression()
tester.fit(X_train,y_train)

ValueError: non-broadcastable output operand with shape (4,1) doesn't match the broadcast shape (4,300)

In [None]:
predicted = tester.predict(X_test)