In [3]:
"""
LogisticRegression.py

CS440/640: Lab-Week5

Lab goal: 1) Implement logistic regression classifier
"""

import numpy as np 
import matplotlib.pyplot as plt 

class LogisticRegression:
    """
    This class implements a Logistic Regression Classifier.
    """
    
    def __init__(self, input_dim, output_dim):
        """
        Initializes the parameters of the logistic regression classifer to 
        random values.
        
        args:
            input_dim: Number of dimensions of the input data
            output_dim: Number of classes
        """
        
        self.theta = np.random.randn(input_dim, output_dim) / np.sqrt(input_dim)
        self.bias = np.zeros((1, output_dim))
    
    #--------------------------------------------------------------------------
    
    def compute_cost(self,X, y):
        """
        Computes the total cost on the dataset.
        
        args:
            X: Data array
            y: Labels corresponding to input data
        
        returns:
            cost: average cost per data sample
        """
        
#         m = X.shape[0] #number of training examples
#         theta = np.reshape(self.theta,(len(self.theta),1))

#         #y = reshape(y,(len(y),1))

#         J = (1./m) * (-transpose(y).dot(log(sigmoid(X.dot(theta)))) - transpose(1-y).dot(log(1-sigmoid(X.dot(theta)))))

#         grad = transpose((1./m)*transpose(sigmoid(X.dot(theta)) - y).dot(X))
#         #optimize.fmin expects a single value, so cannot return grad
#         return J[0][0]#,grad

#         # do the total cost function
        
        # get all number of points
        m = X.shape[0]
        total = 0
        z = hypothesis(self.theta, X) #np.dot(np.transpose(self.theta), X)
        for i in range(len(X)):
            label = y[i]
            cost = ((-1 * label)*np.log(sigmoid(z))) - ((1 - label)*np.log(1 - sigmoid(z)))
            total += cost
        total /= m
        
#         return total

    
    #--------------------------------------------------------------------------
 
    def predict(self,X):
        """
        Makes a prediction based on current model parameters.
        
        args:
            X: Data array
            
        returns:
            predictions: array of predicted labels
        """
        print("X: " + str(len(X)))
        print("Weights: " + str(self.theta.shape))
        z = np.dot(X,np.transpose(self.theta)) + self.bias
        exp_z = np.exp(z)
        softmax_scores = exp_z / np.sum(exp_z, axis=1, keepdims=True)
        predictions = np.argmax(softmax_scores, axis = 1)
        return predictions
        
    #--------------------------------------------------------------------------
    
    def fit(self,X,y):
        """
        Learns model parameters to fit the data.
        """  
        # do the derivative of the total cost function
        z = hypothesis(self.theta, X)
        return sigmoid(z) * (1 - sigmoid(z))
    
#--------------------------------------------------------------------------
#--------------------------------------------------------------------------

def plot_decision_boundary(model, X, y):
    """
    Function to print the decision boundary given by model.
    
    args:
        model: model, whose parameters are used to plot the decision boundary.
        X: input data
        y: input labels
    """
    
    x1_array, x2_array = np.meshgrid(np.arange(-4, 4, 0.01), np.arange(-4, 4, 0.01))
    grid_coordinates = np.c_[x1_array.ravel(), x2_array.ravel()]
    print(grid_coordinates.shape)
    print(X.shape)
    Z = model.predict(grid_coordinates)
    Z = Z.reshape(x1_array.shape)
    plt.contourf(x1_array, x2_array, Z, cmap=plt.cm.bwr)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.bwr)
    plt.show()

#--------------------------------------------------------------------------

# helper function sigmoid to determine the cost
def sigmoid(z):
    return 1 / (1 + (np.e ** (-1 * z)))

def hypothesis(theta,X):
    return np.dot(np.transpose(theta), X)

################################################################################    
X_values = np.genfromtxt('DATA/Linear/X.csv', delimiter=",")
y_values = np.genfromtxt('DATA/Linear/y.csv', delimiter=",")
# print(len(X_values))
# print(len(y_values))

l = LogisticRegression(1000,2)
l.compute_cost(X_values, y_values)
# l.fit(X_values, y_values)
# print(len(X_values))

# plot_decision_boundary(l, X_values, y_values)
# print(x.compute_cost(np.array([1,2]),[1,2]))

# print(x.fit(np.array([1,2]),[1,0]))


    

ValueError: total size of new array must be unchanged