In [None]:
from utils import sigmoid

import numpy as np


def logistic_predict(weights, data):
    """ Return the probability the logistic classifier assigns to each example.

    Note: N is the number of examples
          M is the number of features per example

    :param weights: A vector of weights with dimension (M + 1) x 1, where
    the last element corresponds to the bias (intercept).
    :param data: A matrix with dimension N x M, where each row corresponds to
    one data point.
    :return: A vector of probabilities with dimension N x 1, which is the output
    to the classifier.
    """
    num_examples = data.shape[0]

    # Append a constant-1 feature as the last column so that the last entry of
    # `weights` is applied as the bias term by the same matrix product.
    augmented_inputs = np.concatenate(
        (data, np.ones((num_examples, 1))),
        axis=1,
    )

    # Linear score for every example, squashed into (0, 1) by the sigmoid.
    return sigmoid(np.dot(augmented_inputs, weights))

In [None]:
def evaluate(targets, y):
    """ Compute evaluation metrics.

    Note: N is the number of examples
          M is the number of features per example

    :param targets: A vector of targets with dimension N x 1.
    :param y: A vector of probabilities with dimension N x 1.
    :return: A tuple (ce, frac_correct)
        WHERE
        ce: (float) Averaged cross entropy
        frac_correct: (float) Fraction of inputs classified correctly. An
            input counts as correct when |target - y| < 0.5, or when the
            target is 1 and y is exactly 0.5.
    :raises ValueError: If the inputs are empty or hold a different number of
        elements.
    """
    # Flatten both inputs so that (N, 1), (1, N) and (N,) arrays are paired
    # element by element instead of broadcasting into an N x N matrix.
    t = np.asarray(targets, dtype=float).reshape(-1)
    p = np.asarray(y, dtype=float).reshape(-1)

    if t.size != p.size:
        raise ValueError(
            "targets and y must have the same number of elements "
            "(got %d and %d)" % (t.size, p.size)
        )
    if p.size == 0:
        raise ValueError("cannot evaluate an empty set of predictions")

    # Keep probabilities strictly inside (0, 1) so log() stays finite and
    # 0 * log(0) cannot poison the result with NaN.
    eps = np.finfo(float).eps
    p_safe = np.clip(p, eps, 1.0 - eps)

    # Mean (not sum) of the per-example cross entropy, so the value is
    # comparable across data sets of different sizes.
    per_example_ce = -t * np.log(p_safe) - (1.0 - t) * np.log(1.0 - p_safe)
    ce = float(np.mean(per_example_ce))

    # A prediction is correct when it lies on the target's side of 0.5.
    # A probability of exactly 0.5 is treated as predicting class 1.
    gap = np.abs(t - p)
    correct = (gap < 0.5) | ((t == 1) & (gap == 0.5))
    frac_correct = float(np.mean(correct))

    return ce, frac_correct

In [None]:
def logistic(weights, data, targets, hyperparameters):
    """ Calculate the cost and its derivatives with respect to weights.
    Also return the predictions.

    Note: N is the number of examples
          M is the number of features per example

    :param weights: A vector of weights with dimension (M + 1) x 1, where
    the last element corresponds to the bias (intercept).
    :param data: A matrix with dimension N x M, where each row corresponds to
    one data point.
    :param targets: A vector of targets with dimension N x 1 (a flat vector of
    N values is accepted as well).
    :param hyperparameters: The hyperparameter dictionary. It is accepted for
    interface compatibility and is not read by this function.
    :returns: A tuple (f, df, y)
        WHERE
        f: The average of the loss over all data points.
           This is the objective that we want to minimize.
        df: (M + 1) x 1 vector of derivative of f w.r.t. weights.
        y: N x 1 vector of probabilities.
    :raises ValueError: If data contains no examples.
    """
    num_examples = data.shape[0]
    if num_examples == 0:
        raise ValueError("data must contain at least one example")

    y = logistic_predict(weights, data)

    # Force column vectors so `probs - t` is an element-wise difference and
    # never an accidental N x N broadcast when targets arrive as shape (N,).
    probs = np.asarray(y, dtype=float).reshape(num_examples, 1)
    t = np.asarray(targets, dtype=float).reshape(num_examples, 1)

    # Mean cross entropy; clipping keeps log() finite when the sigmoid
    # saturates to exactly 0 or 1.
    eps = np.finfo(float).eps
    safe = np.clip(probs, eps, 1.0 - eps)
    f = float(np.mean(-t * np.log(safe) - (1.0 - t) * np.log(1.0 - safe)))

    # Gradient of the mean loss: the feature rows come first and the bias
    # entry last, matching the layout of `weights`. Dividing by N keeps df
    # the derivative of f (a mean), so its scale does not grow with N.
    residual = probs - t
    grad_w = np.dot(data.T, residual)
    grad_b = np.sum(residual, axis=0, keepdims=True)
    df = np.concatenate((grad_w, grad_b), axis=0) / num_examples

    return f, df, y

In [None]:
from check_grad import check_grad
from utils import *
from logistic import *

import matplotlib.pyplot as plt
import numpy as np

def run_check_grad(hyperparameters):
    """ Run a gradient check on the logistic function and print the result.

    Random weights, inputs and targets are drawn for a small synthetic problem
    (20 examples with 10 dimensions each) and handed to check_grad together
    with the logistic function.

    :param hyperparameters: The hyperparameter dictionary, passed through to
    check_grad unchanged.
    :return: None
    """
    n_samples, n_features = 20, 10

    # Draw order (weights, then inputs, then targets) determines the values
    # produced under a fixed numpy seed, so it is kept as is.
    w = np.random.randn(n_features + 1, 1)   # one extra row for the bias
    X = np.random.randn(n_samples, n_features)
    t = np.random.rand(n_samples, 1)         # uniform values in [0, 1)

    check_args = (logistic, w, 0.001, X, t, hyperparameters)
    diff = check_grad(*check_args)

    print("diff =", diff)

In [None]:
def run_logistic_regression():
    """ Train logistic regression by gradient descent and report the results.

    Loads the training, validation and test splits, runs the gradient check,
    performs a fixed number of gradient-descent updates while recording cross
    entropy and accuracy on the training and validation splits, prints the
    test accuracy and plots both metrics against the iteration number.

    :return: A tuple (test_ce, test_accuracy) as returned by evaluate for the
        test split using the final weights.
    """
    train_inputs, train_targets = load_train()
    valid_inputs, valid_targets = load_valid()
    test_inputs, test_targets = load_test()

    _, num_features = train_inputs.shape

    hyperparameters = {
        "learning_rate": 0.001,
        "weight_regularization": 0,
        "num_iterations": 50
    }

    # Every weight, bias included, starts at the same value (1 - 0.8).
    weights = np.ones((num_features + 1, 1)) - 0.8

    # Verify that the logistic function produces the right gradient;
    # the printed diff should be very close to 0.
    run_check_grad(hyperparameters)

    num_iterations = hyperparameters["num_iterations"]
    step_size = hyperparameters["learning_rate"]

    ce_train_curve, acc_train_curve = [], []
    ce_valid_curve, acc_valid_curve = [], []

    # Gradient descent.
    for _ in range(num_iterations):
        # Training metrics describe the weights *before* this step's update ...
        _, gradient, train_probs = logistic(
            weights, train_inputs, train_targets, hyperparameters)
        ce_now, acc_now = evaluate(train_targets, train_probs)
        ce_train_curve.append(ce_now)
        acc_train_curve.append(acc_now)

        weights -= step_size * gradient

        # ... while validation metrics describe the weights *after* it.
        _, _, valid_probs = logistic(
            weights, valid_inputs, valid_targets, hyperparameters)
        ce_now, acc_now = evaluate(valid_targets, valid_probs)
        ce_valid_curve.append(ce_now)
        acc_valid_curve.append(acc_now)

    test_probs = logistic_predict(weights, test_inputs)
    test_ce, test_accuracy = evaluate(test_targets, test_probs)
    print("test_accuracy = ", test_accuracy)

    # One figure per metric: (figure id, training curve and label,
    # validation curve and label, title, y-axis label).
    iteration_axis = range(1, num_iterations + 1)
    figure_specs = (
        (0,
         ce_train_curve, "training cross entropy",
         ce_valid_curve, "validation cross entropy",
         "mnist_train: Cross Entropy vs. Iterations",
         "cross entropy"),
        (1,
         acc_train_curve, "training accuracy",
         acc_valid_curve, "validation accuracy",
         "mnist_train: Accuracy vs. Iterations",
         "accuracy"),
    )
    for (figure_id, train_curve, train_label,
         valid_curve, valid_label, title, y_label) in figure_specs:
        plt.figure(figure_id)
        plt.plot(iteration_axis, train_curve, label=train_label)
        plt.plot(iteration_axis, valid_curve, label=valid_label)
        plt.title(title)
        plt.xlabel("iterations")
        plt.ylabel(y_label)
        plt.legend(loc='best')
        plt.show()

    return test_ce, test_accuracy

In [None]:
# Entry point: run the full training / evaluation / plotting routine only when
# this code is executed directly (module name "__main__"), not when imported.
if __name__ == "__main__":
    run_logistic_regression()