In [4]:
%% Machine Learning Online Class - Exercise 2: Logistic Regression
%
%  Instructions
%  ------------
% 
%  This file contains code that helps you get started on the second part
%  of the exercise which covers regularization with logistic regression.
%
%  You will need to complete the following functions in this exercise:
%
%     sigmoid.m
%     costFunction.m
%     predict.m
%     costFunctionReg.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%

%% Initialization
%clear ; close all; clc

%% Load Data
%  The first two columns contain the X values and the third column
%  contains the label (y).

% Read the data set, then split it into the feature matrix and the label vector.
data = load('exercises/ex2/ex2data2.txt');
X = data(:, 1:2);   % columns 1 and 2: the two input features
y = data(:, 3);     % column 3: the label

Skip plotting.

In [None]:
%plotData(X, y);

% Put some labels 
%hold on;

% Labels and Legend
%xlabel('Microchip Test 1')
%ylabel('Microchip Test 2')

% Specified in plot order
%legend('y = 1', 'y = 0')
%hold off;

# Part 1: Regularized Logistic Regression
In this part, you are given a dataset with data points that are not
linearly separable. However, you would still like to use logistic 
regression to classify the data points.

To do so, you introduce more features to use -- in particular, you add
polynomial features to our data matrix (similar to polynomial
regression).

In [18]:
function out = mapFeature(X1, X2)
    % MAPFEATURE Expand two features into all polynomial terms up to degree 6
    %
    %   out = MAPFEATURE(X1, X2) returns a matrix whose first column is all
    %   ones, followed by one column per monomial X1.^a .* X2.^b with
    %   1 <= a + b <= 6.  Columns are ordered by total degree, and within a
    %   degree by increasing power of X2:
    %       1, X1, X2, X1.^2, X1.*X2, X2.^2, X1.^3, ...
    %
    %   X1 and X2 must be column vectors of the same size.
    %

    maxPower = 6;

    % One bias column plus (p + 1) monomials for every total degree p.
    numColumns = (maxPower + 1) * (maxPower + 2) / 2;
    out = ones(size(X1, 1), numColumns);

    col = 1;  % column 1 stays as the bias column of ones
    for total = 1:maxPower
        for powX2 = 0:total
            col = col + 1;
            out(:, col) = (X1 .^ (total - powX2)) .* (X2 .^ powX2);
        end
    end
end

% size(X)

% Replace the two raw feature columns with the polynomial feature matrix.
% Note that mapFeature also adds a column of ones for us, so the intercept term is handled
% NOTE(review): X is overwritten in place, so running this cell a second time
% would feed the already-mapped columns (the ones column and X1) back into
% mapFeature -- reload the data before re-running.
X = mapFeature(X(:,1), X(:,2));

% size(X)

Cost function.

In [45]:
% first sigmoid function
function g = sigmoid(z)
    %SIGMOID Compute the sigmoid (logistic) function element-wise
    %   g = SIGMOID(z) returns 1 ./ (1 + exp(-z)) for every element of z.
    %   z can be a scalar, vector, matrix or N-d array; g has the same
    %   size as z.

    % Vectorized on purpose: an element loop over rows/columns only fills
    % the first 2-D page of an N-d input, and exp() avoids relying on the
    % Octave-only constant `e`, which a workspace variable could shadow.
    g = 1 ./ (1 + exp(-z));
end

function [J, grad] = costFunctionReg(theta, X, y, lambda)
    %COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
    %   [J, grad] = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of
    %   using theta as the parameter for regularized logistic regression and
    %   the gradient of that cost w.r.t. the parameters.
    %
    %   theta   parameter column vector; theta(1) is the intercept
    %   X       design matrix, one row per example, first column all ones
    %   y       column vector of 0/1 labels, one per row of X
    %   lambda  regularization strength
    %
    %   J       (-y'*log(h) - (1-y)'*log(1-h)) / m
    %             + lambda/(2*m) * sum(theta(2:end).^2),  with h = sigmoid(X*theta)
    %   grad    same size as theta: X'*(h - y)/m, plus lambda*theta/m on every
    %           entry except the intercept

    m = length(y); % number of training examples

    % Copy of theta with the intercept zeroed out, so the same expression
    % penalizes every parameter except theta(1).
    thetaReg = theta;
    thetaReg(1, :) = 0;

    h = sigmoid(X * theta);

    % The penalty term must be part of J: the gradient below already contains
    % its derivative, and an optimizer needs the two to be consistent.
    J = (-y' * log(h) - (1 - y)' * log(1 - h)) / m ...
        + lambda / (2 * m) * sum(thetaReg .^ 2);

    grad = X' * (h - y) / m + lambda * thetaReg / m;
end

% Start from the all-zero parameter vector, one entry per column of X
initial_theta = zeros(size(X, 2), 1);

% Regularization strength used for this check
lambda = 1;

% Evaluate the regularized cost and its gradient at the initial theta.
% The statement is deliberately left without a trailing semicolon so that
% both cost and grad are printed.
[cost, grad] = costFunctionReg(initial_theta, X, y, lambda)

%fprintf('Cost at initial theta (zeros): %f\n', cost);

cost =  0.69315
grad =

   8.4746e-03
   1.8788e-02
   7.7771e-05
   5.0345e-02
   1.1501e-02
   3.7665e-02
   1.8356e-02
   7.3239e-03
   8.1924e-03
   2.3476e-02
   3.9349e-02
   2.2392e-03
   1.2860e-02
   3.0959e-03
   3.9303e-02
   1.9971e-02
   4.3298e-03
   3.3864e-03
   5.8382e-03
   4.4763e-03
   3.1008e-02
   3.1031e-02
   1.0974e-03
   6.3157e-03
   4.0850e-04
   7.2650e-03
   1.3765e-03
   3.8794e-02



# Part 2: Regularization and Accuracies
**Optional Exercise**:
In this part, you will get to try different values of lambda and 
see how regularization affects the decision boundary.

Try the following values of lambda (0, 1, 10, 100).

How does the decision boundary change when you vary lambda? How does
the training set accuracy vary?

In [None]:
% Start the optimizer from the all-zero parameter vector
initial_theta = zeros(size(X, 2), 1);

% Regularization strength -- vary this (e.g. 0, 1, 10, 100) to see its effect
lambda = 1;

% Optimizer settings: use the gradient returned by the cost function and
% stop after at most 400 iterations
options = optimset('GradObj', 'on', 'MaxIter', 400);

% Minimize the regularized cost over theta; X, y and lambda are captured
% by the anonymous function
[theta, J, exit_flag] = fminunc(@(t) costFunctionReg(t, X, y, lambda), ...
                                initial_theta, options);

% Plotting is disabled in this notebook
%plotDecisionBoundary(theta, X, y);
%hold on;
%title(sprintf('lambda = %g', lambda))

%xlabel('Microchip Test 1')
%ylabel('Microchip Test 2')

%legend('y = 1', 'y = 0', 'Decision boundary')
%hold off;

% Training-set accuracy: percentage of examples whose prediction equals the label
p = predict(theta, X);

fprintf('Train Accuracy: %f\n', 100 * mean(double(p == y)));


