In [1]:
import sys
if sys.version_info[0] < 3:
    raise Exception("Python 3 not detected.")
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
import scipy
from scipy import io
import math

In [2]:
def logistic_fn(a):
    #return 1/(1+math.exp(-a))
    return scipy.special.expit(a)

In [3]:
 # Get training_data from file
wine_data = io.loadmat("data.mat")
wine_data

{'__header__': b'MATLAB 5.0 MAT-file Platform: posix, Created on: Sun Feb 26 19:25:34 2017',
 '__version__': '1.0',
 '__globals__': [],
 'y': array([[0.],
        [1.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 'X': array([[ 5.8  ,  0.555,  0.26 , ...,  0.46 ,  9.1  ,  0.5  ],
        [ 6.3  ,  0.36 ,  0.19 , ...,  0.52 , 12.7  ,  0.6  ],
        [ 6.4  ,  0.21 ,  0.5  , ...,  0.43 ,  8.8  ,  0.5  ],
        ...,
        [ 7.3  ,  0.2  ,  0.37 , ...,  0.49 , 10.9  ,  0.6  ],
        [ 8.5  ,  0.25 ,  0.27 , ...,  0.33 , 12.   ,  0.6  ],
        [ 6.6  ,  0.36 ,  0.21 , ...,  0.41 ,  9.9  ,  0.6  ]]),
 'description': array(['fixed acidity       ', 'volatile acidity    ',
        'citric acid         ', 'residual sugar      ',
        'chlorides           ', 'free sulfur dioxide ',
        'total sulfur dioxide', 'density             ',
        'pH                  ', 'sulphates           ',
        'alcohol             ', 'quality             '], dtype='<U2

In [4]:
wine_data['description']

array(['fixed acidity       ', 'volatile acidity    ',
       'citric acid         ', 'residual sugar      ',
       'chlorides           ', 'free sulfur dioxide ',
       'total sulfur dioxide', 'density             ',
       'pH                  ', 'sulphates           ',
       'alcohol             ', 'quality             '], dtype='<U20')

In [5]:
X_original= wine_data['X']
print(X_original.shape)
y_original = wine_data['y']
print(y_original.shape)
X_test = wine_data['X_test']
print(X_test.shape)

(6000, 12)
(6000, 1)
(497, 12)


In [6]:
# Attach label into training data to create a whole data set
#m = len(len(y_original))
whole_data =  np.append(X_original,y_original, axis=1)
np.random.seed(42)
copied_data = np.copy(whole_data)
X, y = copied_data[:,:-1], copied_data[:,-1]
print(X.shape)
print(y.shape)

(6000, 12)
(6000,)


In [7]:
# regulation l2
lam = 0.007
#learning rate
alpha = 2.0e-7
w_int = np.zeros(12)
w_int

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [8]:
# l2_of w
w_test = np.array([1,2,3])
from numpy import linalg as LA
l2_w = LA.norm(w_int)
l2_test = LA.norm(w_test)
l2_test

3.7416573867739413

In [9]:
# Define hypothesis function
def hypothesis(X,w):
    #result = 0
    return logistic_fn(np.dot(X,w.T))

#### Problem 4.1 - Batch Gradient Descent

In [10]:
m = len(y)
def Cost_Fn(X,y,w,m):
    sum_errors = 0
    for i in range(m):
        xi = X[i]
        hi = hypothesis(xi,w)
        if y[i] == 1:
            #print("y == 1: hi is", hi)
            if hi < -5:
                errors = y[i]*hi
            elif hi > 5:
                errors = 0
            else:
                errors = y[i]*np.log(hi)
        if y[i] == 0:
            if hi > 7:
                errors = (1 -y[i])*(-hi)
            elif hi < -6:
                errors = 0
            else:
                errors = (1 -y[i])*np.log(1-hi)
        sum_errors += errors
    J = lam*(LA.norm(w))**2 -(1/m)*sum_errors
    return J
print(Cost_Fn(X,y,w_int,m))

0.6931471805600127


In [11]:
def Cost_Fn_Derivative_BGD(X,y,w,j,m,alpha):
    sum_errors = 0
    for i in range(m):
        xi = X[i]
        xij = xi[j]
        hi = hypothesis(X[i],w)
        error = (hi - y[i])*xij
        sum_errors += error
    m = len(y)
    const = float(alpha)/float(m)
    J = 2*lam*LA.norm(w) + const*sum_errors
    return J

In [12]:
def gradient_descent_BGD(X,y,w,m,alpha):
    new_w = np.zeros(len(w))
    const = alpha/m
    for j in range(len(w)):
        CFDerivative = Cost_Fn_Derivative_BGD(X,y,w,j,m,alpha)
        new_w_value = w[j] - CFDerivative
        new_w[j] = new_w_value
    return new_w

In [13]:
w = w_int
cost_each_iter = {}
lst_w = {}
def Logistic_Regression_BGD(X,y,alpha,w,num_iters):
    m = len(y)
    for x in range(num_iters):
        new_w = gradient_descent_BGD(X,y,w,m,alpha)
        w = new_w
        if x%50 == 0:
            cost_fn = Cost_Fn(X,y,w,m)
            cost_each_iter[x] = cost_fn
            lst_w[x] = w
            print("Cost for iteration {} is {}".format(x,cost_fn))

In [14]:
alpha

2e-07

In [16]:
alpha = 1e-08
max_iter = 250
w = w_int
Logistic_Regression_BGD(X,y,alpha,w,max_iter)

Cost for iteration 0 is 0.6931240249379577
Cost for iteration 50 is 0.6909143764263569
Cost for iteration 100 is 0.6769474176025362
Cost for iteration 150 is 0.5603252635378848
Cost for iteration 200 is 0.6010321397500638


In [16]:
print(cost_each_iter)

{}
