### import modules 

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
import seaborn as sns
import mnist
import pylab
import copy

sns.set()

%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### define functions for code

In [39]:

# data loading function
def load_dataset():
    mndata = mnist.MNIST('./python-mnist/data/')
    X_train, labels_train = map(np.array, mndata.load_training())
    X_test, labels_test = map(np.array, mndata.load_testing())
    X_train = X_train/255.0
    X_test = X_test/255.0
    
    labels_train = labels_train.astype('int')
    labels_test = labels_test.astype('int')
    return X_train, X_test,labels_train,labels_test

def convert_2_7(x_train,x_test,labels_train,labels_test):
    #import pdb; pdb.set_trace()
    mask_train = np.logical_or(labels_train==2,labels_train==7)
    mask_test = np.logical_or(labels_test==2,labels_test==7)
   # mask_train = labels_train==2 or labels_train==7
    #mask_test = labels_test==2 or labels_test==7
    
    x_train_c = x_train[mask_train]
    x_test_c = x_test[mask_test]
    

    labels_train_c = labels_train[mask_train]
    labels_test_c = labels_test[mask_test]
    
    labels_train_c[labels_train_c==2] = -1
    labels_train_c[labels_train_c==7] = 1

    labels_test_c[labels_test_c==2] = -1
    labels_test_c[labels_test_c==7] = 1

    return x_train_c,x_test_c,labels_train_c,labels_test_c 

def gradient_method(X,y,lambda_val):
    not_conv = True
    j_vec = []
    w_vec = []
    b_vec = []
    k_vec = []
    k = 0
    step_size = 1e-2
    criteria_conv = 1e-2
    w_old = np.zeros((np.shape(X)[1],1))
    b_old = 0
    w = np.array(w_old)
    b = copy.copy(b_old)
    n = np.shape(X)[0]
    
    while not_conv:
        mu = 1/(1+np.exp(-y*(b+np.dot(X,w))))

        g_w = np.dot(X.T,(mu - y)) + lambda_val*w
        g_b = (1/n)*np.sum(mu-y)
                
        j = (1/n)*np.sum(np.log(1/mu)) + lambda_val*np.dot(w.T,w)
        
        # 
        w = w_old - step_size*g_w
        b = b_old - step_size*g_b

        delta_w = w-w_old
        delta_b = b-b_old
        
        w_old = np.array(w)
        b_old = copy.copy(b)
                
        k_vec.append(k)
        j_vec.append(j)
        
        # check convergence
        if np.sum(delta_w)<criteria_conv and k>1:
            not_conv = False
        else:
            not_conv = False
            k += 1      
        print('the value of delta_w is {}'.format(delta_w))
        print('the value of j is {}'.format(j))
    return j_vec,w_vec,b_vec,k_vec

# def newton_method(lambda_val):
#     #g(w) = g(w) + lambda(w)
#     # h(w) + lambda(I)
#     criteria_conv = 1e-2
#     theta_0 = ...
    
    
#     k = 1
#     conv = False
    
#     while conv:
#         g[k] = np.dot(X.T,(mu - y))
#         h[k] = np.dot() + np.identity(np.lamba_val

    
#         if ():
#             conv = True
#         else ():
#             conv = False
#             k += 1
    
#     return

# def stochastic_gradient_method():
    
#     while conv:
#         g[k] = np.dot(X.T,(mu - y))


    
#         if ():
#             conv = True
#         else ():
#             conv = False
#             k += 1
     
#     return

def plot_objective_train_test(train,test,iter_num,name):
    plt.figure(figsize=(4, 4), dpi=600)
    plt.plot(iter_num,train,label='training data')
    plt.plot(iter_num,test,label='test data')
    plt.xlabel('iteration number')
    plt.ylabel('objective funtion')
    plt.title
    plt.legend()
    plt.savefig(name)
    
def classify(train_data,test_data,train_true,test_true,iter_num):
    
    train_data_class = np.sign(train_data)
    test_data_class = np.sign(test_data)
    
    train_classify_error = [train_data_class == train_true]
    test_classify_error = [test_data_class == test_true]
    
    plt.figure(figsize=(4, 4), dpi=600)
    plt.plot(iter_num,train_classify_err,label='training data')
    plt.plot(iter_num,test_classify_err,label='test data')
    plt.xlabel('iteration number')
    plt.ylabel('misclassification error')
    plt.title('Misclassification error vs. iteration number')
    plt.legend()
    plt.savefig(name)

### Load and convert data 

In [8]:
x_train,x_test,labels_train,labels_test = load_dataset()

In [9]:
x_train_c,x_test_c,labels_train_c,labels_test_c  = convert_2_7(x_train,x_test,labels_train,labels_test)

In [12]:
np.shape(x_train_c)
np.shape(x_train)

(12223, 784)

(60000, 784)

### perform processing 

In [15]:
lambda_val = 10**-1


In [40]:
X = x_train_c
y = labels_train_c
j_vec,w_vec,b_vec,k_vec = gradient_method(X,y,lambda_val)

the value of delta_w is [[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
the value of j is [[ 8472.33798799]]


In [66]:
np.sign([-5,0,5])

array([-1,  0,  1])

In [71]:
np.exp([1,2,3])

array([  2.71828183,   7.3890561 ,  20.08553692])

In [21]:
np.zeros((1,1))

array([[ 0.]])