In [1]:
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
import sklearn
import numpy as np
import jupyter
import matplotlib
import math
import matplotlib.pyplot as plt
import time

In [2]:
def getdata(file):
    X, y = load_svmlight_file(file)
    X = X.toarray()
    y = y.reshape(y.shape[0],1)
    return X,y

In [3]:
def mbs_data( X , y ):
    temp=np.concatenate((X,y),axis=1)
    np.random.shuffle(temp)
    col=temp.shape[1]
    X_t=temp[:,:col-1]
    y_t=temp[:,col-1:col]
    X_t=np.array(X_t)
    y_t=np.array(y_t)
    return X_t,y_t

In [4]:
def f(x,w):
    sum=0
    for i in range(w.shape[0]):
        sum+=w[i]*x[i]
    return sum

def h_func(x,w):
    g=1/(1+np.exp(-1*f(x,w)))
    return g

def hinge_loss(w,x,y):
    zero = np.zeros((x.shape[0],1))
    return np.max([zero,1 - y * (x.dot(w))],axis=0)

def loss(W,X,Y,lamda):
    num_records, num_features = np.shape(X)
    hin_loss = hinge_loss(W,X,Y)
    loss = 1.0/float(num_records)*hin_loss.sum() + 1.0/2*lamda*W.transpose().dot(W)
    return loss[0][0]

def gradient(W,X,Y,lamda):
    num_records, num_features = np.shape(X)
    hin_loss = hinge_loss(W,X,Y)
    indicator = np.zeros((num_records, 1))
    indicator[np.nonzero(hin_loss)] = 1
    return - 1.0 / float(num_records) * X.transpose().dot(Y * indicator).sum(axis=1).reshape((num_features, 1)) \
           + lamda * W

def accuracy(W,X,Y,threshold):
    num_records, num_features = np.shape(X)
    R = X.dot(W)
    R[R <= threshold] = -1
    R[R > threshold] = 1
    right = R*Y
    right[right < 0] = 0
    return 1.0/num_records*np.count_nonzero(right)

In [5]:
def NAG( X , y , X_t , y_t , r , lam , n , epoch, mbs ,threshold):
    w = np.zeros(123).reshape(123,1)
    v = np.zeros(123).reshape(123,1)
    L_NAG = []
    acc=[]   
    for i in range(epoch):
        X_rand,y_rand = mbs_data(X,y)
        for j in range(X.shape[0]//mbs-2):
            X_train = X_rand[ mbs * ( j % mbs ) : mbs * ( j % mbs + 1 ) ]
            y_train = y_rand[ mbs * ( j % mbs ) : mbs * ( j % mbs + 1 ) ]
            g = gradient(w-r*v,X_train,y_train ,lam)
            v = r * v + n * g
            w = w - v
            if(j%50==1):
                L_NAG.append(loss(w,X_t,y_t,lam))          
                predict=0
                for j in range(len(y_t)):
                    if h_func(X[j],w)>=threshold:
                        predict+=1           
                acc.append(predict/len(y_t))
    return acc,L_NAG

def RMSProp( X , y , X_t , y_t ,e, r , lam , n , epoch, mbs ,threshold):
    w = np.zeros(123)
    G=0
    L_RMSProp = []
    acc=[]
    for i in range(epoch):
        X_rand,y_rand = mbs_data(X,y)
        for j in range(X.shape[0]//mbs):
            X_train = X_rand[ mbs * ( j % mbs ) : mbs * ( j % mbs + 1 ) - 1 ]
            y_train = y_rand[ mbs * ( j % mbs ) : mbs * ( j % mbs + 1 ) - 1 ]
            g = gradient(w,X_train,y_train ,lam)
            G = r * G + (1-r)*g*g
            w = w - n/(np.sqrt(G+e))*g
            if(j%50==1):
                L_RMSProp.append(loss_func(w,X_t,y_t,lam))          
                predict=0
                for j in range(len(y_t)):
                    if h_func(X[j],w)>=threshold:
                        predict+=1           
                acc.append(predict/len(y_t))
    return acc,L_RMSProp

def AdaDelta( X , y , X_t , y_t ,e, r , lam , n , epoch, mbs ,threshold):
    w = np.zeros(123)
    G=0
    L_AdaDelta = []
    acc=[]
    delta=0
    for i in range(epoch):
        X_rand,y_rand = mbs_data(X,y)
        for j in range(X.shape[0]//mbs):
            X_train = X_rand[ mbs * ( j % mbs ) : mbs * ( j % mbs + 1 ) - 1 ]
            y_train = y_rand[ mbs * ( j % mbs ) : mbs * ( j % mbs + 1 ) - 1 ]
            g = gradient(w,X_train,y_train ,lam)
            G = r * G + (1-r)*g*g
            delta_w = - np.sqrt( delta + e ) / np.sqrt( G + e ) * g
            w = w + delta_w
            delta = r * delta + ( 1 - r) * delta_w * delta_w
            if(j%50==1):
                L_AdaDelta.append(loss_func(w,X_t,y_t,lam))          
                predict=0
                for j in range(len(y_t)):
                    if h_func(X[j],w)>=threshold:
                        predict+=1           
                acc.append(predict/len(y_t)) 
    return acc,L_AdaDelta

def Adam( X , y , X_t , y_t ,e, r , lam , n , epoch, mbs ,threshold,b):
    w = np.zeros(123)
    G=0
    L_Adam = []
    acc=[]
    m=0
    for i in range(epoch):
        X_rand,y_rand = mbs_data(X,y)
        for j in range(X.shape[0]//mbs):
            X_train = X_rand[ mbs * ( j % mbs ) : mbs * ( j % mbs + 1 ) - 1 ]
            y_train = y_rand[ mbs * ( j % mbs ) : mbs * ( j % mbs + 1 ) - 1 ]
            g = gradient(w,X_train,y_train ,lam)
            m = b * m + (1 - b) * g
            G = r * G + (1 - r) * g * g
            a = n * np.sqrt(1 - np.power(r,i))/(1 - np.power(b , i))
            w = w - a * m / np.sqrt( G + e )            
            if(j%50==1):
                L_Adam.append(loss_func(w,X_t,y_t,lam))          
                predict=0
                for j in range(len(y_t)):
                    if h_func(X[j],w)>=threshold:
                        predict+=1           
                acc.append(predict/len(y_t))
    return acc,L_Adam

In [6]:
X_test, y_test = load_svmlight_file('a9a.t',n_features=123)
X_test = X_test.toarray()
y_test = y_test.reshape(y_test.shape[0],1)
X,y=getdata('a9a')

epoch = 1
mini_batch_size=32

lam_NAG = 0.01 
n_NAG=0.01
r_NAG=0.9
threshold_NAG=0.5
acc_NAG=[]
L_NAG=[]

lam_PMSProp = 0.0001 
n_PMSProp=0.0001
r_PMSProp=0.9
e_PMSProp=np.exp(-8)
threshold_PMSProp=0.5
acc_PMSProp=[]
L_PMSProp=[]

lam_AdaDelta = 0.00001 
n_AdaDelta=0.0001
r_AdaDelta=0.95
e_AdaDelta=np.exp(-8)
threshold_AdaDelta=0.5
acc_AdaDelta=[]
L_AdaDelta=[]

lam_Adam = 0.0001 
n_Adam=0.001
r_Adam=0.999
e_Adam=np.exp(-8)
threshold_Adam=0.5
b_Adam=0.9
acc_Adam=[]
L_Adam=[]

acc_NAG,L_NAG=NAG(X,y,X_test, y_test,r_NAG , lam_NAG , n_NAG , epoch,mini_batch_size,threshold_NAG)
acc_RMSProp,L_RMSProp=RMSProp( X , y , X_test , y_test ,e_PMSProp, r_PMSProp , lam_PMSProp , n_PMSProp , epoch, mini_batch_size ,threshold_PMSProp)
acc_AdaDelta,L_AdaDelta=AdaDelta( X , y , X_test , y_test ,e_AdaDelta, r_AdaDelta , lam_AdaDelta , n_AdaDelta , epoch, mini_batch_size,threshold_AdaDelta )
acc_Adam,L_Adam=Adam( X , y , X_test , y_test ,e_Adam, r_Adam , lam_Adam , n_Adam , epoch, mini_batch_size,threshold_Adam ,b_Adam)


nag
1
51
101
151
201
251
301
351
401
451
501
551
601
651
701
751
801
851
901
951
1001
rmsprop


ValueError: could not broadcast input array from shape (31,1) into shape (31)

In [None]:
fig, ax = plt.subplots()
ax_e = ax.twinx()

loss_NAG_line = ax.plot(np.arange(len(L_NAG)),L_NAG,label='L_NAG')
loss_RMSProp_line = ax.plot(range(epoch),L_RMSProp,label='L_RMSProp')
loss_AdaDelta_line = ax.plot(range(epoch),L_AdaDelta,label='L_AdaDelta')
loss_Adam_line = ax.plot(range(epoch),L_Adam,label='L_Adam')

accuracy_NAG_line = ax_e.plot(np.arange(len(L_NAG)),acc_NAG,'r',label='acc_NAG')
accuracy_RMSProp_line = ax_e.plot(range(epoch),acc_RMSProp,'b',label='acc_RMSProp')
accuracy_AdaDelta_line = ax_e.plot(range(epoch),acc_AdaDelta,'r',label='acc_AdaDelta')
accuracy_Adam_line = ax_e.plot(range(epoch),acc_Adam,'b',label='acc_Adam')

#plt.yscale('log')
ax.set(xlabel='Epoch', ylabel='Loss')
ax_e.set_ylabel('Accuracy')
ax.legend(loc=4)
ax_e.legend(loc=1)
plt.show()
