In [1]:
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
import sklearn
import numpy as np
import jupyter
import matplotlib
import math
import matplotlib.pyplot as plt
import time

In [2]:
def getdata(file, n_features=None):
    """Load a LIBSVM/svmlight-format data set as dense NumPy arrays.

    Parameters
    ----------
    file : str or file-like
        Forwarded unchanged to ``load_svmlight_file``.
    n_features : int, optional
        Number of feature columns to force. With the default ``None`` the
        loader infers the width from the file, exactly as before. Passing the
        same value for every split keeps their widths consistent when a split
        never mentions the highest feature index.

    Returns
    -------
    X : numpy.ndarray
        Dense feature matrix (the loader's sparse result converted with
        ``toarray()``).
    y : numpy.ndarray
        Targets reshaped into a single column, shape ``(n_samples, 1)``.
    """
    X, y = load_svmlight_file(file, n_features=n_features)
    # Downstream code concatenates X and y column-wise, so y must be 2-D.
    return X.toarray(), y.reshape(-1, 1)

In [3]:
def mbs_data( X , y ):
    """Return row-shuffled copies of ``X`` and ``y`` with rows kept paired.

    ``X`` and ``y`` are glued together column-wise, the rows of the combined
    array are shuffled in place with NumPy's global RNG
    (``np.random.shuffle``), and the result is cut back into all-but-last
    columns and the last column. The inputs themselves are not modified.

    Parameters
    ----------
    X : 2-D array
    y : 2-D array with the same number of rows as ``X``

    Returns
    -------
    (X_t, y_t) : tuple of numpy.ndarray
        Fresh arrays: every column but the last, and the last column kept 2-D.
    """
    stacked = np.concatenate((X, y), axis=1)
    # Shuffling the stacked array moves whole rows, so each sample keeps its label.
    np.random.shuffle(stacked)
    last = stacked.shape[1] - 1
    features = np.array(stacked[:, :last])
    labels = np.array(stacked[:, last:last + 1])
    return features, labels

In [4]:
def f(x,w):
    """Linear score of one sample: ``sum_i w[i] * x[i]``.

    Computed with a single ``np.dot`` call instead of a Python loop over the
    features (which also shadowed the builtin ``sum``).

    Parameters
    ----------
    x : 1-D sequence or array
        Feature vector; only its first ``w.shape[0]`` entries are used.
    w : numpy.ndarray
        Weights, either flat ``(d,)`` or a column ``(d, 1)``.

    Returns
    -------
    A scalar when ``w`` is flat, a length-1 array when ``w`` is a column
    (the same shapes the element-wise accumulation produced), or the integer
    ``0`` when ``w`` has no rows.
    """
    n_weights = w.shape[0]
    if n_weights == 0:
        # Nothing to accumulate; keep the historical return value.
        return 0
    # w.T turns a (d, 1) column into a (1, d) row and leaves a flat vector alone,
    # so the product is (1,) or a scalar respectively.
    return np.dot(w.T, x[:n_weights])

In [5]:
def h_func(x,w):
    """Sigmoid of the linear score: ``1 / (1 + exp(-f(x, w)))``.

    Parameters
    ----------
    x : feature vector handed to ``f``
    w : weight vector handed to ``f``

    Returns
    -------
    The logistic response, with whatever shape ``f(x, w)`` has.
    """
    score = f(x, w)
    return 1 / (1 + np.exp(-score))

In [6]:
def loss_func(X,y,w,lam):
    """L2-regularised logistic loss.

    Evaluates ``sum_i log(1 + exp(-y_i * <w, x_i>)) / X.shape[0]
    + lam / 2 * ||w||^2`` over the first ``len(y)`` rows of ``X``.

    The data term is computed in one vectorised pass with ``np.logaddexp``,
    which stays finite for large margins where ``log(1 + exp(z))`` would
    overflow to ``inf``.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Only the first ``w.shape[0]`` columns are used.
    y : array, shape (n_samples, 1) or (n_samples,)
        Labels; the formula is the logistic loss for labels in {-1, +1}.
    w : array, shape (n_features,) or (n_features, 1)
    lam : float
        L2 regularisation coefficient.

    Returns
    -------
    A length-1 array when ``y`` and/or ``w`` is 2-D, a scalar when both are
    flat.

    Raises
    ------
    ValueError
        If ``X`` has no rows (the mean over samples is undefined).
    """
    X = np.asarray(X)
    y = np.asarray(y)
    w = np.asarray(w)
    n_rows = X.shape[0]
    if n_rows == 0:
        raise ValueError("loss_func needs at least one sample")

    n_labels = len(y)
    scores = np.dot(X[:n_labels, :w.shape[0]], w)
    labels = y
    if labels.ndim != scores.ndim:
        # Mixing a flat vector with a column would broadcast to (m, m);
        # align both as columns so the product stays per-sample.
        labels = labels.reshape(-1, 1)
        scores = scores.reshape(-1, 1)

    # logaddexp(0, z) == log(1 + exp(z)), evaluated without overflow.
    data_term = np.logaddexp(0.0, -labels * scores).sum(axis=0)
    return data_term / n_rows + lam / 2 * np.linalg.norm(w) ** 2

In [7]:
def loss_gre(X,y,w,lam):
    """Gradient of the L2-regularised mean logistic loss with respect to ``w``.

    Returns ``lam * w - (1 / m) * sum_i y_i * x_i / (1 + exp(y_i * <w, x_i>))``
    where ``m = X.shape[0]``, computed with matrix products instead of a
    Python loop over the samples.

    Parameters
    ----------
    X : array, shape (m, d)
    y : array, shape (m, 1) or (m,)
        Labels; only the first ``m`` are used.
    w : array, shape (d,) or (d, 1)
    lam : float
        L2 regularisation coefficient.

    Returns
    -------
    numpy.ndarray with the same shape as ``w``. For an empty batch only the
    regularisation gradient ``lam * w`` is returned (instead of the NaN that
    a 0/0 mean would produce).
    """
    X = np.asarray(X)
    w = np.asarray(w)
    n_rows = X.shape[0]
    if n_rows == 0:
        # No samples: the data term contributes nothing.
        return lam * w

    labels = np.asarray(y)[:n_rows].reshape(-1)
    margins = labels * np.dot(X, w.reshape(-1))
    # 1 / (1 + exp(m)) written as exp(-log(1 + exp(m))) so a large margin
    # underflows to 0 instead of overflowing inside exp().
    coeffs = labels * np.exp(-np.logaddexp(0.0, margins))
    data_grad = np.dot(coeffs, X).reshape(w.shape)
    return lam * w - data_grad / n_rows

In [8]:
def NAG( X , y , X_t , y_t , r , lam , n , epoch, mbs ,threshold):
    """Mini-batch Nesterov accelerated gradient for L2-regularised logistic regression.

    Each epoch reshuffles ``(X, y)`` with ``mbs_data`` and walks over all
    ``X.shape[0] // mbs`` full, consecutive mini-batches. The gradient is
    evaluated at the look-ahead point ``w - r * v``.

    Parameters
    ----------
    X, y : training features ``(m, d)`` and labels ``(m, 1)``
    X_t, y_t : evaluation features and labels
    r : float
        Momentum coefficient.
    lam : float
        L2 regularisation coefficient passed to the loss and gradient.
    n : float
        Step size.
    epoch : int
        Number of passes over the shuffled training data.
    mbs : int
        Mini-batch size.
    threshold : float
        A sample is predicted positive when its sigmoid output is >= this value.

    Returns
    -------
    (acc, L_NAG) : two lists with one entry per logged evaluation (taken
    whenever the batch index satisfies ``j % 200 == 1``): the fraction of
    evaluation samples classified correctly, and ``loss_func`` on the
    evaluation set.
    """
    # Size the parameters from the data instead of a hard-coded 123.
    n_features = X.shape[1]
    w = np.zeros((n_features, 1))
    v = np.zeros((n_features, 1))
    L_NAG = []
    acc = []
    n_batches = X.shape[0] // mbs
    # Labels are assumed to be -1/+1, as the logistic loss used here requires.
    labels_t = np.asarray(y_t).reshape(-1)
    for _ in range(epoch):
        X_rand, y_rand = mbs_data(X, y)
        for j in range(n_batches):
            # Batch j is rows [j*mbs, (j+1)*mbs). Indexing with j % mbs would
            # keep cycling through the first mbs*mbs rows only.
            start = mbs * j
            X_train = X_rand[start:start + mbs]
            y_train = y_rand[start:start + mbs]
            g = loss_gre(X_train, y_train, w - r * v, lam)
            v = r * v + n * g
            w = w - v
            if j % 200 == 1:
                L_NAG.append(loss_func(X_t, y_t, w, lam))
                # Accuracy on the evaluation set: same sigmoid as h_func,
                # thresholded and compared with the true labels.
                scores = np.dot(X_t[:, :w.shape[0]], w).reshape(-1)
                probs = 1 / (1 + np.exp(-scores))
                predicted = np.where(probs >= threshold, 1, -1)
                acc.append(float(np.mean(predicted == labels_t)))
    return acc, L_NAG

In [9]:
def RMSProp( X , y , X_t , y_t ,e, r , lam , n , epoch, mbs ,threshold):
    """Mini-batch RMSProp for L2-regularised logistic regression.

    Each epoch reshuffles ``(X, y)`` with ``mbs_data`` and walks over all
    ``X.shape[0] // mbs`` full, consecutive mini-batches of exactly ``mbs``
    rows. The step is ``n / sqrt(G + e) * g`` where ``G`` is the running
    average of the squared gradient.

    Parameters
    ----------
    X, y : training features ``(m, d)`` and labels ``(m, 1)``
    X_t, y_t : evaluation features and labels
    e : float
        Small constant added to ``G`` under the square root.
    r : float
        Decay rate of the squared-gradient average.
    lam : float
        L2 regularisation coefficient passed to the loss and gradient.
    n : float
        Step size.
    epoch : int
        Number of passes over the shuffled training data.
    mbs : int
        Mini-batch size.
    threshold : float
        A sample is predicted positive when its sigmoid output is >= this value.

    Returns
    -------
    (acc, L_RMSProp) : two lists with one entry per logged evaluation (taken
    whenever the batch index satisfies ``j % 50 == 1``): the fraction of
    evaluation samples classified correctly, and ``loss_func`` on the
    evaluation set.
    """
    # Size the parameters from the data instead of a hard-coded 123.
    w = np.zeros(X.shape[1])
    G = 0
    L_RMSProp = []
    acc = []
    n_batches = X.shape[0] // mbs
    # Labels are assumed to be -1/+1, as the logistic loss used here requires.
    labels_t = np.asarray(y_t).reshape(-1)
    for _ in range(epoch):
        X_rand, y_rand = mbs_data(X, y)
        for j in range(n_batches):
            # Batch j is rows [j*mbs, (j+1)*mbs): no "- 1" (which dropped the
            # last sample) and no j % mbs (which reused the first rows only).
            start = mbs * j
            X_train = X_rand[start:start + mbs]
            y_train = y_rand[start:start + mbs]
            g = loss_gre(X_train, y_train, w, lam)
            G = r * G + (1 - r) * g * g
            w = w - n / np.sqrt(G + e) * g
            if j % 50 == 1:
                L_RMSProp.append(loss_func(X_t, y_t, w, lam))
                # Accuracy on the evaluation set: same sigmoid as h_func,
                # thresholded and compared with the true labels.
                scores = np.dot(X_t[:, :w.shape[0]], w).reshape(-1)
                probs = 1 / (1 + np.exp(-scores))
                predicted = np.where(probs >= threshold, 1, -1)
                acc.append(float(np.mean(predicted == labels_t)))
    return acc, L_RMSProp

In [10]:
def AdaDelta( X , y , X_t , y_t ,e, r , lam , n , epoch, mbs ,threshold):
    """Mini-batch AdaDelta for L2-regularised logistic regression.

    Each epoch reshuffles ``(X, y)`` with ``mbs_data`` and walks over all
    ``X.shape[0] // mbs`` full, consecutive mini-batches of exactly ``mbs``
    rows. The step is ``-sqrt(delta + e) / sqrt(G + e) * g`` where ``G`` and
    ``delta`` are running averages of the squared gradient and of the squared
    update respectively.

    Parameters
    ----------
    X, y : training features ``(m, d)`` and labels ``(m, 1)``
    X_t, y_t : evaluation features and labels
    e : float
        Small constant added under both square roots.
    r : float
        Decay rate of both running averages.
    lam : float
        L2 regularisation coefficient passed to the loss and gradient.
    n : float
        Accepted for a signature parallel to the other optimisers; the update
        rule does not use it.
    epoch : int
        Number of passes over the shuffled training data.
    mbs : int
        Mini-batch size.
    threshold : float
        A sample is predicted positive when its sigmoid output is >= this value.

    Returns
    -------
    (acc, L_AdaDelta) : two lists with one entry per logged evaluation (taken
    whenever the batch index satisfies ``j % 50 == 1``): the fraction of
    evaluation samples classified correctly, and ``loss_func`` on the
    evaluation set.
    """
    # Size the parameters from the data instead of a hard-coded 123.
    w = np.zeros(X.shape[1])
    G = 0
    delta = 0
    L_AdaDelta = []
    acc = []
    n_batches = X.shape[0] // mbs
    # Labels are assumed to be -1/+1, as the logistic loss used here requires.
    labels_t = np.asarray(y_t).reshape(-1)
    for _ in range(epoch):
        X_rand, y_rand = mbs_data(X, y)
        for j in range(n_batches):
            # Batch j is rows [j*mbs, (j+1)*mbs): no "- 1" (which dropped the
            # last sample) and no j % mbs (which reused the first rows only).
            start = mbs * j
            X_train = X_rand[start:start + mbs]
            y_train = y_rand[start:start + mbs]
            g = loss_gre(X_train, y_train, w, lam)
            G = r * G + (1 - r) * g * g
            delta_w = -np.sqrt(delta + e) / np.sqrt(G + e) * g
            w = w + delta_w
            delta = r * delta + (1 - r) * delta_w * delta_w
            if j % 50 == 1:
                L_AdaDelta.append(loss_func(X_t, y_t, w, lam))
                # Accuracy on the evaluation set: same sigmoid as h_func,
                # thresholded and compared with the true labels.
                scores = np.dot(X_t[:, :w.shape[0]], w).reshape(-1)
                probs = 1 / (1 + np.exp(-scores))
                predicted = np.where(probs >= threshold, 1, -1)
                acc.append(float(np.mean(predicted == labels_t)))
    return acc, L_AdaDelta

In [11]:
def Adam( X , y , X_t , y_t ,e, r , lam , n , epoch, mbs ,threshold,b):
    """Mini-batch Adam for L2-regularised logistic regression.

    Each epoch reshuffles ``(X, y)`` with ``mbs_data`` and walks over all
    ``X.shape[0] // mbs`` full, consecutive mini-batches of exactly ``mbs``
    rows. The bias-corrected step size is
    ``n * sqrt(1 - r**t) / (1 - b**t)`` with ``t`` the 1-based count of
    parameter updates.

    Parameters
    ----------
    X, y : training features ``(m, d)`` and labels ``(m, 1)``
    X_t, y_t : evaluation features and labels
    e : float
        Small constant added to ``G`` under the square root.
    r : float
        Decay rate of the squared-gradient average ``G``.
    lam : float
        L2 regularisation coefficient passed to the loss and gradient.
    n : float
        Base step size.
    epoch : int
        Number of passes over the shuffled training data.
    mbs : int
        Mini-batch size.
    threshold : float
        A sample is predicted positive when its sigmoid output is >= this value.
    b : float
        Decay rate of the gradient average ``m``.

    Returns
    -------
    (acc, L_Adam) : two lists with one entry per logged evaluation (taken
    whenever the batch index satisfies ``j % 50 == 1``): the fraction of
    evaluation samples classified correctly, and ``loss_func`` on the
    evaluation set.
    """
    # Size the parameters from the data instead of a hard-coded 123.
    w = np.zeros(X.shape[1])
    G = 0
    m = 0
    t = 0  # number of updates performed so far
    L_Adam = []
    acc = []
    n_batches = X.shape[0] // mbs
    # Labels are assumed to be -1/+1, as the logistic loss used here requires.
    labels_t = np.asarray(y_t).reshape(-1)
    for _ in range(epoch):
        X_rand, y_rand = mbs_data(X, y)
        for j in range(n_batches):
            # Batch j is rows [j*mbs, (j+1)*mbs): no "- 1" (which dropped the
            # last sample) and no j % mbs (which reused the first rows only).
            start = mbs * j
            X_train = X_rand[start:start + mbs]
            y_train = y_rand[start:start + mbs]
            g = loss_gre(X_train, y_train, w, lam)
            t += 1
            m = b * m + (1 - b) * g
            G = r * G + (1 - r) * g * g
            # Bias correction must use the update count starting at 1. With
            # the epoch index, the first epoch evaluates 0 / 0 = nan and every
            # later weight stays nan.
            a = n * np.sqrt(1 - np.power(r, t)) / (1 - np.power(b, t))
            w = w - a * m / np.sqrt(G + e)
            if j % 50 == 1:
                L_Adam.append(loss_func(X_t, y_t, w, lam))
                # Accuracy on the evaluation set: same sigmoid as h_func,
                # thresholded and compared with the true labels.
                scores = np.dot(X_t[:, :w.shape[0]], w).reshape(-1)
                probs = 1 / (1 + np.exp(-scores))
                predicted = np.where(probs >= threshold, 1, -1)
                acc.append(float(np.mean(predicted == labels_t)))
    return acc, L_Adam

In [12]:
# Seed NumPy's global RNG: mbs_data() shuffles with np.random.shuffle, so
# without a fixed seed every run of this cell produces different curves.
SEED = 0
np.random.seed(SEED)

# Evaluation split, loaded with an explicit width of 123 feature columns.
X_test, y_test = load_svmlight_file('a9a.t',n_features=123)
X_test = X_test.toarray()
y_test = y_test.reshape(y_test.shape[0],1)
# Training split.
X,y=getdata('a9a')

# Shared settings: passes over the data and mini-batch size.
epoch = 10
mini_batch_size=64

# Per-optimiser hyper-parameters. Naming: lam = L2 coefficient, n = step size,
# r = momentum / decay rate, e = epsilon, threshold = decision threshold on
# the sigmoid output, b = Adam's first-moment decay.
# NOTE(review): thresholds are 0.1 for three optimisers and 0.5 for Adam —
# confirm the difference is intentional.
# NOTE(review): np.exp(-8) is about 3.4e-4, not 1e-8 — confirm which epsilon
# was intended.
# The empty lists are placeholders so the plotting cell finds every name even
# if one of the runs below is interrupted.

lam_NAG = 0.001 
n_NAG=0.01
r_NAG=0.9
threshold_NAG=0.1
acc_NAG=[]
L_NAG=[]

# The "PMSProp" spelling is kept for these settings so existing references
# keep working; the result placeholders use the RMSProp spelling that the run
# below and the plotting cell actually use.
lam_PMSProp = 0.001 
n_PMSProp=0.01
r_PMSProp=0.9
e_PMSProp=np.exp(-8)
threshold_PMSProp=0.1
acc_PMSProp=[]
L_PMSProp=[]
acc_RMSProp=[]
L_RMSProp=[]

lam_AdaDelta = 0.001 
n_AdaDelta=0.01
r_AdaDelta=0.95
e_AdaDelta=np.exp(-8)
threshold_AdaDelta=0.1
acc_AdaDelta=[]
L_AdaDelta=[]

lam_Adam = 0.001 
n_Adam=0.01
r_Adam=0.999
e_Adam=np.exp(-8)
threshold_Adam=0.5
b_Adam=0.9
acc_Adam=[]
L_Adam=[]

# Train with each optimiser; every call returns (accuracy history, loss history).
acc_NAG,L_NAG=NAG(X,y,X_test, y_test,r_NAG , lam_NAG , n_NAG , epoch,mini_batch_size,threshold_NAG)
acc_RMSProp,L_RMSProp=RMSProp( X , y , X_test , y_test ,e_PMSProp, r_PMSProp , lam_PMSProp , n_PMSProp , epoch, mini_batch_size ,threshold_PMSProp)
acc_AdaDelta,L_AdaDelta=AdaDelta( X , y , X_test , y_test ,e_AdaDelta, r_AdaDelta , lam_AdaDelta , n_AdaDelta , epoch, mini_batch_size,threshold_AdaDelta )
acc_Adam,L_Adam=Adam( X , y , X_test , y_test ,e_Adam, r_Adam , lam_Adam , n_Adam , epoch, mini_batch_size,threshold_Adam ,b_Adam)


KeyboardInterrupt: 

In [None]:
fig, ax = plt.subplots()

# Second y-axis sharing the same x-axis: losses on the left, accuracies on the right.
ax_e = ax.twinx()

# One colour per optimiser; solid line = loss, dashed line = accuracy, so the
# eight curves can be told apart.
loss_NAG_line = ax.plot(np.arange(len(L_NAG)),L_NAG,color='C0',label='L_NAG')
loss_RMSProp_line = ax.plot(np.arange(len(L_RMSProp)),L_RMSProp,color='C1',label='L_RMSProp')
loss_AdaDelta_line = ax.plot(np.arange(len(L_AdaDelta)),L_AdaDelta,color='C2',label='L_AdaDelta')
loss_Adam_line = ax.plot(np.arange(len(L_Adam)),L_Adam,color='C3',label='L_Adam')

accuracy_NAG_line = ax_e.plot(np.arange(len(acc_NAG)),acc_NAG,color='C0',linestyle='--',label='acc_NAG')
accuracy_RMSProp_line = ax_e.plot(np.arange(len(acc_RMSProp)),acc_RMSProp,color='C1',linestyle='--',label='acc_RMSProp')
accuracy_AdaDelta_line = ax_e.plot(np.arange(len(acc_AdaDelta)),acc_AdaDelta,color='C2',linestyle='--',label='acc_AdaDelta')
accuracy_Adam_line = ax_e.plot(np.arange(len(acc_Adam)),acc_Adam,color='C3',linestyle='--',label='acc_Adam')

#plt.yscale('log')
# The x position is the index of a logged evaluation, not an epoch: the
# optimisers record a point every 200 (NAG) or 50 (others) mini-batches.
ax.set(xlabel='Logged evaluation index', ylabel='Loss',
       title='Optimizer comparison: loss (solid) and accuracy (dashed)')
ax_e.set_ylabel('Accuracy')

# A single legend covering both axes, attached to the top-most axes so the
# curves do not cover it.
curves = (loss_NAG_line + loss_RMSProp_line + loss_AdaDelta_line + loss_Adam_line
          + accuracy_NAG_line + accuracy_RMSProp_line + accuracy_AdaDelta_line + accuracy_Adam_line)
ax_e.legend(curves, [curve.get_label() for curve in curves], loc='best', fontsize='small')
plt.show()
