In [1]:
# Install liblinear: sudo apt-get install python-liblinear
# from liblinearutil import *
import numpy as np

In [2]:
def read_problem(file_name):
    y = []
    x = []
    feature_len = 22
    for line in open(file_name):
        tmp = line.split(' ',1)
        y = y + [int(tmp[0])]
        vec = np.zeros(feature_len)
        for each in tmp[1].split():
            index,val = each.split(':')
            vec[int(index)-1] = float(val)
        x.append(vec)
    y = np.array(y)
    x = np.array(x)
    return y,x

# Gradient ascent step for dual variables

In [31]:
def get_tp_tn(x,y,w):
    h = x.dot(w)
    ind = np.where(h>0)
    y1 = -1*np.ones_like(y)
    y1[ind] = 1
    tp = sum(np.multiply(1+y,1+y1)/4)
    tn = sum(np.multiply(1-y,1-y1)/4)
    return tp,tn

def get_projection(alpha,beta):
    if alpha>0 and beta>0 and alpha*beta>=1.0/4:
        return alpha,beta
    coef = [16,-16*alpha,0,4*beta,-1]
    roots = np.roots(coef)
    im = np.imag(roots)
    re = np.real(roots)
    for r,i in zip(re,im):
        if i==0 and r>0:
            r1 = 1.0/(4*r)
            return np.round(r,decimals=2),np.round(r1,decimals=2)

# Change it to gradient descent
def gradient_ascent(x,y,w,a,b,alpha=0.1,maxiter=50,tolerence=0.0001):
    n = x.shape[0]
    n_pos = len([i for i in range(n) if y[i]==1])
    n_neg = n-n_pos
    ind_pos = [i for i in range(n) if y[i]>0]
    ind_neg = [i for i in range(n) if y[i]<0]
    x_pos = x[ind_pos]
    x_neg = x[ind_neg]
    tpr,tnr = get_tp_tn(x,y,w)
    tpr /= float(n_pos)
    tnr /= float(n_neg)
    epsilon = 0.0001
    print('TPR =',tpr,'TNR =',tnr)
    for it in range(maxiter):
#         alpha = 1.0/(it+1)
        # for positives
        tmp = 1 - (2*n_pos)/(a*n)*x_pos.dot(w)
        j = np.where(tmp>0)[0]
        tmp = np.zeros_like(tmp)
        tmp[j]=1.0/n_pos
        ga = sum(tmp)-1
        # for negatives
        tmp = 1 + (2*n_neg)/(b*n)*x_neg.dot(w)
        j = np.where(tmp>0)[0]
        tmp = np.zeros_like(tmp)
        tmp[j]=1.0/n_neg
        gb = sum(tmp)-1
        # Update a,b
        if np.sqrt(ga*ga+gb*gb)<tolerence:
            print('1.Returning: ga = {} gb = {}'.format(ga,gb))
            return a,b
        a += alpha*ga
        b += alpha*gb
        # Projection to a,b \in R+ \intersect ab>1/4
        a,b = get_projection(a,b)
        print(a,b)
    print('2.Returning: ga = {} gb = {}'.format(ga,gb))
    return a,b

## Liblinear for primal variable w

In [4]:
# customized liblinear
import sys
sys.path.insert(0, "/home/debojyoti/opt/liblinear-2.1")
from ppython import liblinear
from ppython.liblinear import *
from ppython.liblinearutil import *

In [32]:
def modifyx(x,y,a,b,n,n_pos,n_neg):
    scale_pos = float(2*n_pos)/(a*n)
    scale_neg = float(2*n_neg)/(b*n)
    for i in range(len(y)):
        if y[i]==1:
            x[i].update((key,val*scale_pos) for key,val in x[i].items())
        else:
            x[i].update((key,val*scale_neg) for key,val in x[i].items())
    return x
# init section
a,b = 0.5,0.5
y_lst,x_lst = read_problem('./data/ijcnn1.tr') # x as numpy list
n_dim = len(x_lst[0])
w = np.random.uniform(low=-1,high=1,size=(n_dim,))
# w = np.zeros(n_dim)
y_orig,x_orig = svm_read_problem('./data/ijcnn1.tr') # x in liblinear compatible dictionary format
n = len(y_lst)
n_pos = len([i for i in range(n) if y_lst[i]==1])
n_neg = n-n_pos
c = 5.0
# Iterative section: Gradient Ascent & Liblinear
for i in range(10):
    a,b = gradient_ascent(x_lst,y_lst,w,a,b) #Gradient ascent
    c1 = float(a)/n_pos
    c2 = float(b)/n_neg
    param = parameter('-s 3 -w1 {} -w-1 {} -c {}'.format(c1,c2,c))
    x = modifyx(x_orig,y_orig,a,b,n,n_pos,n_neg)
    prob = problem(y_orig,x)
    model = train(prob,param)
    # Get model parameters
    w = model.get_decfun()[0]
    print('----------Cycle End:  a =',a,'b =',b,'-------------')

TPR = 0.51859443631 TNR = 0.451511793573
0.51 0.49
0.52 0.48
0.53 0.47
0.54 0.46
0.55 0.45
0.56 0.44
0.58 0.43
0.59 0.42
0.6 0.41
0.62 0.41
0.63 0.4
0.64 0.39
0.65 0.38
0.66 0.38
0.67 0.37
0.68 0.37
0.69 0.36
0.7 0.36
0.71 0.35
0.72 0.35
0.73 0.34
0.74 0.34
0.75 0.33
0.76 0.33
0.77 0.32
0.78 0.32
0.79 0.32
0.8 0.31
0.81 0.31
0.82 0.31
0.83 0.3
0.84 0.3
0.85 0.29
0.86 0.29
0.87 0.29
0.88 0.28
0.89 0.28
0.9 0.28
0.91 0.28
0.92 0.27
0.93 0.27
0.94 0.27
0.95 0.26
0.96 0.26
0.97 0.26
0.98 0.26
0.99 0.25
1.0 0.25
1.01 0.25
1.02 0.25
2.Returning: ga = -3.8746783559417963e-14 gb = -0.32464777584269966
----------Cycle End:  a = 1.02 b = 0.25 -------------
TPR = 0.0 TNR = 1.0
1.04 0.24
1.06 0.24
1.08 0.23
1.1 0.23
1.12 0.22
1.14 0.22
1.16 0.22
1.18 0.21
1.2 0.21
1.22 0.21
1.24 0.2
1.26 0.2
1.27 0.2
1.28 0.19
1.3 0.19
1.31 0.19
1.32 0.19
1.33 0.19
1.34 0.19
1.35 0.18
1.36 0.18
1.37 0.18
1.38 0.18
1.39 0.18
1.4 0.18
1.41 0.18
1.42 0.18
1.43 0.17
1.44 0.17
1.45 0.17
1.46 0.17
1.47 0.17
1.48 0.17
1.