##### Binary train

In [1]:
import numpy as np
from numpy import linalg as LA
from scipy.sparse import spdiags
from scipy.sparse import csr_matrix

def _propagate(M, alpha, pred, hops):
    """Return ``alpha * sum_{i=0..hops} M^i pred`` for a 1-D score vector."""
    acc = pred
    for _ in range(hops):
        acc = M.dot(acc) + pred
    return alpha * acc


def binaryTrain(X, Y, options):
    """Train a binary classifier whose scores are propagated over a graph.

    The weight vector is fitted with projected mini-batch sub-gradient
    steps; the scores ``X w`` are propagated through the damped,
    degree-normalised matrix ``M`` for a fixed number of hops.

    Parameters
    ----------
    X : scipy sparse matrix, shape (N, N)
        Graph matrix. It must be square because the per-node term
        ``log2(2 + degree)`` is multiplied element-wise with ``w``.
    Y : array of length N
        ``+1`` / ``-1`` for labelled nodes, ``0`` for unlabelled ones.
    options : dict
        ``'lamda'`` (regularisation strength), ``'k'`` (mini-batch size,
        clamped to the number of labelled nodes) and ``'maxIter'``.

    Returns
    -------
    dict or None
        ``{'w': 1 x N np.matrix, 'b': float}``; ``None`` (after printing an
        error) when ``len(Y)`` differs from the number of rows of ``X``.

    Raises
    ------
    ZeroDivisionError
        If ``Y`` contains no ``+1`` label (the class weight is undefined).
    """
    lamda = options['lamda']
    k = options['k']
    maxIter = options['maxIter']

    _pow = 8            # number of propagation hops
    Tolerance = 1e-6    # stop once w moves less than this between steps

    X = csr_matrix(X)
    Y = np.asarray(Y)
    N = X.shape[0]

    # Validate before any array of length len(Y) is indexed against X.
    if len(Y) != N:
        print('Error: Number of elements in X and Y must same')
        return

    trainIdx = Y.nonzero()[0]
    Ntr = len(trainIdx)
    # A mini-batch drawn without replacement cannot exceed the training set.
    k = min(k, Ntr)

    # Up-weight the positive class by the negative/positive ratio.
    wpos = len((Y == -1).nonzero()[0])/float(len((Y == 1).nonzero()[0]))
    wneg = 1

    weight = np.zeros(len(Y))
    weight[Y == 1] = wpos
    weight[Y == -1] = wneg

    degree = np.asarray(X.sum(axis=1), dtype=float).ravel()
    logd = np.log2(2 + degree)
    alpha = 1 - 1./logd

    # Isolated nodes get a zero inverse degree instead of inf; their row of
    # X is empty, so their row of M is empty either way.
    invDegree = np.zeros_like(degree)
    connected = degree != 0
    invDegree[connected] = 1./degree[connected]

    # M = diag(1 - alpha) * diag(1 / degree) * X
    M = spdiags((1 - alpha)*invDegree, 0, N, N).tocsr().dot(X)
    Mt = M.T
    Xt = X.T

    # Random start, scaled onto the ball of radius 1/sqrt(lamda).
    w = np.random.rand(1, X.shape[1])
    w = (w/(np.sqrt(lamda)*LA.norm(w))).ravel()

    converged = False
    for t in range(maxIter):
        if t % 100 == 0:
            print('iteration #  %s / %s' % (t, maxIter))
        w_old = w

        propogatedPred = _propagate(M, alpha, X.dot(w), _pow)
        b = np.mean(Y[trainIdx] - propogatedPred[trainIdx])

        idx = np.random.choice(Ntr, k, replace=False)
        randomSubset = trainIdx[idx]

        violated = np.sign(propogatedPred[randomSubset] + b)*Y[randomSubset] < 1
        misClass = randomSubset[violated]

        if misClass.size:
            # Back-propagate the per-node coefficients through the same
            # hops that were used in the forward pass (transposed form).
            coef = weight[misClass]*Y[misClass]*alpha[misClass]
            part = M[misClass].T.dot(coef)
            At = np.zeros_like(part)
            for _ in range(_pow):
                At = Mt.dot(At) + part
            lossGrad = Xt.dot(At) + X[misClass].T.dot(coef)
        else:
            lossGrad = np.zeros_like(w)

        # Regulariser gradient minus the mini-batch average of the loss
        # term; float(k) avoids Python 2 integer division.
        grad = lamda*w*logd - lossGrad/float(k)
        etat = 1./(2 + lamda*t)
        w1 = w - etat*grad
        # Project back onto the ball of radius 1/sqrt(lamda).
        w = min(1, 1/(np.sqrt(lamda)*LA.norm(w1)))*w1

        if LA.norm(w - w_old) < Tolerance:
            converged = True
            break

    if converged:
        print('W converged in  %s  iterations.' % t)
    else:
        print('W not converged in  %s  iterations.' % maxIter)

    propogatedPred = _propagate(M, alpha, X.dot(w), _pow)
    b = np.mean(Y[trainIdx] - propogatedPred[trainIdx])

    Tr = np.sum(np.sign(propogatedPred[trainIdx] + b) == Y[trainIdx])
    TrainAccuracy = 100.*Tr/Ntr
    print('Accuracy on Training set =  %s  %%\n' % TrainAccuracy)

    model = {
        'w': np.matrix(w),
        'b': b
    }

    return model

##### Snowball sampling

In [2]:
import numpy as np

def snowball_sampling(graph, tr_fraction=0.1):
    """Snowball Sampling.

    Grows a test set outwards from random seed nodes and returns the
    remaining nodes as the training set.

    INPUT:
        graph:        adj. mat. of a graph (square; symmetrised internally,
                      the caller's matrix is not modified)
        tr_fraction:  fraction of training nodes, default is 0.1
    OUTPUT:
        Train:        indices of training nodes, i.e. every row index of
                      ``graph`` that was not put into the test set
                      (nodes without any edge are never counted towards the
                      test-set size, which is derived from connected nodes)
    """
    test_fraction = 1 - tr_fraction

    # Work on an undirected, unweighted copy of the graph.
    graph = graph + graph.T
    graph[graph > 0] = 1
    n_total = graph.shape[0]

    # Nodes that take part in at least one edge.
    Nodes = np.unique(graph.nonzero())
    n_Test = int(np.floor(len(Nodes)*test_fraction))

    n_seed = int(np.ceil(n_Test*0.02))
    Seed = np.random.choice(Nodes, n_seed, replace=False)
    Selected = Seed

    while len(Selected) < n_Test:
        # Column indices of the summed seed rows ARE the neighbour ids;
        # they must not be re-mapped through Nodes, which is shorter than
        # the matrix whenever the graph has isolated nodes.
        reached = np.asarray(graph[Seed].sum(axis=0)).ravel().nonzero()[0]
        Neighbor = np.setdiff1d(reached, Selected, assume_unique=True)

        n_pick = int(len(Neighbor)*test_fraction/2)
        if n_pick > 0:
            # Continue the snowball from a random part of the frontier.
            Seed = np.random.choice(Neighbor, n_pick, replace=False)
        else:
            # Frontier exhausted: restart from fresh random nodes.
            UnSelected = np.setdiff1d(np.arange(n_total), Selected, assume_unique=True)
            Seed = np.random.choice(UnSelected, min(n_seed, len(UnSelected)), replace=False)
        Selected = np.unique(np.append(Selected, Seed))

    Test = Selected[0:n_Test]
    Train = np.setdiff1d(np.arange(n_total), Test, assume_unique=True)
    return Train

##### Test input: amazon dataset

In [3]:
import scipy.io

# Load the Amazon dataset. The .mat file is expected to hold a sparse
# 'graph' matrix and a sparse 'label' matrix (both are converted below).
mat = scipy.io.loadmat('Datasets/amazon.mat')

A = mat['graph'].tocsr()

truth = mat['label'].toarray()[:,0] # test on the first label
# Recode labels from {0, 1} to {-1, +1}; 0 is reserved for "unlabelled" in y.
truth[truth == 0] = -1
# Size of a 10% training set; only used by the commented-out random selection.
tr = int(len(truth) * 0.1)

#tr_idx = np.random.choice(len(truth), tr, replace=False) # random select
tr_idx = snowball_sampling(A, tr_fraction=0.1) # snowball sampling

# y keeps the true label on training nodes and 0 everywhere else.
y = np.zeros(len(truth))
y[tr_idx] = truth[tr_idx]
# Hyper-parameters passed to binaryTrain through `options`.
lamda = pow(2, -6)
imax = 1000
k = 1000

options = {
    'lamda': lamda,
    'k': k,
    'maxIter': imax
}


##### Test predict: training data

In [None]:
model = binaryTrain(A, y, options)
print 'baseline accuracy = ', 100 - float(len((y == 1).nonzero()[0]))/(len(tr_idx))*100 # acc. of all -1

iteration #  0 / 1000
iteration #  100 / 1000
iteration #  200 / 1000
iteration #  300 / 1000
iteration #  400 / 1000


##### Binary predict

In [None]:
def binaryPredict(X, model):
    """Predict a sign label for every node of the graph ``X``.

    Re-builds the propagation matrix the same way training does, propagates
    the scores ``X w`` for a fixed number of hops and thresholds the
    damped result plus bias.

    Parameters
    ----------
    X : scipy sparse matrix, shape (N, N)
        Graph matrix.
    model : dict
        ``'w'`` (weight vector; a 1 x N ``np.matrix`` or any array with N
        entries) and ``'b'`` (scalar bias).

    Returns
    -------
    numpy.ndarray of length N
        ``np.sign`` of the propagated score plus bias (``0`` where the
        score is exactly zero).
    """
    _pow = 8  # number of propagation hops

    X = csr_matrix(X)
    N = X.shape[0]

    # Conjugate + flatten mirrors the ``w.H`` column vector.
    w = np.conj(np.asarray(model['w'])).ravel()
    b = model['b']

    degree = np.asarray(X.sum(axis=1), dtype=float).ravel()
    alpha = 1 - 1./np.log2(2 + degree)

    # Isolated nodes get a zero inverse degree instead of inf; their row of
    # X is empty, so their row of M is empty either way.
    invDegree = np.zeros_like(degree)
    connected = degree != 0
    invDegree[connected] = 1./degree[connected]

    # M = diag(1 - alpha) * diag(1 / degree) * X
    M = spdiags((1 - alpha)*invDegree, 0, N, N).tocsr().dot(X)

    pred = X.dot(w)
    # propogatedPred = sum_{i=0.._pow} M^i pred
    propogatedPred = pred
    for _ in range(_pow):
        propogatedPred = M.dot(propogatedPred) + pred

    return np.sign(alpha*propogatedPred + b)
    

In [None]:
bp_res = binaryPredict(A, model)
print 'Pre = ', np.unique(bp_res, return_counts=True)
print 'Act = ', np.unique(truth, return_counts=True)