Here we define S(x) and S(y) in a way similar to how we used hop distances to create the matrix.
- S(x) represents the nodes that are 2 hops away from x. Effectively, this yields nodes on the same side of the network as x, because traveling two hops is equivalent to going to one of x’s neighbors and then coming back to x’s side.
- S(y) represents y’s neighbors. These are the nodes on the opposite side from y, which is the same side as x, because we have modeled the graph as bipartite in this case.
- Similarity Metrics to consider: Jaccard Similarity, Common Neighbors
- S(x) returns nodes that are similar to x, since they are 2 hops away and on the same side as x. x can be a user, in which case S(y) is computed from each hotel’s neighbors; x can also be a hotel, in which case S(y) is computed from each user’s neighbors.


In [1]:
import snap, datetime, json

def load_json(fname):
    '''Read the file at `fname` and return its decoded JSON content.'''
    with open(fname) as handle:
        parsed = json.loads(handle.read())
    return parsed
    
def write_json(d, fname):
    '''Serialize `d` to JSON text and write it to `fname`, overwriting the file.'''
    with open(fname, 'w') as sink:
        payload = json.dumps(d)
        sink.write(payload)

In [2]:
def hopper(nodeid, graph):
    '''Return, as a Python set, the node ids snap reports at hop 2 from `nodeid`.

    `graph` is the snap graph to search; the result vector filled in by
    `snap.GetNodesAtHop` is copied into a plain set so callers can use
    ordinary set operations on it.
    '''
    two_hop = snap.TIntV()
    # Final argument is passed as True, exactly as before (presumably snap's
    # "treat as directed" flag -- confirm against the snap documentation).
    snap.GetNodesAtHop(graph, nodeid, 2, two_hop, True)
    return set(two_hop)

In [4]:
def cal_hop2_dis(nodes, matrix, graph, checkstr, nodes_2hop):
    '''Return a dict mapping node id -> set of node ids two hops away.

    Parameters:
        nodes      -- collection of node ids present in `graph`; ids that are
                      not in it are skipped.
        matrix     -- nested mapping; outer keys and inner keys are converted
                      with int() to obtain node ids.
        graph      -- snap graph forwarded to `hopper`.
        checkstr   -- 'users' computes 2-hop sets for the outer keys of
                      `matrix`; any other value computes them for the inner
                      keys.
        nodes_2hop -- ids already computed elsewhere; in the inner-key mode
                      these ids are skipped.

    Returns:
        dict of int node id -> set of int node ids (as produced by `hopper`).
    '''
    # Membership is tested once per matrix cell, so use a set instead of
    # scanning the caller's collection each time.
    known = set(nodes)
    hops = {}
    for i in matrix:
        if checkstr == 'users':
            nodeid = int(i)
            if nodeid in known:
                hops[nodeid] = hopper(nodeid, graph)
            continue
        for j in matrix[i]:
            nodeid = int(j)
            # The same inner id usually appears under many outer keys; skip it
            # once it has been computed here (or was supplied by the caller)
            # rather than repeating the graph traversal.
            if nodeid in nodes_2hop or nodeid in hops:
                continue
            if nodeid in known:
                hops[nodeid] = hopper(nodeid, graph)
    return hops
            

In [6]:
def get_hotels_neighbor(nodes, matrix, graph):
    '''Map each inner key of `matrix` (as an int id) to its 1-hop node set.

    Only ids contained in `nodes` are looked up, and each id is queried at
    most once even if it appears under several outer keys. Returns a dict of
    int id -> set of ids reported by snap at hop 1 in `graph`.
    '''
    neighbors = {}
    for row_key in matrix:
        for col_key in matrix[row_key]:
            hotel_id = int(col_key)
            if hotel_id in neighbors or hotel_id not in nodes:
                continue
            adjacent = snap.TIntV()
            snap.GetNodesAtHop(graph, hotel_id, 1, adjacent, True)
            neighbors[hotel_id] = set(adjacent)
    return neighbors

def get_user_neighbor(nodes, matrix, graph):
    '''Map each outer key of `matrix` (as an int id) to its 1-hop node set.

    Ids that are not contained in `nodes` are skipped. Returns a dict of
    int id -> set of ids reported by snap at hop 1 in `graph`.
    '''
    neighbors = {}
    for row_key in matrix:
        user_id = int(row_key)
        if user_id in neighbors or user_id not in nodes:
            continue
        adjacent = snap.TIntV()
        snap.GetNodesAtHop(graph, user_id, 1, adjacent, True)
        neighbors[user_id] = set(adjacent)
    return neighbors

In [12]:
def jaccard(a, b):
    '''Return the Jaccard similarity |a & b| / |a | b| of two sets as a float.

    When both sets are empty the union is empty as well; 0.0 is returned in
    that case instead of dividing by zero.
    '''
    union_size = len(a.union(b))
    if union_size == 0:
        return 0.0
    return float(len(a.intersection(b))) / float(union_size)

def common_neighbors(a, b):
    '''Return the number of elements that sets `a` and `b` have in common.'''
    return len(a.intersection(b))

def cal_similarity(matrix, s, f, nodes_2hop, neighbors, nodes):
    '''Build a similarity matrix with the same keys as `matrix`.

    Parameters:
        matrix     -- nested mapping; outer key i and inner key j are
                      converted with int() to obtain node ids.
        s          -- metric name: 'jaccard', or 'common_neighbors' (the
                      short form 'cn' is accepted as an alias).
        f          -- unused here; kept so existing callers keep working.
        nodes_2hop -- dict of node id -> set of ids two hops away.
        neighbors  -- dict of node id -> set of adjacent ids.
        nodes      -- collection of node ids present in the graph.

    Returns:
        dict of dicts where result[i][j] is the similarity between the 2-hop
        set of one endpoint and the neighbor set of the other endpoint, or 0
        when either endpoint is not in `nodes`.

    Raises:
        ValueError -- if `s` is not a recognised metric name.
        KeyError   -- if a required set is missing from `nodes_2hop` or
                      `neighbors`.
    '''
    if s == 'jaccard':
        use_jaccard = True
    elif s in ('common_neighbors', 'cn'):
        use_jaccard = False
    else:
        raise ValueError('unknown similarity metric: %r' % (s,))

    known = set(nodes)  # O(1) membership for the per-cell checks below
    mat = {}
    for i in matrix:
        # Create the output row up front; assigning mat[i][j] without it
        # raises KeyError.
        row = mat.setdefault(i, {})
        for j in matrix[i]:
            row_id, col_id = int(i), int(j)
            if row_id not in known or col_id not in known:
                row[j] = 0
                continue
            # users() keys nodes_2hop by the row id and neighbors by the
            # column id; hotels() keys them the other way round. Pick the
            # endpoint that actually has a 2-hop set.
            # NOTE(review): assumes row ids and column ids never collide
            # (bipartite graph) -- confirm against the data.
            if row_id in nodes_2hop:
                x, y = row_id, col_id
            else:
                x, y = col_id, row_id
            if use_jaccard:
                row[j] = jaccard(nodes_2hop[x], neighbors[y])
            else:
                row[j] = common_neighbors(nodes_2hop[x], neighbors[y])
    return mat




In [13]:
def hotels(matrix, graph, sims, out):
    '''Compute hotel-side similarity matrices and write one JSON file per metric.

    Parameters:
        matrix -- nested mapping passed through to the helper functions.
        graph  -- snap graph whose node ids define the valid id set.
        sims   -- sequence of metric names understood by `cal_similarity`.
        out    -- sequence of output file paths, paired with `sims` by
                  position (extra entries in the longer one are ignored).
    '''
    # Parenthesised single-argument print behaves the same under Python 2's
    # print statement and Python 3's print function.
    print("Dealing With Hotels...")
    # The helpers only test membership in `nodes`, so a set avoids a linear
    # scan of all graph nodes for every matrix cell.
    nodes = set(N.GetId() for N in snap.Nodes(graph))
    print('Get hop2 nodes for hotels')
    nodes_2hop = cal_hop2_dis(nodes, matrix, graph, "hotel", {})
    print("Get neighbors for hotels")
    neighbors = get_user_neighbor(nodes, matrix, graph)

    for s, f in zip(sims, out):
        hotel_sim = cal_similarity(matrix, s, f, nodes_2hop, neighbors, nodes)
        write_json(hotel_sim, f)

def users(matrix, graph, sims, out):
    '''Compute user-side similarity matrices and write one JSON file per metric.

    Parameters:
        matrix -- nested mapping passed through to the helper functions.
        graph  -- snap graph whose node ids define the valid id set.
        sims   -- sequence of metric names understood by `cal_similarity`.
        out    -- sequence of output file paths, paired with `sims` by
                  position (extra entries in the longer one are ignored).
    '''
    # Parenthesised single-argument print behaves the same under Python 2's
    # print statement and Python 3's print function.
    print("Dealing With Users...")
    # The helpers only test membership in `nodes`, so a set avoids a linear
    # scan of all graph nodes for every matrix cell.
    nodes = set(N.GetId() for N in snap.Nodes(graph))
    print('Get hop2 nodes for users')
    nodes_2hop = cal_hop2_dis(nodes, matrix, graph, "users", {})
    print("Get neighbors for users")
    neighbors = get_hotels_neighbor(nodes, matrix, graph)

    for s, f in zip(sims, out):
        user_sim = cal_similarity(matrix, s, f, nodes_2hop, neighbors, nodes)
        write_json(user_sim, f)



In [None]:
def caller(matrix, graph, usims, uout, hsims, hout):
    '''Load the inputs from disk and run the hotel pass, then the user pass.

    Parameters:
        matrix -- path of the JSON file read with `load_json`.
        graph  -- path of the edge-list file loaded with `snap.LoadEdgeList`
                  (columns 0 and 1 are passed as the two endpoint columns).
        usims, uout -- metric names and output paths forwarded to `users`.
        hsims, hout -- metric names and output paths forwarded to `hotels`.
    '''
    # NOTE: the start time is captured but never read in this function.
    start = datetime.datetime.now()
    ratings = load_json(matrix)
    network = snap.LoadEdgeList(snap.PUNGraph, graph, 0, 1)
    hotels(ratings, network, hsims, hout)
    users(ratings, network, usims, uout)

if __name__ == '__main__':
    # Metric names and output files are paired by position further down, so
    # both lists must list jaccard first and common neighbors second.
    # 'common_neighbors' is the spelling cal_similarity compares against.
    metrics = ['jaccard', 'common_neighbors']
    for base in ("./data/train/", "./data/test/"):
        caller(base + 'matrix.json', base + 'graph.txt',
               metrics, [base + 'user_jac.json', base + 'user_cn.json'],
               metrics, [base + 'hotel_jac.json', base + 'hotel_cn.json'])

Dealing With Hotels...
Get hop2 nodes for hotels
