In [10]:
from scipy.sparse import csr_matrix, hstack, vstack, random
# Small random sparse fixtures used to exercise the block-stacking helpers.
# Each call gets a fixed seed so the matrices are identical after every
# Restart & Run All instead of changing on each execution.
S = random(2, 3, density=0.5, random_state=0)
S_ = random(6, 7, density=0.5, random_state=1)
B = random(3, 7, density=0.5, random_state=2)
S.shape, S_.shape, B.shape

((2, 3), (6, 7), (3, 7))

In [34]:
def get_Z(n_rows, n_cols):
    """Return an empty (all-zero) CSR matrix of shape (n_rows, n_cols)."""
    # The shape-only constructor yields a float64 CSR matrix with no stored
    # entries, the same result as passing empty data/row/col lists.
    zero_shape = (n_rows, n_cols)
    return csr_matrix(zero_shape)

def stack_quadrant(tl, tr, bl, br):
    """Assemble the 2x2 block matrix [[tl, tr], [bl, br]] from sparse blocks.

    Any block may be passed as None to stand for an all-zero block; its shape
    is taken from the block beside it (row count) and the block above/below
    it (column count).

    Returns the stacked sparse matrix, or None (after printing a warning) when
    an entire block-row or block-column is None, because the size of the zero
    blocks cannot then be determined.
    """
    # Each pair is one full block-row or block-column of the 2x2 layout.
    rows_and_columns = (
        (tl, tr),  # top row
        (bl, br),  # bottom row
        (tl, bl),  # left column
        (tr, br),  # right column
    )
    if any(first is None and second is None for first, second in rows_and_columns):
        print('Warning: Unstackable! Size of zero matrices not known.')
        return None

    # Fill in this order: each later fill may read the shape of a block that
    # an earlier line has just replaced with a zero matrix.
    tl = get_Z(tr.shape[0], bl.shape[1]) if tl is None else tl
    tr = get_Z(tl.shape[0], br.shape[1]) if tr is None else tr
    bl = get_Z(br.shape[0], tl.shape[1]) if bl is None else bl
    br = get_Z(bl.shape[0], tr.shape[1]) if br is None else br

    left_column = vstack([tl, bl])
    right_column = vstack([tr, br])
    return hstack([left_column, right_column])

# First level:  tl = [[S_, 0  ],
#                     [B , S.T]]
tl = stack_quadrant(tl=S_, tr=None, bl=B, br=S.T)
# Second level: merged = [[tl, 0   ],
#                         [0 , tl.T]]
br = tl.transpose()
merged = stack_quadrant(tl=tl, tr=None, bl=None, br=br)

In [38]:
# Show the merged block matrix in CSR format (shape and stored-element count).
merged.tocsr()

<24x24 sparse matrix of type '<class 'numpy.float64'>'
	with 80 stored elements in Compressed Sparse Row format>

In [28]:
import pandas as pd
import os
def load_bipartite_hypergraph(data_params):
    """Load the author and keyword hypergraphs and the links between their hyperedges.

    Two ':'-separated, header-less text files are read from
    ``data_params['home_path']``. In every line, field 1 (0-based) is a
    comma-separated list of integer node ids; field 0 is ignored. Line i of
    the first file is paired with line i of the second file (``zip``
    semantics, so surplus lines in the longer file are ignored).

    Within each file, identical member sets are merged into one hyperedge;
    hyperedges are numbered in order of first appearance.

    Parameters
    ----------
    data_params : dict
        Must contain 'home_path', 'r_u_list_file' (author lists) and
        'r_v_list_file' (keyword lists). Any other keys, including the
        label-file entries, are not needed to build the matrices and are
        not read.

    Returns
    -------
    S : csr_matrix, shape (largest author id + 1, number of author hyperedges)
        Incidence matrix; S[i, j] == 1 when author i belongs to hyperedge j.
    S_ : csr_matrix, shape (largest keyword id + 1, number of keyword hyperedges)
        Incidence matrix of the keyword hypergraph, same convention.
    B : csr_matrix, shape (number of author hyperedges, number of keyword hyperedges)
        B[j, k] is the number of input lines that pair author hyperedge j
        with keyword hyperedge k (repeated pairs are summed).
    """
    def read_member_lists(file_key):
        path = os.path.join(data_params['home_path'], data_params[file_key])
        # dtype=str is essential: if every line lists a single member, pandas
        # would otherwise parse the column as integers and .split would fail.
        table = pd.read_csv(path, sep=':', header=None, dtype=str)
        return [[int(token) for token in cell.split(',')] for cell in table[1]]

    pos_A = read_member_lists('r_u_list_file')
    pos_B = read_member_lists('r_v_list_file')

    # I, J: row, col of author-hypergraph incidence entries
    # I_, J_: row, col of keyword-hypergraph incidence entries
    # I_B, J_B: row, col of author_hyperedge-keyword_hyperedge links
    I, J = [], []
    I_, J_ = [], []
    I_B, J_B = [], []
    # Member set -> hyperedge column index, assigned on first appearance.
    u_map = {}
    v_map = {}

    for u, v in zip(pos_A, pos_B):
        u = frozenset(u)
        v = frozenset(v)

        if u not in u_map:
            u_map[u] = len(u_map)
            I.extend(u)
            J.extend([u_map[u]] * len(u))
        if v not in v_map:
            v_map[v] = len(v_map)
            I_.extend(v)
            J_.extend([v_map[v]] * len(v))

        # One link per input line; csr_matrix sums repeated (row, col) pairs.
        I_B.append(u_map[u])
        J_B.append(v_map[v])

    # Row counts come from the largest node id actually seen in the lists.
    # NOTE(review): nodes that never occur in a list get no row; confirm this
    # is preferred over sizing from the label files.
    n = max(I) + 1
    m = len(u_map)
    n_ = max(I_) + 1
    m_ = len(v_map)
    S = csr_matrix(([1] * len(I), (I, J)), shape=(n, m))
    S_ = csr_matrix(([1] * len(I_), (I_, J_)), shape=(n_, m_))
    B = csr_matrix(([1] * len(I_B), (I_B, J_B)), shape=(m, m_))
    return S, S_, B







# Alternative locations / earlier experiments, kept for reference:
# home_path = '/content/drive/My Drive/projects/textual_analysis_email/'
# sample_path = os.path.join(home_path, 'sample_data')
# methods = [commonneigh, admic_adar, jaccard]
# method_name_map = dict(zip(methods, ['CN', 'AA', 'JC']))

# Directory that holds every input file named below.
home_path = 'sample_data/'

# File names are resolved relative to home_path by load_bipartite_hypergraph.
data_params = dict(
    home_path=home_path,
    r_label_file='id_p_map.txt',
    u_label_file='id_a_map.txt',
    v_label_file='id_k_map.txt',
    r_u_list_file='p_a_list_train.txt',
    r_v_list_file='p_k_list_train.txt',
    emb_pkl_file='nodevectors.pkl',
)
num_iter = 2

# S: author incidence, S_: keyword incidence, B: hyperedge-to-hyperedge links.
S, S_, B = load_bipartite_hypergraph(data_params)

In [39]:
# Dense view of the author incidence matrix returned by
# load_bipartite_hypergraph: rows are node ids, columns are distinct hyperedges.
S.todense()

matrix([[1, 0, 1, 1, 0, 0, 0],
        [1, 1, 0, 1, 1, 1, 0],
        [1, 0, 0, 0, 0, 1, 1],
        [0, 1, 1, 1, 0, 0, 0],
        [0, 0, 1, 1, 1, 0, 1]], dtype=int64)

In [41]:
# Dense view of the keyword incidence matrix returned by
# load_bipartite_hypergraph: rows are node ids, columns are distinct hyperedges.
S_.todense()

matrix([[0, 0, 1, 0, 0, 0],
        [1, 0, 1, 1, 0, 1],
        [1, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 1],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 1, 0, 1, 1]], dtype=int64)

In [45]:
# Keep only columns 0 and 1 of the hyperedge link matrix B (all rows),
# then display the slice densely.
X = B[:, [0, 1]]
X.todense()

matrix([[1, 0],
        [0, 1],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 1],
        [0, 0]], dtype=int64)

In [52]:
# Keep only the rows of X whose row sum is non-zero, i.e. drop rows that
# have no link to either selected column, and display the result densely.
X[(X.sum(axis=1)!=0).nonzero()[0], :].todense()

matrix([[1, 0],
        [0, 1],
        [0, 1]], dtype=int64)

In [64]:
import scipy, torch

In [60]:
# Directory that holds every input file named below.
# home_path = 'main_data/'
home_path = 'sample_data/'

# File names are resolved relative to home_path by load_bipartite_hypergraph.
data_params = dict(
    home_path=home_path,
    r_label_file='id_p_map.txt',
    u_label_file='id_a_map.txt',
    v_label_file='id_k_map.txt',
    r_u_list_file='p_a_list_train.txt',
    r_v_list_file='p_k_list_train.txt',
    emb_pkl_file='nodevectors.pkl',
)
S, S_, B = load_bipartite_hypergraph(data_params)

# Two-element lists that hold the same matrix object twice.
list_S = [S] * 2
list_S_ = [S_] * 2
list_B = [B] * 2

In [98]:
# Number of CUDA devices torch reports on this machine.
torch.cuda.device_count()

1

In [95]:
# Bare `pwd` is not Python; it presumably relies on IPython automagic (%pwd).
# The displayed working directory is machine-specific.
pwd

'/storage2/home2/e1-313-15477/swyam/SEAL/Python'

In [90]:
# NOTE(review): subhg_sp_n is not defined in any cell shown in this notebook;
# presumably it was created by a cell that is no longer present. TODO confirm.
subhg_sp_n.to_dense()

tensor([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]])

In [91]:
# NOTE(review): subhg_sp_n_ is not defined in any cell shown in this notebook;
# presumably it was created by a cell that is no longer present. TODO confirm.
subhg_sp_n_.to_dense()

tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])

In [94]:
# NOTE(review): subhg_sp_m_ is not defined in any cell shown in this notebook;
# presumably it was created by a cell that is no longer present. TODO confirm.
subhg_sp_m_.to_dense()

tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])