In [1]:
# Quick check of the NeighborLoader only, ahead of the later training run

In [2]:
import os

import numpy as np
import torch
from torch_geometric.data import HeteroData
from torch_geometric.loader import NeighborLoader
from torch_geometric.transforms import ToUndirected

In [3]:
def load_csr(path):
    """
    Load a CSR adjacency stored as two ``.npy`` files inside a directory.

    The directory's own name is used as the file prefix: for
    ``path = ".../a__rel__b"`` the files read are
    ``.../a__rel__b/a__rel__b_indptr.npy`` and
    ``.../a__rel__b/a__rel__b_indices.npy``.

    Args:
        path: Directory holding the two arrays, as ``str`` or
            ``os.PathLike``. A trailing path separator is tolerated.

    Returns:
        Tuple ``(indptr, indices)``. Both arrays are opened with
        ``mmap_mode="r"``, i.e. memory-mapped read-only rather than read
        into RAM up front.
    """
    directory = os.fspath(path)
    # normpath drops a trailing separator; without it the prefix of
    # "dir/name/" would be the empty string and "_indptr.npy" would be
    # looked up instead of "name_indptr.npy".
    prefix = os.path.basename(os.path.normpath(directory))
    indptr = np.load(
        os.path.join(directory, f"{prefix}_indptr.npy"), mmap_mode="r"
    )
    indices = np.load(
        os.path.join(directory, f"{prefix}_indices.npy"), mmap_mode="r"
    )
    return indptr, indices


def csr_to_edge_index(indptr, indices):
    """
    Convert CSR (indptr, indices) -> edge_index [2, num_edges].

    Row ``i`` contributes ``indptr[i + 1] - indptr[i]`` consecutive entries
    of ``indices``; each of those entries becomes one column
    ``(i, indices[k])`` of the result.

    Args:
        indptr: 1-D array-like of row offsets, length ``num_rows + 1``.
        indices: 1-D array-like of column ids, one per edge.

    Returns:
        ``torch.LongTensor`` of shape ``[2, num_edges]``; row 0 holds the
        source (CSR row) ids and row 1 the destination (CSR column) ids.

    Raises:
        ValueError: if the inputs are not 1-D, ``indptr`` is decreasing
            somewhere, or the edge count implied by ``indptr`` differs from
            ``len(indices)``.
    """
    indptr = np.asarray(indptr)
    indices = np.asarray(indices)
    if indptr.ndim != 1 or indices.ndim != 1:
        raise ValueError("indptr and indices must both be 1-D arrays")

    num_rows = max(indptr.shape[0] - 1, 0)
    # Cast before differencing so unsigned offsets cannot wrap around and
    # np.repeat receives a signed integer count array.
    counts = np.diff(indptr.astype(np.int64, copy=False))
    if (counts < 0).any():
        raise ValueError("indptr must be non-decreasing")

    num_edges = int(counts.sum())
    if num_edges != indices.shape[0]:
        raise ValueError(
            f"indptr describes {num_edges} edges but indices has "
            f"{indices.shape[0]} entries"
        )

    # Fill a preallocated int64 buffer instead of np.vstack: this avoids one
    # full-size temporary and keeps the dtype int64 regardless of the input
    # dtype (stacking int64 with uint64 would silently promote to float64).
    edge_index = np.empty((2, num_edges), dtype=np.int64)
    edge_index[0] = np.repeat(np.arange(num_rows, dtype=np.int64), counts)
    edge_index[1] = indices
    return torch.from_numpy(edge_index)


In [4]:
# Root directory containing one sub-directory per edge type. Set the
# GRAPH_ADJ_DIR environment variable to point somewhere else; when it is
# unset the original local location is used.
path = os.environ.get(
    "GRAPH_ADJ_DIR",
    "/Users/ymnzaman/Documents/Project/Graph/adjacency",
)

# Each load_csr call returns an (indptr, indices) pair for one edge type.
# Edges leaving "nasabah" nodes.
adj_pekerja       = load_csr(f"{path}/nasabah__is_pekerja__pekerja")
adj_memiliki_simp = load_csr(f"{path}/nasabah__memiliki_simp__simpanan")
adj_memiliki_pinj = load_csr(f"{path}/nasabah__memiliki_pinj__pinjaman")

# "simpanan" <-> "transaksi" edges, one adjacency per direction.
adj_simp_debit  = load_csr(f"{path}/simpanan__out__transaksi")
adj_simp_credit = load_csr(f"{path}/transaksi__in__simpanan")

# "pinjaman" <-> "transaksi" edges, one adjacency per direction.
adj_pinj_debit  = load_csr(f"{path}/pinjaman__out__transaksi")
adj_pinj_credit = load_csr(f"{path}/transaksi__in__pinjaman")

In [5]:
# Number of nodes per node type; the graph carries no node features here,
# so the counts are registered explicitly.
node_num_dict = dict(
    pekerja=6_250,
    nasabah=12_270_075,
    simpanan=15_636_712,
    pinjaman=1_524_589,
    transaksi=12_516_002,
)

data = HeteroData()

for ntype, nnum in node_num_dict.items():
    data[ntype].num_nodes = nnum

# (source type, relation, destination type) -> CSR pair loaded earlier.
# Order matters only in that it fixes the order of data.edge_types.
edge_sources = (
    (("nasabah", "is_pekerja", "pekerja"), adj_pekerja),
    (("nasabah", "memiliki_simp", "simpanan"), adj_memiliki_simp),
    (("nasabah", "memiliki_pinj", "pinjaman"), adj_memiliki_pinj),
    (("simpanan", "out", "transaksi"), adj_simp_debit),
    (("transaksi", "in", "simpanan"), adj_simp_credit),
    (("pinjaman", "out", "transaksi"), adj_pinj_debit),
    (("transaksi", "in", "pinjaman"), adj_pinj_credit),
)

for edge_key, (csr_indptr, csr_indices) in edge_sources:
    data[edge_key].edge_index = csr_to_edge_index(csr_indptr, csr_indices)




In [6]:
# Apply the ToUndirected transform to the whole heterogeneous graph; the
# later cells reference the resulting "rev_<relation>" edge types.
make_undirected = ToUndirected()
data = make_undirected(data)

In [12]:
# Sampling configuration: every edge type uses the same fan-out list, with
# one entry per hop.
NUM_NEIGHBORS_PER_HOP = 8
NUM_HOPS = 5
BATCH_SIZE = 1024

# Mini-batches are seeded from "pekerja" nodes (all of them, in shuffled
# order). The fan-out dict is derived from data.edge_types so that it always
# covers exactly the edge types present in the graph (forward and "rev_*")
# instead of relying on a hand-maintained list.
loader = NeighborLoader(
    data,
    input_nodes=("pekerja", torch.arange(data["pekerja"].num_nodes)),
    num_neighbors={
        edge_type: [NUM_NEIGHBORS_PER_HOP] * NUM_HOPS
        for edge_type in data.edge_types
    },
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [90]:
# Pull a single mini-batch from the loader and report, per edge type, how
# many edges ended up in the sampled subgraph.
batch = next(iter(loader))

for etype in batch.edge_types:
    sampled_edge_index = batch[etype].edge_index
    print(f"{etype}: {sampled_edge_index.size(1)} edges")


('nasabah', 'is_pekerja', 'pekerja'): 7847 edges
('nasabah', 'memiliki_simp', 'simpanan'): 32699 edges
('nasabah', 'memiliki_pinj', 'pinjaman'): 7137 edges
('simpanan', 'out', 'transaksi'): 367 edges
('transaksi', 'in', 'simpanan'): 354 edges
('pinjaman', 'out', 'transaksi'): 0 edges
('transaksi', 'in', 'pinjaman'): 0 edges
('pekerja', 'rev_is_pekerja', 'nasabah'): 7457 edges
('simpanan', 'rev_memiliki_simp', 'nasabah'): 8056 edges
('pinjaman', 'rev_memiliki_pinj', 'nasabah'): 4470 edges
('transaksi', 'rev_out', 'simpanan'): 357 edges
('simpanan', 'rev_in', 'transaksi'): 366 edges
('transaksi', 'rev_out', 'pinjaman'): 0 edges
('pinjaman', 'rev_in', 'transaksi'): 0 edges


In [91]:
 # Show the number of "pekerja" nodes, i.e. the size of the seed-node range
 # passed to the NeighborLoader as input_nodes.
 data["pekerja"].num_nodes

6250

In [36]:
# Sanity check: for every edge type print the edge_index shape and the
# smallest / largest node id on the source and destination side, to compare
# against the node counts registered for each node type.
for etype in data.edge_types:
    ei = data[etype].edge_index
    src_ids, dst_ids = ei[0], ei[1]
    print(etype, ei.shape)
    print("src min/max:", src_ids.min().item(), src_ids.max().item())
    print("dst min/max:", dst_ids.min().item(), dst_ids.max().item())


('nasabah', 'is_pekerja', 'pekerja') torch.Size([2, 64110])
src min/max: 1624 12253120
dst min/max: 0 6249
('nasabah', 'memiliki_simp', 'simpanan') torch.Size([2, 187998376])
src min/max: 0 12270074
dst min/max: 0 15636711
('nasabah', 'memiliki_pinj', 'pinjaman') torch.Size([2, 12682596])
src min/max: 19 12270026
dst min/max: 0 1524588
('simpanan', 'out', 'transaksi') torch.Size([2, 387529])
src min/max: 39 13681336
dst min/max: 2770 12515931
('transaksi', 'in', 'simpanan') torch.Size([2, 394667])
src min/max: 2770 12516001
dst min/max: 39 13681336
('pinjaman', 'out', 'transaksi') torch.Size([2, 1006])
src min/max: 1291 996726
dst min/max: 12058205 12515173
('transaksi', 'in', 'pinjaman') torch.Size([2, 1006])
src min/max: 12058205 12515173
dst min/max: 1291 996726
