# KGAG DataLoade

In [None]:
import sys
import os
sys.path.append('..')

from KGAG.dataloader import BeHAVEDataLoader, load_behave_data
import torch
import numpy as np

: 

## Load Data

In [3]:
data_dir = '../dataset/MovieLens_RecBole_KG/BeHAVE/'
loader = BeHAVEDataLoader(data_dir)
loader.load_all()

Loading BeHAVE output files...
Loading user-item interactions from ../dataset/MovieLens_RecBole_KG/BeHAVE/userRatingTrain.txt...
  Loaded 28477 user-item interactions
  Users: 1224, Items: 1591
Loading group members from ../dataset/MovieLens_RecBole_KG/BeHAVE/groupMember.txt...
  Loaded 500 groups
  Average group size: 4.58
Loading group-item interactions from ../dataset/MovieLens_RecBole_KG/BeHAVE/groupRatingTrain.txt...
  Loaded 7087 group-item interactions
Loading knowledge graph from ../dataset/MovieLens_RecBole_KG/BeHAVE/../kg_final.txt...
  Loaded 91631 KG triples
  Entities: 34628, Relations: 24
Data Statistics:
Users: 1224
Items: 1591
Entities: 34628
Relations: 24
Groups: 500
User-Item interactions: 28477
Group-Item interactions: 7087
KG triples: 91631
Group size: min=3, max=6, avg=4.58


## Group Membership

In [4]:
groups = loader.get_group_members()
sample_group_id = list(groups.keys())[9]

print(f"Group {sample_group_id} has {len(groups[sample_group_id])} members")
print(f"Members: {groups[sample_group_id]}")

Group 9 has 4 members
Members: [0, 94, 1, 802]


## User-Item Edges

In [10]:
user_item_edges = loader.get_user_item_edges()
print(f"Shape: {user_item_edges.shape}")
print(f"First 5:\n{user_item_edges[:, :5]}")

Shape: torch.Size([2, 28477])
First 5:
tensor([[0, 0, 0, 0, 0],
        [0, 1, 2, 3, 4]])


## Knowledge Graph Edges

In [5]:
kg_edges = loader.get_kg_edges()
print(f"Shape: {kg_edges.shape}")
print(f"First 5:\n{kg_edges[:, :5]}")

Shape: torch.Size([2, 91631])
First 5:
tensor([[1591,  498, 1593,  632, 1285],
        [ 855, 1592, 1242, 1594, 1595]])


## KG Edges with Relations

In [9]:
kg_edges_rel = loader.get_kg_edges_with_relations()
print(f"Shape: {kg_edges_rel.shape}")
print(f"First 5 triples (head, relation, tail):\n{kg_edges_rel[:, :5]}")

AttributeError: 'tuple' object has no attribute 'shape'

## Vary Negative Sampling Ratio, Experiment

In [21]:
for num_neg in [1]:
    pos, neg = loader.get_training_samples(num_negatives=num_neg)
    ratio = len(neg) / len(pos)
    print(f"num_negatives={num_neg}: pos={len(pos)}, neg={len(neg)}, ratio={ratio:.1f}")

num_negatives=1: pos=7087, neg=7087, ratio=1.0


## ID Mappings

In [None]:
print("Sample User ID mappings:")
for original_id, internal_idx in list(loader.user_id_map.items())[:5]:
    print(f"  Original: {original_id} -> Internal: {internal_idx}")

print("\nSample Item ID mappings:")
for original_id, internal_idx in list(loader.item_id_map.items())[:5]:
    print(f"  Original: {original_id} -> Internal: {internal_idx}")

## Group Item Interaction, Analysis

In [None]:
from collections import Counter

group_interactions_count = Counter([g for g, i in loader.group_item_interactions])
item_popularity = Counter([i for g, i in loader.group_item_interactions])

print(f"Most active groups:")
for group_id, count in group_interactions_count.most_common(5):
    print(f"  Group {group_id}: {count} interactions")

print(f"\nMost popular items:")
for item_id, count in item_popularity.most_common(5):
    print(f"  Item {item_id}: {count} interactions")