In [1]:
import pickle
import pandas as pd
import numpy as np
import networkx as nx
from scipy.sparse import lil_matrix, csr_matrix, save_npz
from collections import defaultdict
from tqdm import tqdm

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Weighting parameters
ALPHA = 0.6  # Weight for explicit check-ins (0.6 = 60% explicit, 40% spatial)

INTERACTION_WEIGHTS = {
    'visit': 1.0,
    'rating': 0.8,
    'search': 0.3,
    'like': 0.5
}

SPATIAL_DECAY = 0.5
MAX_HOPS = 2
POI_LEVEL = 0

interactions_df = pd.read_csv('../../Sources/Files/user_poi_interactions.csv')
interactions_df['timestamp'] = pd.to_datetime(interactions_df['timestamp'])

with open('../../Sources/Embeddings/poi_context_graph.pkl', 'rb') as f:
    G = pickle.load(f)

with open('../../Sources/Embeddings/metadata.pkl', 'rb') as f:
    metadata = pickle.load(f)

# User mappings
user_to_idx = metadata['user_to_index']
idx_to_user = metadata['index_to_user']
num_users = len(metadata['user_ids'])

print(f"\nUsers: {num_users}")

# POI mappings for selected level
poi_level_key = f'level_{POI_LEVEL}'
poi_to_idx = metadata[f'poi_to_index_level_{POI_LEVEL}']
idx_to_poi = metadata[f'index_to_poi_level_{POI_LEVEL}']
num_pois = len(metadata['poi_ids'][poi_level_key])

print(f"POIs (Level {POI_LEVEL}): {num_pois}")
print(f"Matrix shape: {num_users} × {num_pois}")

interaction_pois = set(interactions_df['poi_id'].unique())
metadata_pois = set(metadata['poi_ids'][poi_level_key])

overlap = interaction_pois & metadata_pois
print(f"\nPOI overlap check:")
print(f"  POIs in interactions: {len(interaction_pois)}")
print(f"  POIs in metadata (Level {POI_LEVEL}): {len(metadata_pois)}")
print(f"  Overlap: {len(overlap)}")

if len(overlap) < len(interaction_pois):
    missing = interaction_pois - metadata_pois
    print(f"\n⚠️  Warning: {len(missing)} POIs in interactions not found in metadata")
    print(f"Sample missing POIs: {list(missing)[:5]}")
    
    # Filter interactions to only include POIs in metadata
    print(f"\nFiltering interactions to match metadata POIs...")
    before = len(interactions_df)
    interactions_df = interactions_df[interactions_df['poi_id'].isin(metadata_pois)]
    after = len(interactions_df)
    print(f"Kept {after}/{before} interactions ({after/before*100:.1f}%)")

KeyError: 'user_to_index'