In [6]:
import pandas as pd
import numpy as np
import json
import pickle
import networkx as nx
from typing import Dict, List, Tuple, Optional
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

In [7]:
class MultiGranularityRecommendationFramework:
    """
    Complete Multi-Granularity POI Recommendation System
    
    Provides recommendations at multiple granularity levels:
    - Level 0: Individual POIs (e.g., "Starbucks @ VivoCity")
    - Level 1: Containers (e.g., "VivoCity Mall")
    - Level 2: Districts (e.g., "HarbourFront District")
    - Level 3: Regions (e.g., "Southern Singapore")
    """
    
    def __init__(self,
                 embeddings_file: str = 'embeddings.pkl',
                 interaction_learning_file: str = 'interaction_learning.pkl',
                 poi_tree_file: str = 'poi_tree.json',
                 users_file: str = 'users.csv',
                 interactions_file: str = 'user_poi_interactions.csv'):
        """Initialize the recommendation framework"""
        print("="*70)
        print("INITIALIZING MULTI-GRANULARITY RECOMMENDATION FRAMEWORK")
        print("="*70)
        
        # Load attribute-based embeddings
        print("\nLoading attribute-based embeddings...")
        with open(embeddings_file, 'rb') as f:
            emb_data = pickle.load(f)
        
        self.user_embeddings = emb_data['user_embeddings']
        self.poi_embeddings = emb_data['poi_embeddings']
        self.user_id_to_idx = emb_data['user_id_to_idx']
        self.X = emb_data['X']
        self.X_A = emb_data['X_A']
        self.X_T = emb_data['X_T']
        
        # Load interaction-based components
        print("Loading interaction-based components...")
        with open(interaction_learning_file, 'rb') as f:
            int_data = pickle.load(f)
        
        self.Theta_u = int_data['Theta_u']
        self.A_l_p = int_data['A_l_p']
        self.G_l = int_data['G_l']
        self.P_l = int_data['P_l']
        self.Q_l = int_data['Q_l']
        self.S_l = int_data['S_l']
        self.U_l_g = int_data['U_l_g']
        
        # Load POI tree
        print("Loading POI tree...")
        with open(poi_tree_file, 'r') as f:
            self.poi_tree = json.load(f)
        
        # Load raw data
        print("Loading user profiles and interactions...")
        self.users_df = pd.read_csv(users_file)
        self.interactions_df = pd.read_csv(interactions_file)
        
        # Build indices
        self._build_indices()
        
        # Initialize hyperparameters
        self.alpha = 0.5   # Weight for feature-based score
        self.beta = 0.3    # Weight for graph-based score
        self.gamma = 0.2   # Weight for hierarchical boost
        
        # Level weights (how much to trust each level)
        self.level_weights = {
            0: 0.6,  # Individual POIs
            1: 0.25, # Containers
            2: 0.10, # Districts
            3: 0.05  # Regions
        }
        
        print("\n" + "="*70)
        print("FRAMEWORK INITIALIZED SUCCESSFULLY")
        print("="*70)
        print(f"Users: {len(self.users_df)}")
        print(f"POIs by level:")
        for level in [0, 1, 2, 3]:
            print(f"  Level {level}: {len(self.poi_tree[f'level_{level}'])} POIs")
        print(f"Interactions: {len(self.interactions_df)}")
    
    def _build_indices(self):
        """Build lookup indices for fast access"""
        # User index
        self.user_id_to_idx = {uid: idx for idx, uid in enumerate(self.users_df['uudi'])}
        self.idx_to_user_id = {idx: uid for uid, idx in self.user_id_to_idx.items()}
        
        # POI indices for each level
        self.poi_id_to_idx = {}
        self.idx_to_poi_id = {}
        
        for level in [0, 1, 2, 3]:
            level_key = f'level_{level}'
            poi_ids = self.poi_embeddings[level_key]['poi_ids']
            self.poi_id_to_idx[level] = {pid: idx for idx, pid in enumerate(poi_ids)}
            self.idx_to_poi_id[level] = {idx: pid for pid, idx in self.poi_id_to_idx[level].items()}
        
        # Build user history lookup (at level 0)
        self._build_user_history()
    
    def _build_user_history(self):
        """Build lookup table for user visit history"""
        self.user_history = defaultdict(lambda: defaultdict(list))
        
        visits = self.interactions_df[self.interactions_df['interaction_type'] == 'visit']
        
        for _, row in visits.iterrows():
            user_id = row['user_id']
            poi_id = row['poi_id']
            
            # Store at level 0
            if poi_id not in self.user_history[user_id][0]:
                self.user_history[user_id][0].append(poi_id)
            
            # Propagate to higher levels
            for level in [1, 2, 3]:
                parent_id = self._get_parent_at_level(poi_id, level)
                if parent_id and parent_id not in self.user_history[user_id][level]:
                    self.user_history[user_id][level].append(parent_id)
    
    def _get_parent_at_level(self, poi_id: str, target_level: int) -> Optional[str]:
        """Get parent node of poi_id at target_level"""
        current_level = 0
        current_id = poi_id
        
        while current_level < target_level:
            level_key = f'level_{current_level}'
            if current_id in self.poi_tree[level_key]:
                parent = self.poi_tree[level_key][current_id].get('parent')
                if parent:
                    current_id = parent
                    current_level += 1
                else:
                    break
            else:
                break
        
        return current_id if current_level == target_level else None
    
    # ========================================================================
    # CORE SCORING FUNCTIONS (Multi-level)
    # ========================================================================
    
    def compute_feature_based_score(self, user_idx: int, poi_idx: int, level: int) -> float:
        """Get feature-based affinity score S^l[u, p]"""
        level_key = f'level_{level}'
        return float(self.S_l[level_key][user_idx, poi_idx])
    
    def compute_graph_based_score(self, user_idx: int, poi_idx: int, level: int) -> float:
        """Compute graph-based context score"""
        level_key = f'level_{level}'
        
        U_g_up = self.U_l_g[level_key][user_idx, poi_idx]
        Q_p = self.Q_l[level_key][poi_idx]
        
        min_len = min(len(U_g_up), len(Q_p))
        U_g_up = U_g_up[:min_len]
        Q_p = Q_p[:min_len]
        
        if np.linalg.norm(U_g_up) == 0 or np.linalg.norm(Q_p) == 0:
            return 0.0
        
        score = np.dot(U_g_up, Q_p) / (np.linalg.norm(U_g_up) * np.linalg.norm(Q_p))
        return float(score)
    
    def compute_hierarchical_boost(self, poi_id: str, user_idx: int, level: int) -> float:
        """
        Compute hierarchical boost from parent/child scores
        
        For coarse levels (2, 3): boost from children's popularity
        For fine levels (0, 1): boost from parent's score
        """
        if level >= 2:
            # Coarse level: aggregate from children
            children = self.poi_tree[f'level_{level}'].get(poi_id, {}).get('children', [])
            if not children:
                return 0.0
            
            child_scores = []
            for child_id in children[:10]:  # Limit to top 10 children
                if level == 2:
                    child_level = 1
                elif level == 3:
                    child_level = 2
                else:
                    continue
                
                if child_id in self.poi_id_to_idx[child_level]:
                    child_idx = self.poi_id_to_idx[child_level][child_id]
                    score = self.compute_feature_based_score(user_idx, child_idx, child_level)
                    child_scores.append(score)
            
            return np.mean(child_scores) if child_scores else 0.0
        
        else:
            # Fine level: boost from parent
            poi_data = self.poi_tree[f'level_{level}'].get(poi_id)
            if not poi_data:
                return 0.0
            
            parent_id = poi_data.get('parent')
            if not parent_id:
                return 0.0
            
            parent_level = level + 1
            if parent_id in self.poi_id_to_idx[parent_level]:
                parent_idx = self.poi_id_to_idx[parent_level][parent_id]
                return self.compute_feature_based_score(user_idx, parent_idx, parent_level)
            
            return 0.0
    
    def compute_multi_granularity_score(self, 
                                       user_id: str, 
                                       poi_id: str,
                                       level: int,
                                       use_hierarchical_boost: bool = True,
                                       use_graph_context: bool = True) -> float:
        """
        Compute recommendation score at specified granularity level
        
        Args:
            user_id: User ID
            poi_id: POI ID at specified level
            level: Granularity level (0-3)
            use_hierarchical_boost: Use hierarchical context
            use_graph_context: Use graph-based context
        
        Returns:
            Recommendation score
        """
        if user_id not in self.user_id_to_idx:
            return 0.0
        
        if poi_id not in self.poi_id_to_idx[level]:
            return 0.0
        
        user_idx = self.user_id_to_idx[user_id]
        poi_idx = self.poi_id_to_idx[level][poi_id]
        
        # Feature-based score
        feature_score = self.compute_feature_based_score(user_idx, poi_idx, level)
        
        # Graph-based score
        graph_score = 0.0
        if use_graph_context:
            graph_score = self.compute_graph_based_score(user_idx, poi_idx, level)
        
        # Hierarchical boost
        hierarchical_score = 0.0
        if use_hierarchical_boost:
            hierarchical_score = self.compute_hierarchical_boost(poi_id, user_idx, level)
        
        # Combine
        final_score = (self.alpha * feature_score + 
                      self.beta * graph_score + 
                      self.gamma * hierarchical_score)
        
        return float(final_score)
    
    # ========================================================================
    # MULTI-GRANULARITY RECOMMENDATION
    # ========================================================================
    
    def recommend_at_level(self,
                          user_id: str,
                          level: int,
                          top_k: int = 10,
                          filter_visited: bool = True,
                          use_constraints: bool = False,
                          **kwargs) -> List[Tuple[str, float, Dict]]:
        """
        Generate recommendations at specific granularity level
        
        Args:
            user_id: User ID
            level: Granularity level (0=individual, 1=container, 2=district, 3=region)
            top_k: Number of recommendations
            filter_visited: Filter out visited POIs
            use_constraints: Apply user constraints (only for level 0)
        
        Returns:
            List of (poi_id, score, poi_info) tuples
        """
        if user_id not in self.user_id_to_idx:
            return []
        
        level_key = f'level_{level}'
        all_poi_ids = list(self.poi_tree[level_key].keys())
        
        # Get visited POIs at this level
        visited_pois = set(self.user_history.get(user_id, {}).get(level, []))
        
        # Score all POIs
        poi_scores = []
        
        for poi_id in all_poi_ids:
            if filter_visited and poi_id in visited_pois:
                continue
            
            score = self.compute_multi_granularity_score(
                user_id, poi_id, level, **kwargs
            )
            
            poi_scores.append((poi_id, score))
        
        # Sort by score
        poi_scores.sort(key=lambda x: x[1], reverse=True)
        
        # Get top-K
        top_pois = poi_scores[:top_k]
        
        # Add POI information
        recommendations = []
        for poi_id, score in top_pois:
            poi_data = self.poi_tree[level_key][poi_id]
            
            # Build info based on level
            if level == 0:
                poi_info = {
                    'name': poi_data['name'],
                    'category': poi_data['data'].get('category', 'N/A'),
                    'price': poi_data['data'].get('price', 'N/A'),
                    'popularity': poi_data['data'].get('popularity', 'N/A'),
                    'region': poi_data['data'].get('region', 'N/A'),
                    'type': 'Individual POI'
                }
            elif level == 1:
                # Container (e.g., mall, building)
                children = poi_data.get('children', [])
                poi_info = {
                    'name': poi_data['name'],
                    'textual': poi_data.get('textual', '')[:100],
                    'num_pois': len(children),
                    'type': 'Container/Venue'
                }
            elif level == 2:
                # District
                children = poi_data.get('children', [])
                poi_info = {
                    'name': poi_data['name'],
                    'textual': poi_data.get('textual', '')[:100],
                    'num_venues': len(children),
                    'type': 'District'
                }
            else:  # level == 3
                # Region
                children = poi_data.get('children', [])
                poi_info = {
                    'name': poi_data['name'],
                    'textual': poi_data.get('textual', '')[:100],
                    'num_districts': len(children),
                    'type': 'Region'
                }
            
            recommendations.append((poi_id, score, poi_info))
        
        return recommendations
    
    def recommend_multi_granularity(self,
                                   user_id: str,
                                   levels: List[int] = [0, 1, 2, 3],
                                   top_k_per_level: int = 5,
                                   filter_visited: bool = True,
                                   **kwargs) -> Dict[int, List[Tuple[str, float, Dict]]]:
        """
        Generate recommendations at multiple granularity levels
        
        Args:
            user_id: User ID
            levels: List of levels to generate recommendations for
            top_k_per_level: Number of recommendations per level
            filter_visited: Filter visited POIs
        
        Returns:
            Dictionary mapping level -> recommendations
        """
        print(f"\n{'='*70}")
        print(f"MULTI-GRANULARITY RECOMMENDATIONS FOR USER: {user_id}")
        print(f"{'='*70}\n")
        
        results = {}
        
        level_names = {
            0: "INDIVIDUAL POIs",
            1: "CONTAINERS/VENUES",
            2: "DISTRICTS",
            3: "REGIONS"
        }
        
        for level in levels:
            print(f"Generating Level {level} ({level_names[level]}) recommendations...")
            recommendations = self.recommend_at_level(
                user_id=user_id,
                level=level,
                top_k=top_k_per_level,
                filter_visited=filter_visited,
                **kwargs
            )
            results[level] = recommendations
        
        return results
    
    def display_multi_granularity_recommendations(self, 
                                                 recommendations: Dict[int, List],
                                                 show_details: bool = True):
        """
        Pretty print multi-granularity recommendations
        
        Args:
            recommendations: Dict mapping level -> list of recommendations
            show_details: Show detailed information
        """
        level_names = {
            0: "LEVEL 0: INDIVIDUAL POIs (Specific Entities)",
            1: "LEVEL 1: CONTAINERS/VENUES (Malls, Buildings, etc.)",
            2: "LEVEL 2: DISTRICTS (Geographic Clusters)",
            3: "LEVEL 3: REGIONS (Large Areas)"
        }
        
        level_descriptions = {
            0: "Specific places you can visit right now",
            1: "Venues containing multiple places of interest",
            2: "Neighborhoods or districts to explore",
            3: "Broader regions for day trips"
        }
        
        print("\n" + "="*70)
        print("MULTI-GRANULARITY RECOMMENDATIONS")
        print("="*70)
        
        for level in sorted(recommendations.keys()):
            recs = recommendations[level]
            
            print(f"\n{level_names[level]}")
            print(f"({level_descriptions[level]})")
            print("-"*70)
            
            if not recs:
                print("  No recommendations at this level")
                continue
            
            for rank, (poi_id, score, poi_info) in enumerate(recs, 1):
                print(f"\n{rank}. {poi_info['name']}")
                print(f"   Score: {score:.4f}")
                print(f"   Type: {poi_info['type']}")
                
                if show_details:
                    if level == 0:
                        print(f"   Category: {poi_info.get('category', 'N/A')}")
                        print(f"   Price: {poi_info.get('price', 'N/A')}")
                        print(f"   Popularity: {poi_info.get('popularity', 'N/A')}")
                    elif level == 1:
                        print(f"   Contains: {poi_info.get('num_pois', 0)} POIs")
                        if poi_info.get('textual'):
                            print(f"   Description: {poi_info['textual']}")
                    elif level == 2:
                        print(f"   Contains: {poi_info.get('num_venues', 0)} venues")
                        if poi_info.get('textual'):
                            print(f"   Description: {poi_info['textual']}")
                    elif level == 3:
                        print(f"   Contains: {poi_info.get('num_districts', 0)} districts")
                        if poi_info.get('textual'):
                            print(f"   Description: {poi_info['textual']}")
    
    def recommend_adaptive_granularity(self,
                                      user_id: str,
                                      context: str = 'general',
                                      top_k: int = 10) -> List[Tuple[str, float, Dict, int]]:
        """
        Adaptively select granularity level based on context
        
        Args:
            user_id: User ID
            context: Context hint ('specific', 'venue', 'exploration', 'general')
            top_k: Total number of recommendations
        
        Returns:
            List of (poi_id, score, poi_info, level) tuples
        """
        # Context-based level distribution
        context_distributions = {
            'specific': {0: 0.8, 1: 0.15, 2: 0.05, 3: 0.0},
            'venue': {0: 0.3, 1: 0.5, 2: 0.15, 3: 0.05},
            'exploration': {0: 0.2, 1: 0.3, 2: 0.3, 3: 0.2},
            'general': {0: 0.5, 1: 0.3, 2: 0.15, 3: 0.05}
        }
        
        distribution = context_distributions.get(context, context_distributions['general'])
        
        # Calculate number of recommendations per level
        recs_per_level = {
            level: max(1, int(top_k * weight))
            for level, weight in distribution.items() if weight > 0
        }
        
        # Generate recommendations at each level
        all_recommendations = []
        
        for level, k in recs_per_level.items():
            level_recs = self.recommend_at_level(user_id, level, top_k=k)
            for poi_id, score, poi_info in level_recs:
                all_recommendations.append((poi_id, score, poi_info, level))
        
        # Sort by score and return top-K
        all_recommendations.sort(key=lambda x: x[1], reverse=True)
        
        return all_recommendations[:top_k]
    
    # ========================================================================
    # EXPORT & VISUALIZATION
    # ========================================================================
    
    def export_multi_granularity_recommendations(self,
                                                recommendations: Dict[int, List],
                                                user_id: str,
                                                output_prefix: str = 'multi_gran_recs'):
        """
        Export multi-granularity recommendations to CSV files
        
        Args:
            recommendations: Dict of recommendations by level
            user_id: User ID
            output_prefix: Prefix for output files
        """
        user_name = self.users_df[self.users_df['uudi'] == user_id].iloc[0]['name']
        
        for level, recs in recommendations.items():
            rows = []
            for rank, (poi_id, score, poi_info) in enumerate(recs, 1):
                row = {
                    'user_id': user_id,
                    'user_name': user_name,
                    'level': level,
                    'rank': rank,
                    'poi_id': poi_id,
                    'poi_name': poi_info['name'],
                    'score': score,
                    'type': poi_info['type']
                }
                
                # Add level-specific fields
                if level == 0:
                    row.update({
                        'category': poi_info.get('category', ''),
                        'price': poi_info.get('price', ''),
                        'popularity': poi_info.get('popularity', '')
                    })
                elif level == 1:
                    row['num_pois'] = poi_info.get('num_pois', 0)
                elif level == 2:
                    row['num_venues'] = poi_info.get('num_venues', 0)
                elif level == 3:
                    row['num_districts'] = poi_info.get('num_districts', 0)
                
                rows.append(row)
            
            df = pd.DataFrame(rows)
            output_file = f'{output_prefix}_level{level}_{user_name}.csv'
            df.to_csv(output_file, index=False)
            print(f"Level {level} recommendations exported to: {output_file}")



In [8]:
if __name__ == "__main__":
    # Demo driver: loads the framework from files in the working directory
    # and runs three example recommendation scenarios for the first user.

    def _banner(title):
        """Print a blank line followed by a title framed by 70-char rules."""
        print("\n" + "="*70)
        print(title)
        print("="*70)

    # Initialize framework
    framework = MultiGranularityRecommendationFramework(
        embeddings_file='embeddings.pkl',
        interaction_learning_file='interaction_learning.pkl',
        poi_tree_file='poi_tree_with_uuids.json',
        users_file='user_preferences.csv',
        interactions_file='user_poi_interactions.csv'
    )

    # ------------------------------------------------------------------
    # Example 1: recommendations at every level, displayed and exported
    # ------------------------------------------------------------------
    _banner("EXAMPLE 1: MULTI-GRANULARITY RECOMMENDATIONS")

    # The first row of the users table serves as the demo user.
    user_id = framework.users_df.iloc[0]['uudi']
    user_name = framework.users_df.iloc[0]['name']

    multi_gran_recs = framework.recommend_multi_granularity(
        user_id=user_id,
        levels=[0, 1, 2, 3],
        top_k_per_level=5,
        filter_visited=True
    )

    framework.display_multi_granularity_recommendations(multi_gran_recs)

    framework.export_multi_granularity_recommendations(
        multi_gran_recs,
        user_id,
        output_prefix=f'recommendations_{user_name}'
    )

    # ------------------------------------------------------------------
    # Example 2: one mixed-level list per context hint
    # ------------------------------------------------------------------
    _banner("EXAMPLE 2: ADAPTIVE GRANULARITY (Context-Aware)")

    contexts = ['specific', 'venue', 'exploration', 'general']
    level_names = {0: 'Individual', 1: 'Venue', 2: 'District', 3: 'Region'}

    for context in contexts:
        print(f"\n--- Context: {context.upper()} ---")

        adaptive_recs = framework.recommend_adaptive_granularity(
            user_id=user_id,
            context=context,
            top_k=10
        )

        for rank, (poi_id, score, poi_info, level) in enumerate(adaptive_recs, 1):
            print(f"{rank}. [{level_names[level]}] {poi_info['name'][:50]} (score: {score:.3f})")

    # ------------------------------------------------------------------
    # Example 3: top-5 list for each level, side by side
    # ------------------------------------------------------------------
    _banner("EXAMPLE 3: LEVEL-BY-LEVEL COMPARISON")

    level_names = {
        0: "Individual POIs",
        1: "Containers/Venues",
        2: "Districts",
        3: "Regions"
    }

    for level in [0, 1, 2, 3]:
        print(f"\n--- {level_names[level]} (Level {level}) ---")

        recs = framework.recommend_at_level(
            user_id=user_id,
            level=level,
            top_k=5,
            filter_visited=True
        )

        for rank, (poi_id, score, poi_info) in enumerate(recs, 1):
            print(f"{rank}. {poi_info['name'][:60]:60s} ({score:.4f})")

    _banner("ALL EXAMPLES COMPLETE!")

INITIALIZING MULTI-GRANULARITY RECOMMENDATION FRAMEWORK

Loading attribute-based embeddings...
Loading interaction-based components...
Loading POI tree...
Loading user profiles and interactions...

FRAMEWORK INITIALIZED SUCCESSFULLY
Users: 21
POIs by level:
  Level 0: 4696 POIs
  Level 1: 1355 POIs
  Level 2: 44 POIs
  Level 3: 5 POIs
Interactions: 529

EXAMPLE 1: MULTI-GRANULARITY RECOMMENDATIONS

MULTI-GRANULARITY RECOMMENDATIONS FOR USER: 966592ed-5bfd-4113-9c4d-d93cd3637b40

Generating Level 0 (INDIVIDUAL POIs) recommendations...
Generating Level 1 (CONTAINERS/VENUES) recommendations...
Generating Level 2 (DISTRICTS) recommendations...
Generating Level 3 (REGIONS) recommendations...

MULTI-GRANULARITY RECOMMENDATIONS

LEVEL 0: INDIVIDUAL POIs (Specific Entities)
(Specific places you can visit right now)
----------------------------------------------------------------------

1. CHICHA San Chen
   Score: 88.1245
   Type: Individual POI
   Category: cafe
   Price: 14.38 - 21.94
   Pop