In [1]:
import json
import pickle
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model, optimizers, losses
from typing import Dict, List, Tuple, Optional
from collections import defaultdict
from datetime import datetime
import os


In [7]:
class AttentionAggregator(Model):
    """
    MLP-based Attention Aggregator for inter-level POI feature propagation (TensorFlow)
    
    Steps:
    1. Compute attention scores via MLP
    2. Apply softmax normalization
    3. Weighted aggregation of child embedding,s
    """
    
    def __init__(self, 
				input_dim: int, 
                output_dim: int,
				hidden_dim: int = 64, 
				dropout_rate: float = 0.1, 
                 **kwargs):
        """
        Args:
            input_dim: Dimension of child POI embeddings
            hidden_dim: Hidden dimension for attention MLP
            dropout_rate: Dropout rate
        """
        super().__init__(**kwargs)
        
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout_rate
        
        # MLP for attention score computation
        self.attention_dense1 = layers.Dense(
            hidden_dim, 
            activation='relu',
            kernel_initializer='glorot_uniform',
            name='attention_dense1'
        )
        self.attention_dropout1 = layers.Dropout(dropout_rate)
        
        self.attention_dense2 = layers.Dense(
            hidden_dim // 2,
            activation='relu',
            kernel_initializer='glorot_uniform',
            name='attention_dense2'
        )
        self.attention_dropout2 = layers.Dropout(dropout_rate)
        
        self.attention_output = layers.Dense(
            1,
            kernel_initializer='glorot_uniform',
            name='attention_output'
        )
        
        # Output transformation
        self.output_dense = layers.Dense(
            input_dim,
            kernel_initializer='glorot_uniform',
            name='output_dense'
        )
        self.output_layernorm = layers.LayerNormalization(name='output_layernorm')
        self.output_activation = layers.ReLU()
    
    def call(self, child_embeddings: tf.Tensor, mask: Optional[tf.Tensor] = None, training: bool = False) -> Tuple[tf.Tensor, tf.Tensor]:
        """
        Aggregate child embeddings using attention
        
        Args:
            child_embeddings: (num_children, embed_dim) or (batch, max_children, embed_dim)
            mask: Optional mask for padding (1 = valid, 0 = padding)
            training: Whether in training mode
        
        Returns:
            aggregated: (embed_dim,) or (batch, embed_dim)
            attention_weights: (num_children,) or (batch, max_children)
        """
        # Handle single parent case (2D input)
        squeeze_output = False
        if len(child_embeddings.shape) == 2:
            child_embeddings = tf.expand_dims(child_embeddings, 0)  # (1, num_children, embed_dim)
            squeeze_output = True
            if mask is not None:
                mask = tf.expand_dims(mask, 0)
        
        batch_size = tf.shape(child_embeddings)[0]
        num_children = tf.shape(child_embeddings)[1]
        
        # Step 1: Compute attention scores via MLP
        # (batch, num_children, embed_dim) -> (batch, num_children, hidden_dim)
        x = self.attention_dense1(child_embeddings)
        x = self.attention_dropout1(x, training=training)
        x = self.attention_dense2(x)
        x = self.attention_dropout2(x, training=training)
        
        # (batch, num_children, 1) -> (batch, num_children)
        attention_scores = tf.squeeze(self.attention_output(x), axis=-1)
        
        # Apply mask if provided (set padded positions to -inf)
        if mask is not None:
            mask = tf.cast(mask, tf.float32)
            attention_scores = attention_scores + (1.0 - mask) * (-1e9)
        
        # Step 2: Softmax normalization
        attention_weights = tf.nn.softmax(attention_scores, axis=-1)  # (batch, num_children)
        
        # Handle NaN from empty sequences
        attention_weights = tf.where(
            tf.math.is_nan(attention_weights),
            tf.zeros_like(attention_weights),
            attention_weights
        )
        
        # Step 3: Weighted aggregation
        # (batch, 1, num_children) @ (batch, num_children, embed_dim) -> (batch, 1, embed_dim)
        attention_weights_expanded = tf.expand_dims(attention_weights, 1)
        aggregated = tf.matmul(attention_weights_expanded, child_embeddings)
        aggregated = tf.squeeze(aggregated, axis=1)  # (batch, embed_dim)
        
        # Output transformation
        aggregated = self.projection(aggregated)  # (batch, output_dim)
        aggregated = self.output_layernorm(aggregated)
        
        if squeeze_output:
            aggregated = tf.squeeze(aggregated, axis=0)
            attention_weights = tf.squeeze(attention_weights, axis=0)
        
        return aggregated, attention_weights
    
    def get_config(self):
        config = super().get_config()
        config.update({
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'hidden_dim': self.hidden_dim,
            'dropout_rate': self.dropout_rate
        })
        return config


class InterLevelPOIAggregator:
    """
    Build A^l_p: Inter-level POI Feature Matrix (TensorFlow)
    
    Propagates features from child POIs to parent POIs using attention mechanism.
    """
    
    def __init__(self,
				poi_tree_file: str,
				poi_embeddings_file: str,
				metadata_file: str,
				hidden_dim: int = 64,
				dropout_rate: float = 0.1):
        """
        Initialize aggregator
        
        Args:
            poi_tree_file: Path to poi_tree_with_uuids.json
            poi_embeddings_file: Path to poi_embeddings.pkl
            metadata_file: Path to metadata.pkl
            hidden_dim: Hidden dimension for attention MLP
            dropout_rate: Dropout rate
        """
        print("=" * 60)
        print("Initializing Inter-Level POI Aggregator (TensorFlow)")
        print("=" * 60)
        
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout_rate
        
        # Load POI tree
        print(f"\nLoading POI tree from: {poi_tree_file}")
        with open(poi_tree_file, 'r', encoding='utf-8') as f:
            self.poi_tree = json.load(f)
        
        # Load POI embeddings
        print(f"Loading POI embeddings from: {poi_embeddings_file}")
        with open(poi_embeddings_file, 'rb') as f:
            self.poi_embeddings_data = pickle.load(f)
        
        # Load metadata
        print(f"Loading metadata from: {metadata_file}")
        with open(metadata_file, 'rb') as f:
            self.metadata = pickle.load(f)
        
        # Build parent-children mappings
        print("\nBuilding parent-children mappings...")
        self.parent_to_children = self._build_parent_children_mapping()
        
        # Initialize attention aggregators for each level transition
        self.aggregators: Dict[int, AttentionAggregator] = {}
        
        # Store results
        self.A_lp: Dict[str, np.ndarray] = {}
        self.attention_weights: Dict[str, Dict] = {}
        
        print("\nInitialization complete!")
        self._print_summary()
    
    def _print_summary(self):
        """Print data summary"""
        print("\n" + "-" * 40)
        print("Data Summary:")
        for level in range(4):
            level_key = f'level_{level}'
            if level_key in self.poi_embeddings_data['poi_embeddings']:
                shape = self.poi_embeddings_data['poi_embeddings'][level_key]['embeddings'].shape
                n_children = sum(
                    len(children) 
                    for children in self.parent_to_children.get(level, {}).values()
                )
                print(f"  Level {level}: {shape[0]} POIs, embed_dim={shape[1]}, total_children={n_children}")
    
    def _build_parent_children_mapping(self) -> Dict[int, Dict[str, List[str]]]:
        """
        Build mapping: parent_level -> {parent_id -> [child_ids]}
        
        Returns:
            Dictionary: level -> {parent_poi_id -> [child_poi_ids]}
        """
        parent_to_children = {1: {}, 2: {}, 3: {}}
        
        # For each level, find children from the level below
        for child_level in range(3):  # 0, 1, 2
            parent_level = child_level + 1
            child_level_key = f'level_{child_level}'
            
            if child_level_key not in self.poi_tree:
                continue
            
            for child_id, child_data in self.poi_tree[child_level_key].items():
                parent_id = child_data.get('parent')
                
                if parent_id:
                    if parent_id not in parent_to_children[parent_level]:
                        parent_to_children[parent_level][parent_id] = []
                    parent_to_children[parent_level][parent_id].append(child_id)
        
        # Print statistics
        for level in [1, 2, 3]:
            n_parents = len(parent_to_children[level])
            if n_parents > 0:
                avg_children = np.mean([len(c) for c in parent_to_children[level].values()])
                max_children = max(len(c) for c in parent_to_children[level].values())
                print(f"  Level {level}: {n_parents} parents, avg_children={avg_children:.1f}, max={max_children}")
        
        return parent_to_children
    
    def _get_level_embeddings(self, level: int) -> Tuple[np.ndarray, List[str]]:
        """Get all embeddings at a level"""
        level_key = f'level_{level}'
        level_data = self.poi_embeddings_data['poi_embeddings'][level_key]
        return level_data['embeddings'], level_data['poi_ids']
    
    def build_A_lp_level0(self) -> np.ndarray:
        """
        Build A^0_p for Level 0 (Building)
        
        At level 0, there are no children to aggregate.
        A^0_p is simply the base POI embeddings Y^0.
        
        Returns:
            A^0_p: (n_pois_level0, embed_dim)
        """
        print(f"\n{'=' * 60}")
        print("Building A^0_p (Level 0 - Building)")
        print("=" * 60)
        
        Y_0, poi_ids = self._get_level_embeddings(level=0)
        
        # A^0_p = Y^0 (base embeddings, no aggregation needed)
        A_0_p = Y_0.copy()
        
        print(f"  A^0_p = Y^0 (base POI embeddings)")
        print(f"  Shape: {A_0_p.shape}")
        
        self.A_lp['level_0'] = A_0_p
        self.attention_weights['level_0'] = None  # No attention at level 0
        
        return A_0_p
    
    def build_A_lp_higher_level(self,
								level: int,
								train_attention: bool = True,
								n_epochs: int = 100,
								lr: float = 0.001,
								batch_size: int = 32) -> np.ndarray:
        """
        Build A^l_p for Level 1, 2, or 3 using attention aggregation
        
        Args:
            level: Target level (1, 2, or 3)
            train_attention: Whether to train attention weights
            n_epochs: Training epochs for attention
            lr: Learning rate
            batch_size: Batch size for training
        
        Returns:
            A^l_p: (n_pois_at_level, embed_dim)
        """
        print(f"\n{'=' * 60}")
        print(f"Building A^{level}_p (Level {level})")
        print("=" * 60)
        
        child_level = level - 1
        child_level_key = f'level_{child_level}'
        
        # Get child embeddings (use A^(l-1)_p, not raw Y^(l-1))
        if child_level_key not in self.A_lp:
            raise ValueError(f"Must build A^{child_level}_p before A^{level}_p")
        
        child_A_lp = self.A_lp[child_level_key]
        child_poi_ids = self.poi_embeddings_data['poi_embeddings'][child_level_key]['poi_ids']
        child_id_to_idx = {pid: idx for idx, pid in enumerate(child_poi_ids)}
        
        child_embed_dim = child_A_lp.shape[1]
        
        # Get parent POI IDs at this level
        level_key = f'level_{level}'
        parent_embeddings, parent_poi_ids = self._get_level_embeddings(level)
        n_parents = len(parent_poi_ids)
        
        print(f"  Child level {child_level}: {len(child_poi_ids)} POIs, embed_dim={child_embed_dim}")
        print(f"  Parent level {level}: {n_parents} POIs")
        
        # Initialize attention aggregator
        aggregator = AttentionAggregator(
            input_dim=child_embed_dim,
            output_dim=child_embed_dim,
            hidden_dim=self.hidden_dim,
            dropout_rate=self.dropout_rate
        )
        
        # Build the model by calling it once
        dummy_input = tf.zeros((1, 1, child_embed_dim))
        _ = aggregator(dummy_input)
        
        self.aggregators[level] = aggregator
        
        # Get parent-children mapping for this level
        parent_children_map = self.parent_to_children.get(level, {})
        
        if train_attention:
            # Training mode: Learn attention weights
            print(f"\n  Training attention aggregator ({n_epochs} epochs)...")
            self._train_attention_aggregator(
                aggregator=aggregator,
                child_A_lp=child_A_lp,
                child_id_to_idx=child_id_to_idx,
                parent_poi_ids=parent_poi_ids,
                parent_children_map=parent_children_map,
                parent_embeddings=parent_embeddings,
                n_epochs=n_epochs,
                lr=lr,
                batch_size=batch_size
            )
        
        # Inference: Compute aggregated embeddings
        print(f"\n  Computing aggregated embeddings...")
        
        parent_aggregated = []
        parent_attn_weights = {}
        
        for parent_idx, parent_id in enumerate(parent_poi_ids):
            children_ids = parent_children_map.get(parent_id, [])
            
            if len(children_ids) == 0:
                # No children: Use parent's own embedding from Y^l
                aggregated = parent_embeddings[parent_idx]
                attn_weights = np.array([1.0])
            else:
                # Get children embeddings
                child_indices = [
                    child_id_to_idx[cid]
                    for cid in children_ids
                    if cid in child_id_to_idx
                ]
                
                if len(child_indices) == 0:
                    aggregated = parent_embeddings[parent_idx]
                    attn_weights = np.array([1.0])
                else:
                    child_embeds = tf.constant(
                        child_A_lp[child_indices],
                        dtype=tf.float32
                    )
                    
                    aggregated, attn_weights = aggregator(child_embeds, training=False)
                    aggregated = aggregated.numpy()
                    attn_weights = attn_weights.numpy()
            
            parent_aggregated.append(aggregated)
            parent_attn_weights[parent_id] = {
                'children': children_ids[:len(attn_weights)] if len(children_ids) > 0 else [],
                'weights': attn_weights.tolist()
            }
        
        A_lp = np.stack(parent_aggregated, axis=0).astype(np.float32)
        
        print(f"\n  A^{level}_p shape: {A_lp.shape}")
        print(f"  Value range: [{A_lp.min():.4f}, {A_lp.max():.4f}]")
        
        self.A_lp[level_key] = A_lp
        self.attention_weights[level_key] = parent_attn_weights
        
        return A_lp
    
    def _train_attention_aggregator(self,
                                    aggregator: AttentionAggregator,
                                    child_A_lp: np.ndarray,
                                    child_id_to_idx: Dict[str, int],
                                    parent_poi_ids: List[str],
                                    parent_children_map: Dict[str, List[str]],
                                    parent_embeddings: np.ndarray,
                                    n_epochs: int,
                                    lr: float,
                                    batch_size: int):
        """
        Train attention aggregator using reconstruction loss
        
        Objective: Aggregated children embeddings should reconstruct parent embedding
        Loss = MSE(aggregated_children, parent_embedding)
        """
        optimizer = optimizers.Adam(learning_rate=lr)
        mse_loss = losses.MeanSquaredError()
        
        # Prepare training data
        training_data = []
        
        for parent_idx, parent_id in enumerate(parent_poi_ids):
            children_ids = parent_children_map.get(parent_id, [])
            child_indices = [
                child_id_to_idx[cid]
                for cid in children_ids
                if cid in child_id_to_idx
            ]
            
            if len(child_indices) > 0:
                training_data.append({
                    'parent_idx': parent_idx,
                    'child_indices': child_indices,
                    'parent_embed': parent_embeddings[parent_idx]
                })
        
        if len(training_data) == 0:
            print("    No training data available, using uniform attention")
            return
        
        print(f"    Training samples: {len(training_data)}")
        
        # Training loop
        for epoch in range(n_epochs):
            total_loss = 0.0
            n_batches = 0
            
            # Shuffle training data
            np.random.shuffle(training_data)
            
            for data in training_data:
                child_embeds = tf.constant(
                    child_A_lp[data['child_indices']],
                    dtype=tf.float32
                )
                parent_embed = tf.constant(
                    data['parent_embed'],
                    dtype=tf.float32
                )
                
                with tf.GradientTape() as tape:
                    aggregated, _ = aggregator(child_embeds, training=True)
                    loss = mse_loss(parent_embed, aggregated)
                
                gradients = tape.gradient(loss, aggregator.trainable_variables)
                optimizer.apply_gradients(zip(gradients, aggregator.trainable_variables))
                
                total_loss += loss.numpy()
                n_batches += 1
            
            avg_loss = total_loss / n_batches
            
            if (epoch + 1) % 20 == 0 or epoch == 0:
                print(f"    Epoch {epoch + 1}/{n_epochs}, Loss: {avg_loss:.6f}")
    
    def build_all_A_lp(self,
					train_attention: bool = True,
					n_epochs: int = 100,
					lr: float = 0.001,
					batch_size: int = 32) -> Dict[str, np.ndarray]:
        """
        Build A^l_p for all levels (0 to 3)
        
        Must be built in order: Level 0 -> 1 -> 2 -> 3
        
        Args:
            train_attention: Whether to train attention weights
            n_epochs: Training epochs
            lr: Learning rate
            batch_size: Batch size
        
        Returns:
            Dictionary: level_key -> A^l_p matrix
        """
        print("\n" + "=" * 60)
        print("Building A^l_p for All Levels")
        print("=" * 60)
        
        # Level 0: Base embeddings
        self.build_A_lp_level0()
        
        # Levels 1, 2, 3: Attention aggregation from children
        for level in [1, 2, 3]:
            self.build_A_lp_higher_level(
                level=level,
                train_attention=train_attention,
                n_epochs=n_epochs,
                lr=lr,
                batch_size=batch_size
            )
        
        return self.A_lp
    
    def build_Q_l(self, level: int) -> np.ndarray:
        """
        Build complete POI representation Q^l = [Y_A || Y_T || A^l_p]
        
        Args:
            level: Target level
        
        Returns:
            Q^l: (n_pois, total_dim)
        """
        level_key = f'level_{level}'
        
        # Get Y_A and Y_T from poi_embeddings
        poi_data = self.poi_embeddings_data['poi_embeddings'][level_key]
        
        Y_A = poi_data.get('Y_A')
        Y_T = poi_data.get('Y_T')
        A_lp = self.A_lp[level_key]
        
        components = []
        component_names = []
        
        if Y_A is not None:
            components.append(Y_A)
            component_names.append(f'Y_A ({Y_A.shape[1]})')
        
        if Y_T is not None:
            components.append(Y_T)
            component_names.append(f'Y_T ({Y_T.shape[1]})')
        
        components.append(A_lp)
        component_names.append(f'A^{level}_p ({A_lp.shape[1]})')
        
        # Ensure same number of POIs
        n_pois = min(c.shape[0] for c in components)
        components = [c[:n_pois] for c in components]
        
        Q_l = np.hstack(components).astype(np.float32)
        
        print(f"\n  Q^{level} = [{' || '.join(component_names)}]")
        print(f"  Shape: {Q_l.shape}")
        
        return Q_l
    
    def build_all_Q_l(self) -> Dict[str, np.ndarray]:
        """
        Build Q^l for all levels
        
        Returns:
            Dictionary: level_key -> Q^l matrix
        """
        Q_l_all = {}
        
        print("\n" + "-" * 40)
        print("Building Q^l = [Y_A || Y_T || A^l_p] for all levels")
        print("-" * 40)
        
        for level in range(4):
            Q_l_all[f'level_{level}'] = self.build_Q_l(level)
        
        return Q_l_all
    
    def save_results(self, output_file: str = 'poi_interlevel_features.pkl'):
        """
        Save A^l_p and Q^l for all levels
        """
        print(f"\n{'=' * 60}")
        print(f"Saving results to: {output_file}")
        print("=" * 60)
        
        # Build Q^l for all levels
        Q_l_all = self.build_all_Q_l()
        
        # Save aggregator weights
        aggregator_weights = {}
        for level, agg in self.aggregators.items():
            aggregator_weights[level] = {
                name: var.numpy() 
                for name, var in zip(
                    [v.name for v in agg.trainable_variables],
                    agg.trainable_variables
                )
            }
        
        save_data = {
            # Inter-level POI features
            'A_lp': self.A_lp,
            
            # Complete POI representations
            'Q_l': Q_l_all,
            
            # Attention weights (for interpretability)
            'attention_weights': self.attention_weights,
            
            # Trained aggregator weights
            'aggregator_weights': aggregator_weights,
            
            # Aggregator config for reconstruction
            'aggregator_config': {
                'hidden_dim': self.hidden_dim,
                'dropout_rate': self.dropout_rate
            },
            
            # Dimensions
            'dimensions': {
                f'level_{level}': {
                    'A_lp_shape': self.A_lp[f'level_{level}'].shape,
                    'Q_l_shape': Q_l_all[f'level_{level}'].shape
                }
                for level in range(4)
            },
            
            # Metadata
            'info': {
                'created_at': datetime.now().isoformat(),
                'formula': 'Q^l = [Y_A || Y_T || A^l_p]',
                'aggregation': 'MLP Attention (TensorFlow)',
                'hidden_dim': self.hidden_dim,
                'framework': 'tensorflow',
                'tf_version': tf.__version__
            }
        }
        
        with open(output_file, 'wb') as f:
            pickle.dump(save_data, f)
        
        file_size = os.path.getsize(output_file) / (1024 * 1024)
        print(f"\n  File size: {file_size:.2f} MB")
        
        print("\n  Saved matrices:")
        for level in range(4):
            level_key = f'level_{level}'
            print(f"    Level {level}:")
            print(f"      A^{level}_p: {self.A_lp[level_key].shape}")
            print(f"      Q^{level}: {Q_l_all[level_key].shape}")
        
        return save_data


def build_A_lp_and_Q_l(poi_tree_file: str = 'poi_tree_with_uuids.json',
                        poi_embeddings_file: str = 'poi_embeddings.pkl',
                        metadata_file: str = 'metadata.pkl',
                        output_file: str = 'poi_interlevel_features.pkl',
                        hidden_dim: int = 64,
                        dropout_rate: float = 0.1,
                        n_epochs: int = 100,
                        lr: float = 0.001) -> Dict:
    """
    Main function to build A^l_p and Q^l for all levels
    
    Args:
        poi_tree_file: Path to POI tree JSON
        poi_embeddings_file: Path to POI embeddings pickle
        metadata_file: Path to metadata pickle
        output_file: Output file path
        hidden_dim: Hidden dimension for attention MLP
        dropout_rate: Dropout rate
        n_epochs: Training epochs
        lr: Learning rate
    
    Returns:
        Dictionary with A^l_p and Q^l for all levels
    """
    aggregator = InterLevelPOIAggregator(
        poi_tree_file=poi_tree_file,
        poi_embeddings_file=poi_embeddings_file,
        metadata_file=metadata_file,
        hidden_dim=hidden_dim,
        dropout_rate=dropout_rate
    )
    
    # Build A^l_p for all levels
    aggregator.build_all_A_lp(
        train_attention=True,
        n_epochs=n_epochs,
        lr=lr
    )
    
    # Save results
    save_data = aggregator.save_results(output_file)
    
    return save_data


class AttentionAggregatorLoader:
    """
    Utility class to load and use trained attention aggregators
    """
    
    def __init__(self, saved_file: str = 'poi_interlevel_features.pkl'):
        """
        Load saved attention aggregators
        
        Args:
            saved_file: Path to saved pickle file
        """
        with open(saved_file, 'rb') as f:
            self.data = pickle.load(f)
        
        self.A_lp = self.data['A_lp']
        self.Q_l = self.data['Q_l']
        self.attention_weights = self.data['attention_weights']
        self.config = self.data['aggregator_config']
    
    def get_A_lp(self, level: int) -> np.ndarray:
        """Get A^l_p for a specific level"""
        return self.A_lp[f'level_{level}']
    
    def get_Q_l(self, level: int) -> np.ndarray:
        """Get Q^l for a specific level"""
        return self.Q_l[f'level_{level}']
    
    def get_attention_weights(self, level: int, parent_id: str) -> Dict:
        """Get attention weights for a specific parent POI"""
        level_key = f'level_{level}'
        if level_key in self.attention_weights and self.attention_weights[level_key]:
            return self.attention_weights[level_key].get(parent_id, {})
        return {}
    
    def get_top_k_children(self, level: int, parent_id: str, k: int = 5) -> List[Tuple[str, float]]:
        """
        Get top-k children by attention weight for a parent POI
        
        Args:
            level: Parent level
            parent_id: Parent POI ID
            k: Number of top children to return
        
        Returns:
            List of (child_id, attention_weight) tuples
        """
        attn_data = self.get_attention_weights(level, parent_id)
        
        if not attn_data or 'children' not in attn_data:
            return []
        
        children = attn_data['children']
        weights = attn_data['weights']
        
        pairs = list(zip(children, weights))
        pairs.sort(key=lambda x: x[1], reverse=True)
        
        return pairs[:k]

In [8]:
if __name__ == "__main__":
    poi_tree_file = "../../Sources/Files/poi_tree_with_uuids.json"
    poi_embeddings_file = "../../Sources/Embeddings/poi_embeddings.pkl"
    metadata_file = "../../Sources/Embeddings/metadata.pkl"
    output_file = "../../Sources/Embeddings/poi_interlevel_features.pkl"
    
    print("=" * 60)
    print("INTER-LEVEL POI FEATURE (A^l_p) GENERATION - TensorFlow")
    print("=" * 60)
    
    # Build A^l_p and Q^l
    result = build_A_lp_and_Q_l(
        poi_tree_file=poi_tree_file,
        poi_embeddings_file=poi_embeddings_file,
        metadata_file=metadata_file,
        output_file=output_file,
        hidden_dim=64,
        dropout_rate=0.1,
        n_epochs=100,
        lr=0.001
    )
    
    # Summary
    print("\n" + "=" * 60)
    print("FINAL SUMMARY")
    print("=" * 60)
    
    print("\nA^l_p (Inter-level POI Features):")
    for level in range(4):
        shape = result['A_lp'][f'level_{level}'].shape
        print(f"  Level {level}: {shape}")
    
    print("\nQ^l (Complete POI Representations):")
    for level in range(4):
        shape = result['Q_l'][f'level_{level}'].shape
        print(f"  Level {level}: {shape}")
    
    # Demo: Load and use
    print("\n" + "-" * 40)
    print("DEMO: Using AttentionAggregatorLoader")
    print("-" * 40)
    
    loader = AttentionAggregatorLoader(output_file)
    
    print("\nQ^l matrices loaded:")
    for level in range(4):
        Q = loader.get_Q_l(level)
        print(f"  Level {level}: {Q.shape}")
    
    # Example: Get top children for a parent (if attention weights available)
    if loader.attention_weights.get('level_1'):
        sample_parent = list(loader.attention_weights['level_1'].keys())[0]
        top_children = loader.get_top_k_children(level=1, parent_id=sample_parent, k=3)
        print(f"\nTop 3 children for parent '{sample_parent}' at level 1:")
        for child_id, weight in top_children:
            print(f"  - {child_id}: {weight:.4f}")
    
    print("\n" + "=" * 60)
    print("COMPLETE")
    print("=" * 60)

INTER-LEVEL POI FEATURE (A^l_p) GENERATION - TensorFlow
Initializing Inter-Level POI Aggregator (TensorFlow)

Loading POI tree from: ../../Sources/Files/poi_tree_with_uuids.json
Loading POI embeddings from: ../../Sources/Embeddings/poi_embeddings.pkl
Loading metadata from: ../../Sources/Embeddings/metadata.pkl

Building parent-children mappings...
  Level 1: 968 parents, avg_children=4.9, max=561
  Level 2: 44 parents, avg_children=30.8, max=125
  Level 3: 5 parents, avg_children=8.8, max=20

Initialization complete!

----------------------------------------
Data Summary:
  Level 0: 4696 POIs, embed_dim=221, total_children=0
  Level 1: 1355 POIs, embed_dim=171, total_children=4696
  Level 2: 44 POIs, embed_dim=125, total_children=1355
  Level 3: 5 POIs, embed_dim=105, total_children=44

Building A^l_p for All Levels

Building A^0_p (Level 0 - Building)
  A^0_p = Y^0 (base POI embeddings)
  Shape: (4696, 221)

Building A^1_p (Level 1)
  Child level 0: 4696 POIs, embed_dim=221
  Parent l

AttributeError: Exception encountered when calling layer "attention_aggregator_1" "                 f"(type AttentionAggregator).

'AttentionAggregator' object has no attribute 'projection'

Call arguments received by layer "attention_aggregator_1" "                 f"(type AttentionAggregator):
  • child_embeddings=tf.Tensor(shape=(1, 1, 221), dtype=float32)
  • mask=None
  • training=False