In [1]:
import os
import pickle
from datetime import datetime
from typing import Dict

import numpy as np
from sklearn.preprocessing import normalize

In [None]:
class PersonalisedPreferenceBuilder:
	def __init__(self,
				interactions_file: str = 'interactions.pkl',
				poi_embeddings_file: str = 'poi_embeddings.pkl',
				user_embeddings_file: str = 'user_embeddings.pkl',
				metadata_file: str = 'metadata.pkl'):
		
		print("=" * 60)
		print("Initializing Personalized Preference Builder")
		print("=" * 60)
	
		with open(interactions_file, 'rb') as f:
			self.interactions_data = pickle.load(f)
	
		with open(poi_embeddings_file, 'rb') as f:
			self.poi_embeddings_data = pickle.load(f)
		
		with open(user_embeddings_file, 'rb') as f:
			self.user_embeddings_data = pickle.load(f)
		
		with open(metadata_file, 'rb') as f:
			self.metadata = pickle.load(f)

	def build_A_lu(self, 
				level: int,
				aggregation: str = 'weighted_mean',
				normalize_output: bool = True,
		) -> np.ndarray:

		level_key = f'level_{level}'

		R_l = self.interactions_data['interactions'][level_key]['matrices']['interaction']
		n_users, n_pois_interaction = R_l.shape

		Y_l = self.poi_embeddings_data['poi_embeddings'][level_key]['embeddings']
		n_pois_embed, embed_dim = Y_l.shape
	
		if n_pois_interaction != n_pois_embed:
			n_pois = min(n_pois_interaction, n_pois_embed)
			R_l = R_l[:, :n_pois]
			Y_l = Y_l[:n_pois, :]

		if aggregation == 'weighted_mean':
			weighted_sum = R_l.dot(Y_l)  # (n_users, embed_dim)
		
			# Get sum of weights per user for normalization
			weight_sums = np.array(R_l.sum(axis=1)).flatten()  # (n_users,)
			weight_sums[weight_sums == 0] = 1.0  # Avoid division by zero
			
			A_lu = weighted_sum / weight_sums.reshape(-1, 1)

		elif aggregation == 'mean':
			# Simple average (treat all interactions equally)
			binary_R = (R_l > 0).astype(np.float32)
			
			embedding_sum = binary_R.dot(Y_l)
			interaction_counts = np.array(binary_R.sum(axis=1)).flatten()
			interaction_counts[interaction_counts == 0] = 1.0
			
			A_lu = embedding_sum / interaction_counts.reshape(-1, 1)
			
		elif aggregation == 'sum':
			# Sum of interacted POI embeddings
			A_lu = R_l.dot(Y_l)
			
		elif aggregation == 'max':
			# Max pooling - more expensive, iterate per user
			A_lu = np.zeros((n_users, embed_dim), dtype=np.float32)
			
			for user_idx in range(n_users):
				user_interactions = R_l.getrow(user_idx)
				poi_indices = user_interactions.indices
				
				if len(poi_indices) > 0:
					user_poi_embeddings = Y_l[poi_indices]
					A_lu[user_idx] = np.max(user_poi_embeddings, axis=0)
		
		else:
			raise ValueError(f"Unknown aggregation method: {aggregation}")
		
		# Handle users with no interactions (cold start)
		no_interaction_mask = np.array(R_l.sum(axis=1)).flatten() == 0
		n_cold_users = no_interaction_mask.sum()
		
		if n_cold_users > 0:
			print(f"\n  Cold-start users (no interactions): {n_cold_users}")
			# Fill with global average POI embedding
			global_avg = Y_l.mean(axis=0)
			A_lu[no_interaction_mask] = global_avg
		
		# Normalize if requested
		if normalize_output:
			A_lu = normalize(A_lu, norm='l2', axis=1)
			print(f"\n  Applied L2 normalization")
		
		print(f"\nA^{level}_u shape: {A_lu.shape}")
		print(f"  Value range: [{A_lu.min():.4f}, {A_lu.max():.4f}]")
		print(f"  Mean: {A_lu.mean():.4f}, Std: {A_lu.std():.4f}")
		
		return A_lu.astype(np.float32)
		
	def build_all_A_lu(self,
					aggregation: str = 'weighted_mean',
					normalize_output: bool = True) -> Dict[str, np.ndarray]:
		A_lu_all = {}
		
		for level in range(4):
			level_key = f'level_{level}'
			A_lu_all[level_key] = self.build_A_lu(
				level=level,
				aggregation=aggregation,
				normalize_output=normalize_output
			)
		
		return A_lu_all

	def build_complete_user_representation(self,
										level: int,
										aggregation: str = 'weighted_mean') -> np.ndarray:
		"""
		Build complete user representation P^l = [X_A || X_T || A^l_u]
		
		Args:
			level: Hierarchy level
			aggregation: Aggregation method for A^l_u
		
		Returns:
			P^l: (n_users, X_A_dim + X_T_dim + A_lu_dim) matrix
		"""
		print(f"\n{'=' * 60}")
		print(f"Building Complete User Representation P^{level}")
		print(f"{'=' * 60}")
		
		# Get X_A (explicit user attributes)
		X_A = self.user_embeddings_data.get('X_A', None)
		if X_A is None:
			# Try alternative key
			X_A = self.user_embeddings_data.get('user_embeddings', {}).get('X_A', None)
		
		# Get X_T (derived user attributes)
		X_T = self.user_embeddings_data.get('X_T', None)
		if X_T is None:
			X_T = self.user_embeddings_data.get('user_embeddings', {}).get('X_T', None)
		
		# Build A^l_u
		A_lu = self.build_A_lu(level=level, aggregation=aggregation)
		
		# Concatenate
		components = []
		component_names = []
		
		if X_A is not None:
			components.append(X_A)
			component_names.append(f'X_A ({X_A.shape[1]} dims)')
			print(f"\n  X_A shape: {X_A.shape}")
		
		if X_T is not None:
			components.append(X_T)
			component_names.append(f'X_T ({X_T.shape[1]} dims)')
			print(f"  X_T shape: {X_T.shape}")
		
		components.append(A_lu)
		component_names.append(f'A^{level}_u ({A_lu.shape[1]} dims)')
		print(f"  A^{level}_u shape: {A_lu.shape}")
		
		# Verify all have same number of users
		n_users_list = [c.shape[0] for c in components]
		if len(set(n_users_list)) > 1:
			print(f"\n  Warning: User count mismatch: {n_users_list}")
			min_users = min(n_users_list)
			components = [c[:min_users] for c in components]
		
		P_l = np.hstack(components)
		
		print(f"\n  P^{level} = [{' || '.join(component_names)}]")
		print(f"  Final P^{level} shape: {P_l.shape}")
		
		return P_l.astype(np.float32)
	
	def save_personalized_preferences(self,
									output_file: str = 'user_personalized_preferences.pkl',
									aggregation: str = 'weighted_mean'):
		"""
		Build and save A^l_u for all levels, plus complete P^l representations
		
		Args:
			output_file: Output file path
			aggregation: Aggregation method
		"""
		print("\n" + "=" * 60)
		print("Building and Saving Personalized Preferences")
		print("=" * 60)
		
		# Build A^l_u for all levels
		A_lu_all = self.build_all_A_lu(aggregation=aggregation)
		
		# Build complete P^l for all levels
		P_l_all = {}
		for level in range(4):
			level_key = f'level_{level}'
			P_l_all[level_key] = self.build_complete_user_representation(
				level=level, 
				aggregation=aggregation
			)
		
		# Prepare save data
		save_data = {
			# Personalized preferences A^l_u
			'A_lu': A_lu_all,
			
			# Complete user representations P^l
			'P_l': P_l_all,
			
			# Dimensions
			'dimensions': {
				level_key: {
					'A_lu_shape': A_lu_all[level_key].shape,
					'P_l_shape': P_l_all[level_key].shape
				}
				for level_key in A_lu_all.keys()
			},
			
			# Metadata
			'info': {
				'created_at': datetime.now().isoformat(),
				'aggregation_method': aggregation,
				'components': ['X_A', 'X_T', 'A_lu']
			}
		}
		
		# Save
		print(f"\nSaving to: {output_file}")
		with open(output_file, 'wb') as f:
			pickle.dump(save_data, f)
		
		# Summary
		import os
		file_size = os.path.getsize(output_file) / (1024 * 1024)
		print(f"  File size: {file_size:.2f} MB")
		
		print("\n  Saved matrices:")
		for level_key, dims in save_data['dimensions'].items():
			print(f"    {level_key}:")
			print(f"      A_lu: {dims['A_lu_shape']}")
			print(f"      P_l: {dims['P_l_shape']}")
		
		print("\n  Save complete!")
		
		return save_data

	def build_A_lu_standalone(interactions_file: str = 'interactions.pkl',
							poi_embeddings_file: str = 'poi_embeddings.pkl',
							level: int = 0,
							aggregation: str = 'weighted_mean') -> np.ndarray:
		"""
		Standalone function to build A^l_u without the full class
		
		Quick usage:
			A_lu = build_A_lu_standalone(level=0)
		"""
		# Load interaction matrix
		with open(interactions_file, 'rb') as f:
			interactions = pickle.load(f)
		
		R_l = interactions['interactions'][f'level_{level}']['matrices']['interaction']
		
		# Load POI embeddings
		with open(poi_embeddings_file, 'rb') as f:
			poi_data = pickle.load(f)
		
		Y_l = poi_data['poi_embeddings'][f'level_{level}']['embeddings']
		
		# Ensure dimensions match
		n_pois = min(R_l.shape[1], Y_l.shape[0])
		R_l = R_l[:, :n_pois]
		Y_l = Y_l[:n_pois, :]
		
		# Compute weighted mean
		if aggregation == 'weighted_mean':
			weighted_sum = R_l.dot(Y_l)
			weight_sums = np.array(R_l.sum(axis=1)).flatten()
			weight_sums[weight_sums == 0] = 1.0
			A_lu = weighted_sum / weight_sums.reshape(-1, 1)
		else:
			A_lu = R_l.dot(Y_l)
		
		# Handle cold-start users
		no_interaction = np.array(R_l.sum(axis=1)).flatten() == 0
		if no_interaction.any():
			A_lu[no_interaction] = Y_l.mean(axis=0)
		
		# Normalize
		A_lu = normalize(A_lu, norm='l2', axis=1)
		
		return A_lu.astype(np.float32)

In [None]:
if __name__ == "__main__":
    # Pickled inputs (relative to the notebook) and the file this run writes.
    interactions_file = "../../Sources/Embeddings/interactions.pkl"
    user_embeddings_file = "../../Sources/Embeddings/user_embeddings.pkl"
    poi_embeddings_file = "../../Sources/Embeddings/poi_embeddings.pkl"
    meta_data_file = "../../Sources/Embeddings/metadata.pkl"
    output_file = "user_personalized_preferences.pkl"

    # Positional order matches the constructor: interactions, POI embeddings,
    # user embeddings, metadata.
    builder = PersonalisedPreferenceBuilder(
        interactions_file,
        poi_embeddings_file,
        user_embeddings_file,
        meta_data_file,
    )

    # Build A^l_u and P^l for every level and pickle them.
    save_data = builder.save_personalized_preferences(output_file, 'weighted_mean')

    # Demo: read the file back and report the stored matrix shapes.
    print(f"\n{'=' * 60}")
    print("DEMO: Accessing Personalized Preferences")
    print(f"{'=' * 60}")

    with open(output_file, 'rb') as f:
        loaded = pickle.load(f)

    print("\nA^l_u matrices (personalized POI preferences):")
    for level in range(4):
        level_key = f'level_{level}'
        A_lu = loaded['A_lu'][level_key]
        print(f"  Level {level}: {A_lu.shape}")

    print("\nP^l matrices (complete user representations):")
    for level in range(4):
        level_key = f'level_{level}'
        P_l = loaded['P_l'][level_key]
        print(f"  Level {level}: {P_l.shape}")

Initializing Personalized Preference Builder

Building and Saving Personalized Preferences

  Applied L2 normalization

A^0_u shape: (21, 221)
  Value range: [-0.6528, 0.5586]
  Mean: 0.0120, Std: 0.0662

  Applied L2 normalization

A^1_u shape: (21, 171)
  Value range: [-0.5511, 0.5648]
  Mean: 0.0120, Std: 0.0755

  Applied L2 normalization

A^2_u shape: (21, 125)
  Value range: [-0.3158, 0.4136]
  Mean: 0.0154, Std: 0.0881

  Applied L2 normalization

A^3_u shape: (21, 105)
  Value range: [-0.3087, 0.3364]
  Mean: 0.0208, Std: 0.0953

Building Complete User Representation P^0

  Applied L2 normalization

A^0_u shape: (21, 221)
  Value range: [-0.6528, 0.5586]
  Mean: 0.0120, Std: 0.0662
  A^0_u shape: (21, 221)

  P^0 = [A^0_u (221 dims)]
  Final P^0 shape: (21, 221)

Building Complete User Representation P^1

  Applied L2 normalization

A^1_u shape: (21, 171)
  Value range: [-0.5511, 0.5648]
  Mean: 0.0120, Std: 0.0755
  A^1_u shape: (21, 171)

  P^1 = [A^1_u (171 dims)]
  Final P^