In [2]:
import pickle

In [1]:
import numpy as np
print(np.__version__)


1.26.0


## user_embeddings.pkl

In [3]:
# Load and inspect user_embeddings (X = XA + XT)
with open('./Embeddings/user_embeddings.pkl', 'rb') as f:
    user_data = pickle.load(f)

print("="*70)
print("USER EMBEDDINGS STRUCTURE INSPECTION")
print("="*70)

print(f"\nType: {type(user_data)}")
print(f"\nTop-level keys: {list(user_data.keys())}")

# Show structure of each key
for key, value in user_data.items():
    print(f"\n{key}:")
    print(f"  Type: {type(value)}")
    if isinstance(value, dict):
        print(f"  Keys: {list(value.keys())[:10]}")  # First 10 keys
        print(f"  Length: {len(value)}")
    elif isinstance(value, list):
        print(f"  Length: {len(value)}")
        print(f"  Sample: {value[:3]}")
    else:
        print(f"  Value: {value}")

USER EMBEDDINGS STRUCTURE INSPECTION

Type: <class 'dict'>

Top-level keys: ['user_embeddings', 'poi_embeddings']

user_embeddings:
  Type: <class 'dict'>
  Keys: ['966592ed-5bfd-4113-9c4d-d93cd3637b40', '6fea97cb-757c-47f2-9e34-3ccbb6714c80', '91d45a60-1f23-45f2-a343-a51017f818d9', '4fbee9f0-804a-4e5d-834e-553670746410', 'ed4d4ddf-8829-49c2-b540-b0c8aa36dfcf', '6b60d5cf-63cc-4dc4-9bbe-74da03df19db', '13e7ba11-c5a9-4f73-86d2-0f2c058891c7', 'af0e718d-4cca-4308-a72e-221b3f2fbe9e', '11069943-47a6-46e4-87ca-fa7d38142abc', '84273d4e-1e2c-4baf-8f0d-7b4f2ef833d0']
  Length: 21

poi_embeddings:
  Type: <class 'dict'>
  Keys: []
  Length: 0


## metadata.pkl

In [2]:
# Load and inspect metadata
with open('./Embeddings/metadata.pkl', 'rb') as f:
    metadata = pickle.load(f)

print("="*70)
print("METADATA STRUCTURE INSPECTION")
print("="*70)

print(f"\nType: {type(metadata)}")
print(f"\nTop-level keys: {list(metadata.keys())}")

# Show structure of each key
for key, value in metadata.items():
    print(f"\n{key}:")
    print(f"  Type: {type(value)}")
    if isinstance(value, dict):
        print(f"  Keys: {list(value.keys())[:10]}")  # First 10 keys
        print(f"  Length: {len(value)}")
    elif isinstance(value, list):
        print(f"  Length: {len(value)}")
        print(f"  Sample: {value[:3]}")
    else:
        print(f"  Value: {value}")

METADATA STRUCTURE INSPECTION

Type: <class 'dict'>

Top-level keys: ['mappings', 'counts', 'user_ids', 'poi_ids', 'level_names', 'info']

mappings:
  Type: <class 'dict'>
  Keys: ['user', 'poi']
  Length: 2

counts:
  Type: <class 'dict'>
  Keys: ['users', 'pois_level_0', 'pois_level_1', 'pois_level_2', 'pois_level_3', 'pois_total']
  Length: 6

user_ids:
  Type: <class 'list'>
  Length: 21
  Sample: ['966592ed-5bfd-4113-9c4d-d93cd3637b40', '6fea97cb-757c-47f2-9e34-3ccbb6714c80', '91d45a60-1f23-45f2-a343-a51017f818d9']

poi_ids:
  Type: <class 'dict'>
  Keys: [0, 1, 2, 3]
  Length: 4

level_names:
  Type: <class 'dict'>
  Keys: [0, 1, 2, 3]
  Length: 4

info:
  Type: <class 'dict'>
  Keys: ['created_at', 'source_files', 'version']
  Length: 3


In [5]:
import pickle
import numpy as np
import os

def inspect_embedding_file(file_path):
    """Comprehensive inspection of any embedding file"""
    
    print("="*70)
    print(f"INSPECTING: {file_path}")
    print("="*70)
    
    if not os.path.exists(file_path):
        print(f"‚ùå File not found: {file_path}")
        return
    
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
    
    print(f"\nüì¶ Top-level type: {type(data)}")
    
    # ========================================================================
    # CASE 1: Dictionary
    # ========================================================================
    if isinstance(data, dict):
        print(f"üìä Dictionary with {len(data)} keys")
        print(f"\nüîë Keys: {list(data.keys())}")
        
        # Inspect each key
        for key, value in data.items():
            print(f"\n{'‚îÄ'*70}")
            print(f"Key: '{key}'")
            print(f"  Type: {type(value)}")
            
            if isinstance(value, dict):
                print(f"  Sub-dictionary with {len(value)} entries")
                sample_keys = list(value.keys())[:5]
                print(f"  Sample keys: {sample_keys}")
                
                # Check what the values look like
                if sample_keys:
                    sample_value = value[sample_keys[0]]
                    print(f"  Sample value type: {type(sample_value)}")
                    if isinstance(sample_value, np.ndarray):
                        print(f"  Sample value shape: {sample_value.shape}")
                        print(f"  Sample value dtype: {sample_value.dtype}")
                        print(f"  Sample value preview: {sample_value[:5] if len(sample_value) > 5 else sample_value}")
                    elif isinstance(sample_value, (list, tuple)):
                        print(f"  Sample value length: {len(sample_value)}")
                        print(f"  Sample value preview: {sample_value[:5] if len(sample_value) > 5 else sample_value}")
                    else:
                        print(f"  Sample value: {sample_value}")
            
            elif isinstance(value, np.ndarray):
                print(f"  NumPy array")
                print(f"  Shape: {value.shape}")
                print(f"  Dtype: {value.dtype}")
                print(f"  Size: {value.size} elements ({value.nbytes / 1024 / 1024:.2f} MB)")
                print(f"  Min: {value.min():.4f}, Max: {value.max():.4f}, Mean: {value.mean():.4f}")
                print(f"  Sample: {value.flatten()[:10]}")
            
            elif isinstance(value, list):
                print(f"  List with {len(value)} elements")
                if value:
                    print(f"  First element type: {type(value[0])}")
                    if isinstance(value[0], np.ndarray):
                        print(f"  First element shape: {value[0].shape}")
                    print(f"  Sample: {value[:3]}")
            
            elif isinstance(value, (int, float, str, bool)):
                print(f"  Value: {value}")
            
            else:
                print(f"  Complex type, cannot display directly")
    
    # ========================================================================
    # CASE 2: NumPy Array
    # ========================================================================
    elif isinstance(data, np.ndarray):
        print(f"üìä NumPy Array")
        print(f"  Shape: {data.shape}")
        print(f"  Dtype: {data.dtype}")
        print(f"  Size: {data.size} elements ({data.nbytes / 1024 / 1024:.2f} MB)")
        print(f"  Min: {data.min():.4f}, Max: {data.max():.4f}, Mean: {data.mean():.4f}")
        print(f"\n  Sample (first row):\n{data[0] if len(data.shape) > 1 else data[:10]}")
    
    # ========================================================================
    # CASE 3: List
    # ========================================================================
    elif isinstance(data, list):
        print(f"üìä List with {len(data)} elements")
        if data:
            print(f"  First element type: {type(data[0])}")
            if isinstance(data[0], np.ndarray):
                print(f"  First element shape: {data[0].shape}")
                print(f"  All elements same shape: {all(isinstance(x, np.ndarray) and x.shape == data[0].shape for x in data)}")
            print(f"  Sample (first 3): {data[:3]}")
    
    # ========================================================================
    # CASE 4: Other
    # ========================================================================
    else:
        print(f"  Unhandled type: {type(data)}")
        print(f"  Data: {data}")
    
    print("\n" + "="*70 + "\n")

# ============================================================================
# INSPECT ALL EMBEDDING FILES
# ============================================================================

embedding_files = [
    './Embeddings/user_embeddings.pkl',
    './Embeddings/poi_embeddings.pkl',
]

print("\n" + "üîç " * 35)
print("EMBEDDING FILE INSPECTOR")
print("üîç " * 35 + "\n")

for file_path in embedding_files:
    inspect_embedding_file(file_path)


üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç 
EMBEDDING FILE INSPECTOR
üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç üîç 

INSPECTING: ./Embeddings/user_embeddings.pkl

üì¶ Top-level type: <class 'dict'>
üìä Dictionary with 2 keys

üîë Keys: ['user_embeddings', 'poi_embeddings']

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
Key: 'user_embeddings'
  Type: <class 'dict'>
  Sub-dictionary with 21 entries
  Sample keys: ['966592ed-5bfd-4113-9c4d-d93cd3637b40', '6fea97cb-757c-47f2-9e34-3ccbb6714c80', '91d45a60-1f23-45f2-a343-a51017f818d9', '4fbee9f0-804a-4e5d-834e-553670746

  data = pickle.load(f)


AttributeError: 'TfidfVectorizer' object has no attribute 'norm'