# Visualización de Entidades Enriquecidas
Este notebook carga el archivo `enriched_data.jsonl` y muestra las entidades extraídas agrupadas por tipo.

In [39]:
import json
from collections import defaultdict
from pathlib import Path

# Path to the pipeline output (JSONL: one JSON object per line).
# NOTE(review): this is a machine-specific absolute path; consider deriving it
# from a configurable data directory so the notebook runs on other machines.
output_path = Path(r'D:\TravelApp\Project\scripts\data\scraper_enrichment\enriched_data.jsonl')


def load_entities(jsonl_path):
    """Collect the `entities` lists of every record in a JSONL file.

    Blank lines are ignored. Lines that are not valid JSON, records that are
    not JSON objects, and `entities` values that are not lists are reported
    with their line number and skipped, so a single malformed record cannot
    abort the whole load. A missing or null `entities` key contributes nothing.

    Args:
        jsonl_path: Path of the JSONL file to read (opened as UTF-8).

    Returns:
        A flat list with the entities of all valid records, in file order.
    """
    entities = []
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Error al parsear línea JSON (línea {line_no}): {e}")
                continue
            # A line can be valid JSON without being an object (e.g. a list
            # or a number); such values have no `.get`.
            if not isinstance(record, dict):
                print(f"Línea {line_no} omitida: se esperaba un objeto JSON, no {type(record).__name__}")
                continue
            record_entities = record.get('entities')
            if record_entities is None:
                # Key absent or explicitly null: nothing to add.
                continue
            # Extending with a str or dict would silently add characters or
            # keys instead of entities, so only real lists are accepted.
            if not isinstance(record_entities, list):
                print(f"Línea {line_no} omitida: 'entities' debe ser una lista, no {type(record_entities).__name__}")
                continue
            entities.extend(record_entities)
    return entities


# Load the entities only when the pipeline output is available; otherwise
# continue with an empty list so the following cells still run.
if output_path.exists():
    all_entities = load_entities(output_path)
else:
    print(f"El archivo {output_path} no existe. Ejecuta primero el pipeline de scraping.")
    all_entities = []

print(f'Total de entidades extraídas: {len(all_entities)}')

Total de entidades extraídas: 1516


In [40]:
# Group the entities by `entity_type`. The field may hold a single value or a
# list of values; an entity with several types is listed under each of them.
entidades_por_tipo = defaultdict(list)
for entidad in all_entities:
    tipos = entidad.get('entity_type')
    if not isinstance(tipos, list):
        tipos = [tipos]
    # A missing, null or empty type (including an empty list) falls back to
    # 'desconocido' so the entity is never dropped or filed under 'None'.
    claves = [str(t) for t in tipos if t not in (None, '')] or ['desconocido']
    # dict.fromkeys removes repeated types while keeping their order, so an
    # entity is counted once per group.
    for clave in dict.fromkeys(claves):
        entidades_por_tipo[clave].append(entidad)

# One summary line per entity: a display name plus the first 120 characters
# of its description.
for tipo, entidades in entidades_por_tipo.items():
    print(f"\n=== {tipo.upper()} ({len(entidades)}) ===")
    for e in entidades:
        nombre = e.get('name') or e.get('title') or e.get('advice_text') or '[Sin nombre]'
        # str() makes the slice below safe: a non-string description
        # (number, dict, list) cannot be reliably truncated by slicing.
        desc = str(e.get('description') or e.get('context') or '')
        print(f'- {nombre}: {desc[:120]}')


=== SITE (805) ===
- Tailandia: Diverse country with attractions for all tastes, known for its hospitality, delicious cuisine, and beautiful landscapes 
- Bangkok: Capital city of Thailand with a mix of modernity and tradition, known for its bustling streets, vibrant markets, and cul
- Bangkok: One of the must-see places in Thailand, offering a mix of Western infrastructure and Eastern traditions. Explore the Gra
- Chiang Mai: Known as the 'Northern Capital' of Thailand, offering a magical environment with nature, Buddhist temples, palaces, and 
- Chiang Mai: Explore the city for at least 2 days, with an option for additional excursions. Consider a tour to Chiang Rai from Chian
- Doi Suthep: One of the most beautiful temples to visit in Thailand.
- Phi Phi Islands: Famous island group in Thailand known for its beaches and clear turquoise waters. Includes Koh Phi Phi Don, Koh Phi Phi 
- Sukhothai Historical Park: Historical park located between Bangkok and Chiang Mai, showcasing ruins 

In [41]:
# Print every field of every entity, grouped by type. Scalar values go on one
# line; lists and dicts are expanded one level, one item per indented line.
for tipo, entidades in entidades_por_tipo.items():
    print(f"\n=== {tipo.upper()} ({len(entidades)}) ===")
    for numero, entidad in enumerate(entidades, start=1):
        print(f"\nEntidad #{numero}")
        for campo, valor in entidad.items():
            # Scalars first: nothing to expand.
            if not isinstance(valor, (list, dict)):
                print(f"  {campo}: {valor}")
                continue
            # Containers get a header line followed by their contents.
            print(f"  {campo}:")
            if isinstance(valor, list):
                for elemento in valor:
                    print(f"    - {elemento}")
            else:
                for subclave, subvalor in valor.items():
                    print(f"    {subclave}: {subvalor}")
        # Separator between consecutive entities.
        print('-' * 40)


=== SITE (805) ===

Entidad #1
  entity_type: site
  name: Tailandia
  subtype: country
  description: Diverse country with attractions for all tastes, known for its hospitality, delicious cuisine, and beautiful landscapes including rivers, mountains, waterfalls, and valleys.
  hierarchy:
    - {'type': 'country', 'name': 'Thailand', 'code': 'TH'}
  location_text: Southeast Asia
  user_impressions:
    liked: True
  official_website: https://www.tourismthailand.org/
  images:
    - https://example.com/thailand.jpg
----------------------------------------

Entidad #2
  entity_type: site
  name: Bangkok
  subtype: city
  description: Capital city of Thailand with a mix of modernity and tradition, known for its bustling streets, vibrant markets, and cultural landmarks.
  hierarchy:
    - {'type': 'country', 'name': 'Thailand', 'code': 'TH'}
  location_text: Central Thailand
  avg_visit_duration: 3-4 days
  security: Exercise caution in crowded areas and beware of scams.
  restrictions:
 