## Arquitectura General
### Descripción General
Sistema completo de recomendación turística que combina Deep Learning con factores dinámicos en tiempo real para generar recomendaciones personalizadas.
### Componentes Principales

- **Procesamiento de Datos**: Limpieza y preparación de múltiples fuentes
- **Modelo de Deep Learning**: Red neuronal híbrida optimizada para GPU
- **Re-ranking Dinámico**: Ajuste en tiempo real basado en clima y tendencias
- **Pipeline Automatizado**: Orquestación y actualización diaria

### Flujo de Datos

Datos Crudos → Preprocesamiento → Entrenamiento → Modelo Base → Re-ranking → Recomendaciones Finales

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pickle
import requests
import json
import sqlite3
import logging
import time
import os
import warnings
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Machine Learning
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model

# Web scraping (opcional)
try:
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    SELENIUM_AVAILABLE = True
except ImportError:
    SELENIUM_AVAILABLE = False

# Visualización
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')

In [2]:
# Logging configuration. It is set up before anything else in this cell so
# that a warning emitted below cannot implicitly install the default root
# handler and turn this basicConfig call into a no-op.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('tourism_recommender_system.log'),
        logging.StreamHandler()
    ]
)

# GPU configuration for the first visible GPU: enable memory growth and cap
# the device at 3584 MB.
# NOTE(review): memory growth and a hard memory limit are normally presented
# as alternative strategies - confirm that both are intended here.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        tf.config.experimental.set_virtual_device_configuration(
            physical_devices[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=3584)]
        )
    except RuntimeError as gpu_config_error:
        # TensorFlow refuses to change these settings once the GPU has been
        # initialised (for example when this cell is executed a second time).
        logging.warning(f"GPU configuration skipped: {gpu_config_error}")

# OpenWeatherMap API configuration.
# SECURITY: prefer supplying the key through the WEATHER_API_KEY environment
# variable. The literal below is kept only as a backward-compatible fallback;
# a key committed to source should be rotated and removed.
WEATHER_API_KEY = os.environ.get("WEATHER_API_KEY", "1e74b5d9bcc3b9b36252ad21109989ec")

### Limpieza y preparación de datos

In [5]:
class TourismDataPreprocessor:
    """Clase completa para preprocesar datos de turismo con todas las funcionalidades"""
    
    def __init__(self):
        self.scalers = {}
        self.encoders = {}
        
    def load_and_clean_datasets(self, file_paths):
        """Carga y limpia todos los datasets"""
        datasets = {}
        
        logging.info("Cargando datasets principales...")
        
        # Cargar datasets principales
        # Actividades (5 archivos)
        if 'activities' in file_paths:
            activities_list = []
            for file in file_paths['activities']:
                if os.path.exists(file):
                    df = pd.read_csv(file)
                    activities_list.append(df)
                    logging.info(f"  Cargado: {file} ({len(df)} registros)")
            if activities_list:
                datasets['activities'] = pd.concat(activities_list, ignore_index=True)
                logging.info(f"Total actividades: {len(datasets['activities'])}")
        
        # Comentarios con análisis de sentimiento y clima
        if 'reviews' in file_paths and os.path.exists(file_paths['reviews']):
            datasets['reviews'] = pd.read_csv(file_paths['reviews'])
            logging.info(f"Reviews cargadas: {len(datasets['reviews'])}")
        
        # Datos de turismo ONU (7 archivos) - opcional
        if 'un_tourism' in file_paths:
            un_data_list = []
            for file in file_paths['un_tourism']:
                if os.path.exists(file):
                    df = pd.read_csv(file)
                    un_data_list.append(df)
            if un_data_list:
                datasets['un_tourism'] = pd.concat(un_data_list, ignore_index=True)
        
        # Meta Data for Good - opcional
        if 'commuting_zones' in file_paths and os.path.exists(file_paths['commuting_zones']):
            datasets['commuting_zones'] = pd.read_csv(file_paths['commuting_zones'])
        
        if 'movement_data' in file_paths and os.path.exists(file_paths['movement_data']):
            datasets['movement_data'] = pd.read_csv(file_paths['movement_data'])
        
        # Google Trends - opcional
        if 'search_trends' in file_paths and os.path.exists(file_paths['search_trends']):
            datasets['search_trends'] = pd.read_csv(file_paths['search_trends'])
        
        if 'monthly_interest' in file_paths and os.path.exists(file_paths['monthly_interest']):
            datasets['monthly_interest'] = pd.read_csv(file_paths['monthly_interest'])
        
        return datasets
    
    def match_reviews_to_activities(self, activities_df, sentiment_df):
        """Empareja reviews con actividades basándose en títulos - versión completa"""
        logging.info("Emparejando reviews con actividades...")
        
        # Verificar columnas necesarias
        required_activity_cols = ['titulo', 'ciudad', 'id', 'precio', 'rating']
        required_sentiment_cols = ['texto', 'ciudad', 'sentimiento', 'confianza']
        
        missing_activity_cols = set(required_activity_cols) - set(activities_df.columns)
        missing_sentiment_cols = set(required_sentiment_cols) - set(sentiment_df.columns)
        
        if missing_activity_cols:
            logging.warning(f"Columnas faltantes en activities: {missing_activity_cols}")
        if missing_sentiment_cols:
            logging.warning(f"Columnas faltantes en sentiment: {missing_sentiment_cols}")
        
        # NORMALIZAR NOMBRES DE CIUDADES
        logging.info("Normalizando nombres de ciudades...")
        activities_df = activities_df.copy()
        sentiment_df = sentiment_df.copy()
        
        # Función para normalizar nombres de ciudades
        def normalize_city_name(city_name):
            if pd.isna(city_name):
                return 'Unknown'
            
            city_str = str(city_name).lower().strip()
            
            # Mapeo completo de normalizaciones
            city_mappings = {
                'barcelona': 'Barcelona',
                'barcelona1': 'Barcelona',
                'madrid': 'Madrid',
                'malaga': 'Malaga',
                'málaga': 'Malaga',
                'sevilla': 'Sevilla',
                'valencia': 'Valencia',
                'tenerife': 'Tenerife',
                'gran canaria': 'Gran Canaria',
                'canaria': 'Gran Canaria',
                'canarias': 'Gran Canaria',
                'mallorca': 'Mallorca',
                'palma de mallorca': 'Mallorca',
                'palma': 'Mallorca'
            }
            
            return city_mappings.get(city_str, city_name)
        
        # Aplicar normalización
        activities_df['ciudad_original'] = activities_df['ciudad']
        sentiment_df['ciudad_original'] = sentiment_df['ciudad']
        
        activities_df['ciudad'] = activities_df['ciudad'].apply(normalize_city_name)
        sentiment_df['ciudad'] = sentiment_df['ciudad'].apply(normalize_city_name)
        
        logging.info("Ciudades después de normalización:")
        logging.info(f"  Activities: {sorted(activities_df['ciudad'].unique())}")
        logging.info(f"  Sentiment: {sorted(sentiment_df['ciudad'].unique())}")
        
        # Crear dataset expandido donde cada review se asocia con su actividad
        matched_reviews = []
        total_matches = 0
        successful_matches = 0
        errors_count = 0
        
        logging.info(f"Procesando {len(activities_df)} actividades...")
        
        for idx, activity in activities_df.iterrows():
            if idx % 1000 == 0:
                logging.info(f"Progreso: {idx}/{len(activities_df)} ({idx/len(activities_df)*100:.1f}%) - Matches: {total_matches}")
            
            try:
                activity_title = str(activity['titulo']).lower().strip()
                activity_city = activity['ciudad']
                item_id = activity.get('id', f'item_{idx}')
                
                # Saltear si el título está vacío o es muy corto
                if len(activity_title) < 3:
                    continue
                
                # NORMALIZAR TÍTULO - Remover caracteres especiales problemáticos
                import string
                # Crear tabla de traducción para remover puntuación problemática
                translator = str.maketrans('', '', '!¡¿?()[]{}*+^$|\\')
                activity_title_clean = activity_title.translate(translator)
                
                # Método 1: Búsqueda exacta sin regex
                try:
                    # Buscar reviews que contengan el título (búsqueda simple)
                    matching_reviews_mask = (
                        sentiment_df['texto'].str.lower().str.contains(activity_title_clean, na=False, regex=False) &
                        (sentiment_df['ciudad'] == activity_city)
                    )
                    
                    matching_reviews_subset = sentiment_df[matching_reviews_mask].copy()
                    
                except Exception as simple_error:
                    # Método 2: Búsqueda palabra por palabra
                    try:
                        words = activity_title_clean.split()
                        if len(words) >= 2:  # Solo si tiene al menos 2 palabras
                            # Buscar reviews que contengan al menos 70% de las palabras del título
                            mask = sentiment_df['ciudad'] == activity_city
                            word_matches = 0
                            for word in words:
                                if len(word) > 3:  # Solo palabras significativas
                                    try:
                                        word_mask = sentiment_df['texto'].str.lower().str.contains(word, na=False, regex=False)
                                        if word_mask.any():
                                            mask = mask & word_mask
                                            word_matches += 1
                                    except:
                                        continue
                            
                            # Solo considerar match si encontramos suficientes palabras
                            if word_matches >= max(1, len([w for w in words if len(w) > 3]) * 0.7):
                                matching_reviews_subset = sentiment_df[mask].copy()
                            else:
                                matching_reviews_subset = pd.DataFrame()
                        else:
                            matching_reviews_subset = pd.DataFrame()
                            
                    except Exception as word_error:
                        errors_count += 1
                        if errors_count < 10:
                            logging.warning(f"Error procesando '{activity_title[:30]}...': {word_error}")
                        continue
                
                # Agregar información de la actividad a cada review matching
                if not matching_reviews_subset.empty:
                    matching_reviews_subset['item_id'] = item_id
                    matching_reviews_subset['precio'] = activity.get('precio', 0)
                    matching_reviews_subset['rating_actividad'] = activity.get('rating', 3.0)
                    matching_reviews_subset['titulo_actividad'] = activity['titulo']
                    
                    matched_reviews.append(matching_reviews_subset)
                    total_matches += len(matching_reviews_subset)
                    successful_matches += 1
                    
            except Exception as e:
                errors_count += 1
                if errors_count < 10:
                    logging.error(f"Error general procesando actividad {idx}: {e}")
                continue
        
        # Consolidar resultados
        if matched_reviews:
            all_matched_reviews = pd.concat(matched_reviews, ignore_index=True)
            
            logging.info(f"\nRESULTADOS DEL EMPAREJAMIENTO:")
            logging.info(f"   Reviews emparejadas: {len(all_matched_reviews):,}")
            logging.info(f"   Actividades con reviews: {all_matched_reviews['item_id'].nunique()}")
            logging.info(f"   Tasa de cobertura: {successful_matches/len(activities_df)*100:.1f}%")
            logging.info(f"   Actividades procesadas exitosamente: {successful_matches}")
            logging.info(f"   Errores encontrados: {errors_count}")
            
            # Estadísticas adicionales
            reviews_per_activity = all_matched_reviews.groupby('item_id').size()
            logging.info(f"   Reviews promedio por actividad: {reviews_per_activity.mean():.1f}")
            logging.info(f"   Actividad con más reviews: {reviews_per_activity.max()}")
            
            # Estadísticas por ciudad
            city_stats = all_matched_reviews.groupby('ciudad').agg({
                'item_id': 'nunique',
                'texto': 'count'
            }).rename(columns={'item_id': 'actividades', 'texto': 'reviews'})
            
            logging.info(f"\n   Distribución por ciudad:")
            for city, stats in city_stats.iterrows():
                logging.info(f"     {city}: {stats['actividades']} actividades, {stats['reviews']} reviews")
            
        else:
            logging.warning("NO SE ENCONTRARON EMPAREJAMIENTOS")
            # Crear DataFrame vacío con columnas esperadas
            all_matched_reviews = pd.DataFrame(columns=[
                'texto', 'ciudad', 'categoria', 'fecha', 'fuente', 
                'sentimiento', 'confianza', 'descripcion_sencilla',
                'item_id', 'precio', 'rating_actividad', 'titulo_actividad'
            ])
        
        return all_matched_reviews
    
    def create_synthetic_user_ids(self, reviews_df):
        """Crea user_ids sintéticos basándose en patrones"""
        logging.info("Creando user_ids sintéticos...")
        
        if reviews_df.empty:
            return reviews_df
            
        reviews_df = reviews_df.copy()
        reviews_df['fecha_comentario'] = pd.to_datetime(reviews_df.get('fecha', datetime.now()), errors='coerce')
        
        # Crear grupos base
        reviews_df['grupo_base'] = (
            reviews_df['ciudad'].astype(str) + '_' + 
            reviews_df.get('fuente', 'unknown').astype(str)
        )
        
        # Asignar user_ids basados en patrones
        user_id_counter = 0
        reviews_df['user_id'] = None
        
        for grupo in reviews_df['grupo_base'].unique():
            grupo_reviews = reviews_df[reviews_df['grupo_base'] == grupo]
            
            # Asignar IDs secuenciales por grupo
            for i in range(len(grupo_reviews)):
                if i % 5 == 0:  # Nuevo usuario cada 5 reviews
                    user_id_counter += 1
                reviews_df.loc[grupo_reviews.index[i], 'user_id'] = f'user_{user_id_counter}'
        
        logging.info(f"Creados {user_id_counter} user_ids sintéticos")
        return reviews_df

### Modelo de Deep Learning

In [7]:
class TourismRecommenderModel:
    """Modelo de recomendación con Deep Learning optimizado para RTX 3050 Ti"""
    
    def __init__(self, embedding_dim=32, dense_units=64):
        self.embedding_dim = embedding_dim
        self.dense_units = dense_units
        self.model = None
        self.history = None
        self.user_to_idx = {}
        self.item_to_idx = {}
        self.city_to_idx = {}
        
    def build_hybrid_model(self, num_users, num_items, num_cities, 
                          contextual_features_dim=10, weather_features_dim=15):
        """Build and compile the multi-input, three-output Keras model.

        Inputs are scalar user, item and city ids plus three feature vectors
        (contextual, 4 temporal values, weather/sentiment). The ids go through
        L2-regularised embeddings, the vectors through dense layers, and the
        concatenation feeds two dense layers with dropout. Three sigmoid heads
        named ``rating``, ``sentiment`` and ``interaction`` are produced.

        Args:
            num_users: Number of distinct users; the embedding has one extra row.
            num_items: Number of distinct items; the embedding has one extra row.
            num_cities: Number of distinct cities; the embedding has one extra
                row and half the embedding width.
            contextual_features_dim: Length of the contextual feature vector.
            weather_features_dim: Length of the weather/sentiment vector.

        Returns:
            The compiled model, also stored in ``self.model``.
        """
        logging.info("Construyendo modelo híbrido compacto para RTX 3050 Ti...")

        def embed(ids, vocabulary_size, width):
            # Embedding lookup followed by a flatten, one regulariser per layer.
            looked_up = layers.Embedding(
                vocabulary_size, width,
                embeddings_regularizer=keras.regularizers.l2(1e-6)
            )(ids)
            return layers.Flatten()(looked_up)

        half_units = self.dense_units // 2

        # Model inputs.
        user_input = layers.Input(shape=(), name='user_id')
        item_input = layers.Input(shape=(), name='item_id')
        city_input = layers.Input(shape=(), name='city_id')
        contextual_input = layers.Input(shape=(contextual_features_dim,), name='contextual_features')
        temporal_input = layers.Input(shape=(4,), name='temporal_features')
        weather_sentiment_input = layers.Input(shape=(weather_features_dim,), name='weather_sentiment')
        model_inputs = [user_input, item_input, city_input,
                        contextual_input, temporal_input, weather_sentiment_input]

        # Id embeddings, created in user / item / city order.
        user_vec = embed(user_input, num_users + 1, self.embedding_dim)
        item_vec = embed(item_input, num_items + 1, self.embedding_dim)
        city_vec = embed(city_input, num_cities + 1, self.embedding_dim // 2)

        # Per-branch processing of the feature vectors.
        contextual_branch = layers.Dense(half_units, activation='relu')(contextual_input)
        contextual_branch = layers.Dropout(0.2)(contextual_branch)

        temporal_branch = layers.Dense(16, activation='relu')(temporal_input)

        weather_branch = layers.Dense(32, activation='relu', name='climate_layer')(weather_sentiment_input)
        weather_branch = layers.Dropout(0.2)(weather_branch)

        # Fuse every branch into a single vector.
        fused = layers.Concatenate(name='feature_fusion')([
            user_vec, item_vec, city_vec,
            contextual_branch, temporal_branch, weather_branch
        ])

        # Shared trunk.
        trunk = layers.Dense(self.dense_units, activation='relu', name='main_layer')(fused)
        trunk = layers.Dropout(0.3)(trunk)
        trunk = layers.Dense(half_units, activation='relu', name='output_layer')(trunk)
        trunk = layers.Dropout(0.2)(trunk)

        # One sigmoid head per task.
        rating_output = layers.Dense(1, activation='sigmoid', name='rating')(trunk)
        sentiment_output = layers.Dense(1, activation='sigmoid', name='sentiment')(trunk)
        interaction_output = layers.Dense(1, activation='sigmoid', name='interaction')(trunk)

        self.model = Model(
            inputs=model_inputs,
            outputs=[rating_output, sentiment_output, interaction_output],
            name='TourismRecommenderCompact'
        )

        # Per-head loss, weight and metric.
        head_losses = {
            'rating': 'mse',
            'sentiment': 'mse', 
            'interaction': 'binary_crossentropy'
        }
        head_weights = {
            'rating': 1.0,
            'sentiment': 0.7,
            'interaction': 0.3
        }
        head_metrics = {
            'rating': ['mae'],
            'sentiment': ['mae'],
            'interaction': ['accuracy']
        }

        self.model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=0.002, epsilon=1e-7),
            loss=head_losses,
            loss_weights=head_weights,
            metrics=head_metrics
        )

        total_params = self.model.count_params()
        logging.info(f"Modelo compacto construido: {total_params:,} parámetros")

        return self.model
    
    def prepare_training_data(self, matched_reviews_df):
        """Prepara datos para entrenamiento"""
        if matched_reviews_df.empty:
            raise ValueError("No hay datos de entrenamiento")
        
        # Crear mapeos de índices
        unique_users = matched_reviews_df['user_id'].unique()
        unique_items = matched_reviews_df['item_id'].unique()
        unique_cities = matched_reviews_df['ciudad'].unique()
        
        self.user_to_idx = {user: idx for idx, user in enumerate(unique_users)}
        self.item_to_idx = {item: idx for idx, item in enumerate(unique_items)}
        self.city_to_idx = {city: idx for idx, city in enumerate(unique_cities)}
        
        training_data = []
        
        for idx, row in matched_reviews_df.iterrows():
            user_idx = self.user_to_idx.get(row['user_id'], 0)
            item_idx = self.item_to_idx.get(row['item_id'], 0)
            city_idx = self.city_to_idx.get(row['ciudad'], 0)
            
            # Features simplificadas
            contextual_features = [0.5] * 10  # Placeholder
            temporal_features = self._extract_temporal_features(row.get('fecha'))
            weather_features = [0.5] * 15  # Placeholder
            
            # Targets basados en sentimiento
            sentiment = row.get('sentimiento', 'neutro')
            if sentiment == 'negativo':
                rating_target = 0.25
                sentiment_target = 0.0
            elif sentiment == 'positivo':
                rating_target = 0.875
                sentiment_target = 1.0
            else:
                rating_target = 0.6
                sentiment_target = 0.5
            
            training_data.append({
                'user_id': user_idx,
                'item_id': item_idx,
                'city_id': city_idx,
                'contextual_features': contextual_features,
                'temporal_features': temporal_features,
                'weather_sentiment': weather_features,
                'rating': rating_target,
                'sentiment': sentiment_target,
                'interaction': 1.0,
                'sample_weight': row.get('confianza', 0.8)
            })
        
        return pd.DataFrame(training_data)
    
    def _extract_temporal_features(self, date):
        """Extrae features temporales (estación)"""
        try:
            date = pd.to_datetime(date, errors='coerce')
            if pd.isna(date):
                return [0.25, 0.25, 0.25, 0.25]
            
            month = date.month
            if month in [12, 1, 2]:
                return [1, 0, 0, 0]  # Invierno
            elif month in [3, 4, 5]:
                return [0, 1, 0, 0]  # Primavera
            elif month in [6, 7, 8]:
                return [0, 0, 1, 0]  # Verano
            else:
                return [0, 0, 0, 1]  # Otoño
        except:
            return [0.25, 0.25, 0.25, 0.25]
    
    def train_model(self, training_data, validation_split=0.2, epochs=30, batch_size=128):
        """Fit ``self.model`` on the prepared training records.

        Args:
            training_data: DataFrame holding the id columns, the three
                list-valued feature columns, the three target columns and
                ``sample_weight``.
            validation_split: Forwarded to ``fit``.
            epochs: Forwarded to ``fit``.
            batch_size: Forwarded to ``fit``.

        Returns:
            The history object returned by ``fit``, also kept in
            ``self.history``.
        """
        logging.info("Iniciando entrenamiento optimizado...")

        def column_as(name, dtype):
            # Scalar column converted to a flat array of the given dtype.
            return training_data[name].values.astype(dtype)

        def stacked(name):
            # Column of equal-length lists converted to a 2-D float32 array.
            return np.array(training_data[name].tolist(), dtype='float32')

        # Model inputs, keyed by input-layer name.
        inputs = {name: column_as(name, 'int32')
                  for name in ('user_id', 'item_id', 'city_id')}
        for name in ('contextual_features', 'temporal_features', 'weather_sentiment'):
            inputs[name] = stacked(name)

        # Targets, keyed by output-layer name.
        targets = {name: column_as(name, 'float32')
                   for name in ('rating', 'sentiment', 'interaction')}

        weights = column_as('sample_weight', 'float32')

        # Stop early on a stalled validation loss and shrink the learning rate.
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=5, restore_best_weights=True, verbose=1
        )
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.7, patience=3, min_lr=1e-6, verbose=1
        )

        self.history = self.model.fit(
            inputs, targets,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            sample_weight=weights,
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )

        logging.info("Entrenamiento completado!")
        return self.history
    
    def save_model(self, model_path):
        """Guarda el modelo y mapeos"""
        self.model.save(model_path)
        
        mappings = {
            'user_to_idx': self.user_to_idx,
            'item_to_idx': self.item_to_idx,
            'city_to_idx': self.city_to_idx
        }
        
        with open(model_path.replace('.h5', '_mappings.pkl'), 'wb') as f:
            pickle.dump(mappings, f)
        
        logging.info(f"Modelo guardado en {model_path}")

### Sistema Re-Ranking Diario

In [9]:
class OptimizedScraper:
    """Scraper optimizado y simplificado para reviews de turismo"""
    
    def __init__(self, headless=True):
        self.headless = headless
        self.driver = None
        
    def _setup_driver(self):
        """Return a Chrome driver, creating it on first use.

        Returns:
            The existing driver if there is one, a newly created driver
            otherwise, or ``None`` when Selenium is unavailable or the driver
            could not be configured.
        """
        if not SELENIUM_AVAILABLE:
            logging.warning("Selenium no disponible. Scraping deshabilitado.")
            return None

        # Reuse a driver that is already running.
        if self.driver:
            return self.driver

        chrome_options = webdriver.ChromeOptions()
        flags = ['--headless=new'] if self.headless else []
        flags += ['--no-sandbox', '--disable-dev-shm-usage', '--disable-gpu']
        for flag in flags:
            chrome_options.add_argument(flag)
        chrome_options.page_load_strategy = 'eager'

        try:
            self.driver = webdriver.Chrome(options=chrome_options)
            self.driver.set_page_load_timeout(15)
        except Exception as e:
            logging.error(f"Error configurando driver: {e}")
            return None
        else:
            return self.driver
    
    def _quit_driver(self):
        """Cierra el driver"""
        if self.driver:
            try:
                self.driver.quit()
            except:
                pass
            self.driver = None
    
    def scrape_quick_reviews(self, city, date=None, max_items=10):
        """Collect a few text snippets from a Google search about ``city``.

        Args:
            city: City name used in the search query and stored in each
                result.
            date: Date string used in the query and stored in each result;
                defaults to today formatted as ``YYYY-MM-DD``.
            max_items: Maximum number of snippet elements inspected.

        Returns:
            A list of dicts with ``texto``, ``fecha``, ``fuente`` and
            ``ciudad`` keys. The list is empty when no driver is available or
            the scraping fails. The driver is always closed before returning.
        """
        # Imported locally so the method does not depend on a file-level import.
        from urllib.parse import quote_plus

        if date is None:
            date = datetime.now().strftime('%Y-%m-%d')

        reviews = []

        try:
            driver = self._setup_driver()
            if not driver:
                return reviews

            # URL-encode the query so that cities containing spaces or
            # accented characters still produce a valid URL.
            query = quote_plus(f"{city} turismo opiniones {date}")
            search_url = f"https://www.google.com/search?q={query}"
            driver.get(search_url)

            # Short fixed pause before reading the page.
            time.sleep(1)

            # Elements matching the CSS classes used for result snippets.
            snippets = driver.find_elements(By.CSS_SELECTOR, '.VwiC3b, .s3v9rd')

            for snippet in snippets[:max_items]:
                try:
                    text = snippet.text.strip()
                except Exception:
                    # An element that can no longer be read is skipped.
                    continue
                if text and len(text) > 20:
                    reviews.append({
                        'texto': text,
                        'fecha': date,
                        'fuente': 'web_scraping',
                        'ciudad': city
                    })

        except Exception as e:
            logging.warning(f"Error en scraping rápido: {e}")
        finally:
            self._quit_driver()

        return reviews
    
    def load_or_scrape(self, city, csv_path=None):
        """Intenta cargar datos de CSV, si no existe hace scraping ligero"""
        # Primero intentar cargar CSV si existe
        if csv_path and os.path.exists(csv_path):
            try:
                df = pd.read_csv(csv_path)
                # Filtrar por ciudad si está en el CSV
                if 'ciudad' in df.columns:
                    df = df[df['ciudad'].str.contains(city, case=False, na=False)]
                elif 'titulo' in df.columns:
                    # Buscar ciudad en título si no hay columna ciudad
                    df = df[df['titulo'].str.contains(city, case=False, na=False)]
                
                logging.info(f"Cargados {len(df)} registros desde CSV para {city}")
                return df
                
            except Exception as e:
                logging.warning(f"Error cargando CSV: {e}")
        
        # Si no hay CSV y Selenium está disponible, hacer scraping mínimo
        if SELENIUM_AVAILABLE:
            logging.info(f"Realizando scraping rápido para {city}")
            reviews = self.scrape_quick_reviews(city, max_items=5)
            
            if reviews:
                return pd.DataFrame(reviews)
        
        return pd.DataFrame()

In [11]:
class DailyRerankingSystem:
    """Re-rank base recommendations with dynamic, real-time factors.

    Three signals are combined on top of each base score: the current weather
    fetched from the OpenWeatherMap API, a time-of-day / day-of-week factor
    and, when enabled, insights derived from scraped reviews.
    """

    # Maximum age, in seconds, of a cached weather reading (one hour).
    WEATHER_CACHE_TTL = 3600

    # Keywords searched in the review text to build the "recent topics" list.
    TOPIC_KEYWORDS = ('playa', 'museo', 'restaurante', 'hotel', 'tour', 'transporte')

    def __init__(self, model_path=None, enable_scraping=False):
        """Set up caches, the optional scraper and the optional trained model.

        Args:
            model_path: Optional path to a saved Keras model. When the file
                exists it is loaded together with its ``*_mappings.pkl``
                companion file, if that exists too.
            enable_scraping: When truthy, an ``OptimizedScraper`` is created
                and scraping insights take part in the re-ranking.
        """
        self.weather_cache = {}
        self.trend_cache = {}
        self.scraping_cache = {}
        self.api_key = WEATHER_API_KEY
        self.enable_scraping = enable_scraping
        self.scraper = OptimizedScraper(headless=True) if self.enable_scraping else None

        # Always define these attributes so that reading them never raises
        # AttributeError when no model file is available.
        self.model = None
        self.user_to_idx = {}
        self.item_to_idx = {}
        self.city_to_idx = {}

        if model_path and os.path.exists(model_path):
            self.model = tf.keras.models.load_model(model_path)

            mappings_path = model_path.replace('.h5', '_mappings.pkl')
            if os.path.exists(mappings_path):
                # NOTE(review): pickle can execute arbitrary code while
                # loading; only point this at files produced by this system.
                with open(mappings_path, 'rb') as f:
                    mappings = pickle.load(f)
                self.user_to_idx = mappings.get('user_to_idx', {})
                self.item_to_idx = mappings.get('item_to_idx', {})
                self.city_to_idx = mappings.get('city_to_idx', {})

    def get_scraping_insights(self, city, csv_path=None):
        """Summarise scraped reviews for ``city``.

        Args:
            city: City name forwarded to the scraper.
            csv_path: Optional CSV path forwarded to the scraper.

        Returns:
            A dict with ``total_reviews``, ``recent_topics`` and
            ``sentiment_trend`` ('positive', 'negative' or 'neutral'), or
            ``None`` when scraping is disabled, no rows were found or an
            error occurred. Results are cached per city and calendar day.
        """
        if not self.enable_scraping or not self.scraper:
            return None

        cache_key = f"{city}_{datetime.now().strftime('%Y-%m-%d')}"
        if cache_key in self.scraping_cache:
            return self.scraping_cache[cache_key]

        try:
            df = self.scraper.load_or_scrape(city, csv_path)
            if df.empty:
                return None

            insights = {
                'total_reviews': len(df),
                'recent_topics': [],
                'sentiment_trend': 'neutral'
            }

            if 'sentimiento' in df.columns:
                # Positive needs a 60% majority; negative is flagged from 40%.
                sentiment_counts = df['sentimiento'].value_counts()
                total = len(df)
                if sentiment_counts.get('positivo', 0) > total * 0.6:
                    insights['sentiment_trend'] = 'positive'
                elif sentiment_counts.get('negativo', 0) > total * 0.4:
                    insights['sentiment_trend'] = 'negative'

            if 'texto' in df.columns:
                # Very simplified topic extraction: plain keyword presence.
                # The text is lower-cased once instead of once per keyword.
                all_text = ' '.join(df['texto'].dropna().astype(str)).lower()
                insights['recent_topics'] = [
                    keyword for keyword in self.TOPIC_KEYWORDS if keyword in all_text
                ]

            self.scraping_cache[cache_key] = insights
            return insights

        except Exception as e:
            logging.warning(f"Error obteniendo insights de scraping: {e}")

        return None

    def get_real_time_weather(self, city):
        """Return current weather for ``city`` from the OpenWeatherMap API.

        Readings are cached per city for ``WEATHER_CACHE_TTL`` seconds.

        Args:
            city: City name sent as the ``q`` query parameter.

        Returns:
            A dict with at least ``description``, ``temperature``,
            ``humidity``, ``wind_speed``, ``rain`` and ``condition``. When
            the request fails or does not return HTTP 200, a fixed default
            reading is returned instead (and is not cached).
        """
        cached = self.weather_cache.get(city)
        if cached is not None:
            # total_seconds() rather than .seconds: .seconds wraps around
            # every 24 hours, which made day-old entries look fresh.
            age = (datetime.now() - cached['timestamp']).total_seconds()
            if age < self.WEATHER_CACHE_TTL:
                return cached['data']

        try:
            # Pass the query through ``params`` so the city is URL-encoded
            # and cannot inject extra query parameters.
            response = requests.get(
                "https://api.openweathermap.org/data/2.5/weather",
                params={
                    'q': city,
                    'appid': self.api_key,
                    'units': 'metric',
                    'lang': 'es',
                },
                timeout=5,
            )

            if response.status_code == 200:
                datos = response.json()
                main_block = datos["main"]
                condition_block = datos["weather"][0]

                weather_data = {
                    'description': condition_block["description"],
                    'temperature': main_block["temp"],
                    'temp_min': main_block["temp_min"],
                    'temp_max': main_block["temp_max"],
                    'humidity': main_block["humidity"],
                    'feels_like': main_block["feels_like"],
                    'pressure': main_block["pressure"],
                    'wind_speed': datos.get("wind", {}).get("speed", 0),
                    'clouds': datos.get("clouds", {}).get("all", 0),
                    'rain': datos.get("rain", {}).get("1h", 0),
                    'condition': condition_block["main"]
                }

                self.weather_cache[city] = {
                    'data': weather_data,
                    'timestamp': datetime.now()
                }

                return weather_data

            logging.warning(
                f"Respuesta inesperada de la API de clima para {city}: "
                f"HTTP {response.status_code}"
            )

        except Exception as e:
            logging.error(f"Error obteniendo clima para {city}: {e}")

        # Neutral default reading used whenever the API is unavailable.
        return {
            'description': 'parcialmente nublado',
            'temperature': 20,
            'humidity': 60,
            'wind_speed': 5,
            'rain': 0,
            'condition': 'Clouds'
        }

    def calculate_weather_factor(self, weather_data):
        """Turn a weather reading into multiplicative adjustment factors.

        Args:
            weather_data: Dict as returned by ``get_real_time_weather``.
                Missing keys fall back to mild defaults.

        Returns:
            A dict with the individual factors (``temperature``, ``rain``,
            ``condition``, ``humidity``, ``wind``), their weighted
            ``overall`` combination, and the activity-specific multipliers
            ``indoor_boost`` and ``outdoor_penalty``.
        """
        temp = weather_data.get('temperature', 20)
        rain = weather_data.get('rain', 0)
        condition = weather_data.get('condition', 'Clear')
        humidity = weather_data.get('humidity', 60)
        wind = weather_data.get('wind_speed', 5)

        # Temperature: ideal between 18 and 25 °C, heavily penalised below
        # 10 °C or above 35 °C, linear fall-off around 21.5 °C in between.
        if 18 <= temp <= 25:
            temp_factor = 1.0
        elif temp < 10 or temp > 35:
            temp_factor = 0.5
        else:
            temp_factor = 0.7 + (0.3 * (1 - abs(temp - 21.5) / 15))

        # Rain volume (the API field read upstream is the "1h" amount).
        if rain == 0:
            rain_factor = 1.0
        elif rain < 2:
            rain_factor = 0.8
        elif rain < 5:
            rain_factor = 0.6
        else:
            rain_factor = 0.3

        # General condition; unknown conditions get a middle-of-the-road 0.7.
        condition_factors = {
            'Clear': 1.0,
            'Clouds': 0.85,
            'Rain': 0.5,
            'Drizzle': 0.7,
            'Thunderstorm': 0.3,
            'Snow': 0.4,
            'Mist': 0.7,
            'Fog': 0.6
        }
        condition_factor = condition_factors.get(condition, 0.7)

        # Humidity: ideal between 40% and 60%.
        if 40 <= humidity <= 60:
            humidity_factor = 1.0
        else:
            humidity_factor = 0.9 - abs(humidity - 50) * 0.002

        # Wind thresholds.
        # NOTE(review): these thresholds were originally described as km/h,
        # but the request uses units=metric, for which OpenWeatherMap
        # documents wind speed in m/s. Confirm the intended unit.
        if wind < 10:
            wind_factor = 1.0
        elif wind < 20:
            wind_factor = 0.8
        else:
            wind_factor = 0.6

        overall_factor = (
            temp_factor * 0.3 +
            rain_factor * 0.25 +
            condition_factor * 0.25 +
            humidity_factor * 0.1 +
            wind_factor * 0.1
        )

        return {
            'overall': overall_factor,
            'temperature': temp_factor,
            'rain': rain_factor,
            'condition': condition_factor,
            'humidity': humidity_factor,
            'wind': wind_factor,
            'indoor_boost': 1.5 if rain > 2 or temp < 10 or temp > 30 else 1.0,
            'outdoor_penalty': 0.5 if rain > 5 or wind > 20 else 1.0
        }

    def calculate_temporal_factor(self, current_time=None):
        """Compute the time-of-day and day-of-week adjustment.

        Args:
            current_time: ``datetime`` to evaluate; defaults to now.

        Returns:
            A dict with ``hour_factor``, ``weekday_factor``, their product
            ``overall``, and the booleans ``peak_time`` and ``weekend``.
        """
        if current_time is None:
            current_time = datetime.now()

        hour = current_time.hour
        weekday = current_time.weekday()

        # Peak hours 10-20, shoulder hours 8-10 and 20-22, low otherwise.
        if 10 <= hour <= 20:
            hour_factor = 1.0
        elif 8 <= hour < 10 or 20 < hour <= 22:
            hour_factor = 0.8
        else:
            hour_factor = 0.4

        # weekday(): Monday is 0, so 5 and 6 are the weekend and 4 is Friday.
        if weekday >= 5:
            weekday_factor = 1.2
        elif weekday == 4:
            weekday_factor = 1.1
        else:
            weekday_factor = 0.9

        return {
            'hour_factor': hour_factor,
            'weekday_factor': weekday_factor,
            'overall': hour_factor * weekday_factor,
            'peak_time': 10 <= hour <= 20,
            'weekend': weekday >= 5
        }

    def rerank_recommendations(self, base_recommendations, city, user_preferences=None, csv_path=None):
        """Re-rank recommendations using weather, time and scraping signals.

        Args:
            base_recommendations: Iterable of dicts. ``combined_score`` is
                the base score (0.5 when missing); the optional ``tipo``
                ('indoor' / 'outdoor') and ``titulo`` keys refine the
                adjustment.
            city: City used for the weather lookup and scraping insights.
            user_preferences: Accepted for interface compatibility; not used
                by the current implementation.
            csv_path: Optional CSV path forwarded to the scraping insights.

        Returns:
            A new list of copies of the input dicts, extended with the
            applied factors and sorted by ``final_score`` in descending
            order.
        """
        logging.info(f"Re-rankeando recomendaciones para {city}...")

        weather_data = self.get_real_time_weather(city)
        weather_factors = self.calculate_weather_factor(weather_data)
        temporal_factors = self.calculate_temporal_factor()
        temporal_adjustment = temporal_factors['overall']

        scraping_insights = None
        if self.enable_scraping:
            scraping_insights = self.get_scraping_insights(city, csv_path)

        # The sentiment part of the scraping boost is the same for every
        # item, so it is computed once outside the loop.
        sentiment_boost = 1.0
        trending_topics = []
        if scraping_insights:
            trend = scraping_insights.get('sentiment_trend')
            if trend == 'positive':
                sentiment_boost = 1.1
            elif trend == 'negative':
                sentiment_boost = 0.9
            trending_topics = scraping_insights.get('recent_topics', [])

        reranked = []
        for rec in base_recommendations:
            base_score = rec.get('combined_score', 0.5)

            # Extra boost when the title mentions a trending topic. A missing
            # or non-string title is treated as empty text instead of
            # crashing on .lower().
            scraping_boost = sentiment_boost
            if trending_topics and rec.get('titulo'):
                item_title = str(rec['titulo']).lower()
                if any(topic in item_title for topic in trending_topics):
                    scraping_boost *= 1.15

            # Activity-specific adjustment. Both multipliers are 1.0 when
            # their trigger does not fire, so they are applied directly. The
            # former extra "rain factor < 0.7" guard made the temperature
            # (indoor) and wind (outdoor) triggers unreachable on dry days.
            weather_adjustment = weather_factors['overall']
            activity_type = rec.get('tipo')
            if activity_type == 'outdoor':
                weather_adjustment *= weather_factors['outdoor_penalty']
            elif activity_type == 'indoor':
                weather_adjustment *= weather_factors['indoor_boost']

            # 40% of the base score is always kept; the rest is modulated.
            final_score = base_score * (
                0.4 +
                0.25 * weather_adjustment +
                0.25 * temporal_adjustment +
                0.1 * scraping_boost
            )

            reranked_rec = rec.copy()
            reranked_rec.update({
                'original_score': base_score,
                'final_score': final_score,
                'weather_factor': weather_adjustment,
                'temporal_factor': temporal_adjustment,
                'scraping_boost': scraping_boost,
                'weather_data': weather_data,
                'scraping_insights': scraping_insights,
                'adjustments_applied': True
            })

            reranked.append(reranked_rec)

        reranked.sort(key=lambda x: x['final_score'], reverse=True)

        return reranked

### Pipeline Completo

In [13]:
class TourismRecommenderPipeline:
    """End-to-end automated pipeline with optional scraping support.

    Orchestrates data loading, model training/loading, dynamic re-ranking
    and persistence of recommendations and model metrics in SQLite.
    """

    def __init__(self, config=None):
        """Create the pipeline components and make sure the database exists.

        Args:
            config: Optional dict of settings. It is merged over the defaults
                from ``_get_default_config`` so that a partial configuration
                never causes a ``KeyError`` later on.
        """
        self.config = {**self._get_default_config(), **(config or {})}
        self.preprocessor = TourismDataPreprocessor()
        self.model = TourismRecommenderModel()

        # The re-ranking system is created with or without scraping support.
        enable_scraping = self.config.get('enable_scraping', False)
        self.reranking_system = DailyRerankingSystem(enable_scraping=enable_scraping)

        self.db_path = self.config.get('db_path', 'tourism_recommender.db')
        self._setup_database()

    def _get_default_config(self):
        """Return the default configuration dict."""
        return {
            'model_path': 'tourism_model.h5',
            'db_path': 'tourism_recommender.db',
            'batch_size': 128,
            'epochs': 30,
            'embedding_dim': 32,
            'dense_units': 64,
            'max_users': 10000,
            'max_items': 5000,
            'enable_scraping': False,  # Disabled by default
            'scraping_csv_path': None  # Path to the scraping CSV, if any
        }

    def _setup_database(self):
        """Create the SQLite tables used by the pipeline if they are missing."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Stored recommendations.
            cursor.execute('''
            CREATE TABLE IF NOT EXISTS recommendations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id TEXT,
                city TEXT,
                item_id TEXT,
                score REAL,
                weather_factor REAL,
                temporal_factor REAL,
                timestamp DATETIME,
                metadata TEXT
            )
        ''')

            # Model training metrics.
            cursor.execute('''
            CREATE TABLE IF NOT EXISTS model_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp DATETIME,
                val_loss REAL,
                val_accuracy REAL,
                training_time REAL,
                num_users INTEGER,
                num_items INTEGER
            )
        ''')

            conn.commit()
        finally:
            # Close the connection even when a statement fails.
            conn.close()

    def run_complete_pipeline(self, file_paths, retrain=False):
        """Run every pipeline stage: load, match, train/load, re-ranking setup.

        Args:
            file_paths: Passed to the preprocessor's
                ``load_and_clean_datasets``.
            retrain: When true, train a new model even if a saved one exists.

        Returns:
            ``True`` on success, ``False`` when any stage raised (the error
            is logged, not propagated).
        """
        logging.info("=== INICIANDO PIPELINE COMPLETO ===")
        start_time = time.time()

        try:
            # 1. Load and clean the data.
            logging.info("Paso 1: Cargando datos...")
            datasets = self.preprocessor.load_and_clean_datasets(file_paths)

            # Test for None explicitly: ``not df`` is ambiguous for a pandas
            # DataFrame and raises ValueError even when the data is valid.
            activities = datasets.get('activities')
            if activities is None or activities.empty:
                raise ValueError("No se encontraron datos de actividades")

            reviews = datasets.get('reviews')
            if reviews is None or reviews.empty:
                raise ValueError("No se encontraron datos de reviews")

            # 2. Match reviews to activities.
            logging.info("Paso 2: Emparejando reviews con actividades...")
            matched_reviews = self.preprocessor.match_reviews_to_activities(
                activities,
                reviews
            )

            if matched_reviews.empty:
                raise ValueError("No se pudieron emparejar reviews con actividades")

            # 3. Create synthetic user ids.
            logging.info("Paso 3: Creando user_ids sintéticos...")
            matched_reviews = self.preprocessor.create_synthetic_user_ids(matched_reviews)

            # 4. Train a new model or load the saved one.
            model_path = self.config['model_path']

            if retrain or not os.path.exists(model_path):
                logging.info("Paso 4: Entrenando modelo...")

                training_data = self.model.prepare_training_data(matched_reviews)

                # Cap the number of training rows to keep GPU memory in check.
                if len(training_data) > self.config['max_users']:
                    training_data = training_data.sample(self.config['max_users'])

                num_users = len(self.model.user_to_idx)
                num_items = len(self.model.item_to_idx)
                num_cities = len(self.model.city_to_idx)

                self.model.build_hybrid_model(
                    num_users=num_users,
                    num_items=num_items,
                    num_cities=num_cities,
                    contextual_features_dim=10,
                    weather_features_dim=15
                )

                training_started = time.time()
                history = self.model.train_model(
                    training_data,
                    epochs=self.config['epochs'],
                    batch_size=self.config['batch_size']
                )
                training_time = time.time() - training_started

                self.model.save_model(model_path)

                # Record the measured training time instead of a constant 0.
                self._save_metrics(
                    history, num_users, num_items, training_time=training_time
                )

            else:
                logging.info("Paso 4: Cargando modelo existente...")
                self.model.model = tf.keras.models.load_model(model_path)

                mappings_path = model_path.replace('.h5', '_mappings.pkl')
                # NOTE(review): pickle can execute arbitrary code while
                # loading; only use mapping files written by this pipeline.
                with open(mappings_path, 'rb') as f:
                    mappings = pickle.load(f)
                self.model.user_to_idx = mappings['user_to_idx']
                self.model.item_to_idx = mappings['item_to_idx']
                self.model.city_to_idx = mappings['city_to_idx']

            # 5. Rebuild the re-ranking system around the current model.
            logging.info("Paso 5: Actualizando sistema de re-ranking...")
            self.reranking_system = DailyRerankingSystem(
                model_path=model_path,
                enable_scraping=self.config.get('enable_scraping', False)
            )

            duration = time.time() - start_time
            logging.info(f"=== PIPELINE COMPLETADO EN {duration/60:.1f} MINUTOS ===")

            return True

        except Exception as e:
            # Top-level boundary: report the failure (with traceback) and
            # signal it through the return value.
            logging.error(f"Error en pipeline: {e}", exc_info=True)
            return False

    def _save_metrics(self, history, num_users, num_items, training_time=0.0):
        """Store the best validation metrics of a training run.

        Args:
            history: Object whose ``history`` attribute maps metric names to
                per-epoch value lists (as returned by the model's training).
            num_users: Number of users known to the model.
            num_items: Number of items known to the model.
            training_time: Training duration in seconds.
        """
        val_losses = history.history.get('val_loss')
        # NULL is stored when no validation loss was recorded.
        val_loss = min(val_losses) if val_losses else None
        val_acc = max(history.history.get('val_interaction_accuracy') or [0])

        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute('''
            INSERT INTO model_metrics 
            (timestamp, val_loss, val_accuracy, training_time, num_users, num_items)
            VALUES (?, ?, ?, ?, ?, ?)
        ''', (
                # Explicit text timestamp: the implicit sqlite3 datetime
                # adapter is deprecated since Python 3.12. The format is the
                # one that adapter produced.
                datetime.now().isoformat(" "),
                val_loss,
                val_acc,
                training_time,
                num_users,
                num_items,
            ))
            conn.commit()
        finally:
            conn.close()

    def generate_recommendations(self, user_id, city, num_recommendations=10):
        """Generate personalised recommendations with dynamic re-ranking.

        Args:
            user_id: User identifier; unknown users fall back to index 0.
            city: City name; unknown cities fall back to index 0.
            num_recommendations: Maximum number of results to return.

        Returns:
            The top ``num_recommendations`` re-ranked recommendation dicts.
            They are also written to the database.

        Raises:
            ValueError: If no model has been trained or loaded yet.
        """
        logging.info(f"Generando recomendaciones para {user_id} en {city}")

        if self.model.model is None:
            raise ValueError("Modelo no cargado. Ejecute el pipeline primero.")

        user_idx = self.model.user_to_idx.get(user_id, 0)
        city_idx = self.model.city_to_idx.get(city, 0)

        # Only the first 100 items are scored, to keep latency low.
        candidates = list(self.model.item_to_idx.items())[:100]
        base_recommendations = []

        if candidates:
            count = len(candidates)
            # One batched predict call instead of one call per item. The
            # contextual, temporal and weather inputs are neutral
            # placeholders, identical for every candidate.
            X = {
                'user_id': np.array([user_idx] * count),
                'item_id': np.array([item_idx for _, item_idx in candidates]),
                'city_id': np.array([city_idx] * count),
                'contextual_features': np.array([[0.5] * 10] * count),
                'temporal_features': np.array([[0.25, 0.25, 0.25, 0.25]] * count),
                'weather_sentiment': np.array([[0.5] * 15] * count)
            }

            try:
                pred = self.model.model.predict(X, verbose=0)

                for row, (item_id, _) in enumerate(candidates):
                    # The three model outputs are read in this order.
                    rating = float(pred[0][row][0])
                    sentiment = float(pred[1][row][0])
                    interaction = float(pred[2][row][0])
                    base_recommendations.append({
                        'item_id': item_id,
                        'predicted_rating': rating,
                        'predicted_sentiment': sentiment,
                        'interaction_probability': interaction,
                        'combined_score': (
                            rating * 0.4 +
                            sentiment * 0.3 +
                            interaction * 0.3
                        )
                    })
            except Exception as e:
                # Best effort, but no longer silent: log and continue with
                # no base recommendations.
                logging.warning(f"Error generando predicciones base: {e}")
                base_recommendations = []

        # Apply re-ranking with real weather and optional scraping.
        csv_path = self.config.get('scraping_csv_path')
        reranked = self.reranking_system.rerank_recommendations(
            base_recommendations, city, csv_path=csv_path
        )

        top_recommendations = reranked[:num_recommendations]
        self._save_recommendations(user_id, city, top_recommendations)

        return top_recommendations

    def _save_recommendations(self, user_id, city, recommendations):
        """Persist a list of recommendation dicts for a user and city.

        Args:
            user_id: User the recommendations were generated for.
            city: City the recommendations were generated for.
            recommendations: Iterable of dicts with at least ``item_id``;
                missing scores and factors fall back to neutral values.
        """
        # Same text format the implicit sqlite3 datetime adapter produced.
        timestamp = datetime.now().isoformat(" ")
        rows = [
            (
                user_id,
                city,
                rec['item_id'],
                rec.get('final_score', 0),
                rec.get('weather_factor', 1.0),
                rec.get('temporal_factor', 1.0),
                timestamp,
                json.dumps(rec.get('weather_data', {}))
            )
            for rec in recommendations
        ]

        conn = sqlite3.connect(self.db_path)
        try:
            conn.executemany('''
                INSERT INTO recommendations 
                (user_id, city, item_id, score, weather_factor, temporal_factor, timestamp, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ''', rows)
            conn.commit()
        finally:
            conn.close()

### Sistema de explicabilidad

In [15]:
class ExplainabilityEngine:
    """Explainability engine for the generated recommendations."""

    def generate_explanation(self, recommendation, city):
        """Build a human-readable explanation for one recommendation.

        Args:
            recommendation: Recommendation dict. ``item_id`` is required;
                ``original_score`` / ``combined_score``, ``final_score``,
                ``weather_data``, ``temporal_factor`` and
                ``scraping_insights`` are used when present.
            city: City name shown in the weather sentence.

        Returns:
            A dict with ``item_id``, the final ``score`` and the list of
            ``explanations`` (user-facing sentences).

        Raises:
            KeyError: If ``recommendation`` has no ``item_id``.
        """
        explanations = []

        # Base score: prefer the score recorded before re-ranking.
        base_score = recommendation.get('original_score', recommendation.get('combined_score', 0))
        explanations.append(f"Puntuación base: {base_score:.2%}")

        # Weather. Empty, None or partial weather data is tolerated: each
        # sentence is only emitted when its value is actually available.
        weather = recommendation.get('weather_data') or {}
        if weather.get('description') is not None:
            explanations.append(f"Clima actual en {city}: {weather['description']}")
        if weather.get('temperature') is not None:
            explanations.append(f"Temperatura: {weather['temperature']:.1f}°C")
        rain = weather.get('rain') or 0
        if rain > 0:
            explanations.append(f"Lluvia: {rain:.1f}mm/h")

        # Temporal factor: only highlighted when clearly favourable.
        if recommendation.get('temporal_factor', 1.0) > 1.1:
            explanations.append("Horario ideal para esta actividad")

        # Scraping insights.
        insights = recommendation.get('scraping_insights')
        if insights:
            trend = insights.get('sentiment_trend')
            if trend == 'positive':
                explanations.append("Opiniones recientes muy positivas")
            elif trend == 'negative':
                explanations.append("Algunas opiniones negativas recientes")

            if insights.get('recent_topics'):
                explanations.append(f"Temas populares: {', '.join(insights['recent_topics'][:3])}")

        # Relative change between base and final score. Skipped when the
        # base score is 0 because the ratio is undefined there (this used to
        # raise ZeroDivisionError).
        final_score = recommendation.get('final_score', base_score)
        if final_score != base_score and base_score:
            improvement = ((final_score / base_score) - 1) * 100
            if improvement > 0:
                explanations.append(f"Mejora por condiciones actuales: +{improvement:.0f}%")
            else:
                explanations.append(f"Ajuste por condiciones: {improvement:.0f}%")

        return {
            'item_id': recommendation['item_id'],
            'score': final_score,
            'explanations': explanations
        }

### Función principal y Scheduler

In [19]:
def run_daily_update(config_file='config.json'):
    """Run the daily update of the recommender system.

    Args:
        config_file: Path to a JSON configuration file. When the file does
            not exist a built-in default configuration is used.

    Returns:
        ``True`` when the pipeline completed successfully. ``False`` when
        the pipeline failed, the configuration file could not be read or
        parsed, or the configuration has no ``file_paths`` entry.
    """
    logging.info("Iniciando actualización diaria...")

    if os.path.exists(config_file):
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                config = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and decoding errors.
            # Report failure instead of raising so that a broken config file
            # cannot take down a scheduler that calls this job.
            logging.error(f"No se pudo leer la configuración {config_file}: {e}")
            return False
    else:
        config = {
            'file_paths': {
                'activities': ['activities.csv'],
                'reviews': 'reviews.csv'
            },
            'model_path': 'tourism_model.h5',
            'db_path': 'tourism_recommender.db',
            'enable_scraping': False
        }

    # Validate before building the pipeline: without the data locations
    # there is nothing to run.
    if not isinstance(config, dict) or 'file_paths' not in config:
        logging.error(f"Configuración inválida en {config_file}: falta 'file_paths'")
        return False

    pipeline = TourismRecommenderPipeline(config)
    success = pipeline.run_complete_pipeline(
        config['file_paths'],
        retrain=False  # Only retrain when necessary
    )

    if success:
        logging.info("Actualización diaria completada exitosamente")
    else:
        logging.error("Error en actualización diaria")

    return success

In [21]:
def schedule_daily_updates(run_at="03:00", poll_interval=60):
    """Schedule the daily update and run the scheduler loop forever.

    Args:
        run_at: Time of day, as ``"HH:MM"``, at which ``run_daily_update``
            is triggered. Defaults to 3 AM.
        poll_interval: Seconds to sleep between checks for pending jobs.

    This function blocks and never returns.
    """
    # Imported here so the rest of the file works without the optional
    # ``schedule`` package installed.
    import schedule

    schedule.every().day.at(run_at).do(run_daily_update)

    logging.info(f"Scheduler iniciado. Actualización diaria programada a las {run_at}")

    while True:
        try:
            schedule.run_pending()
        except Exception:
            # An exception raised by a job propagates out of run_pending();
            # without this guard a single failed update would stop all
            # future daily runs.
            logging.exception("Error ejecutando la actualización programada")
        time.sleep(poll_interval)

In [23]:
def main():
    """Example entry point: train the pipeline and print recommendations.

    Builds a sample configuration, runs the complete pipeline with forced
    retraining and, on success, prints weather-only recommendations for
    Madrid and (when scraping is enabled) scraping-aware recommendations for
    Barcelona, followed by usage notes about the scraping mode.
    """

    # Configuration with optional scraping.
    config = {
        'file_paths': {
            'activities': [
                'atracciones_civitatis_procesado.csv',
                'booking_atracciones_limpios.csv',
                'booking_hoteles_limpio.csv',
                'detripadvisor_procesado.csv',
                'getyourguide_procesado.csv'
            ],
            'reviews': 'comentarios_final_definitivo_con_descripcion.csv'
        },
        'model_path': 'tourism_model_rtx3050ti.h5',
        'db_path': 'tourism_recommender.db',
        'batch_size': 128,
        'epochs': 30,
        'max_users': 10000,
        'max_items': 5000,
        'enable_scraping': False,  # Set to True to enable scraping
        'scraping_csv_path': 'reviews_scraping_20250916_204537.csv'  # Path to the scraping CSV
    }

    pipeline = TourismRecommenderPipeline(config)

    # Option 1: run the complete pipeline (training).
    print("Ejecutando pipeline completo...")
    success = pipeline.run_complete_pipeline(
        config['file_paths'],
        retrain=True  # Force retraining
    )

    if success:
        # Option 2: generate recommendations, with optional scraping.
        print("\nGenerando recomendaciones...")

        # Without scraping (weather only).
        recommendations_weather_only = pipeline.generate_recommendations(
            user_id='user_1',
            city='Madrid',
            num_recommendations=5
        )

        # With scraping, when enabled. Initialised up front so the display
        # code below can test it directly instead of inspecting locals().
        recommendations_with_scraping = []
        if config['enable_scraping']:
            pipeline.config['enable_scraping'] = True
            pipeline.reranking_system = DailyRerankingSystem(enable_scraping=True)

            recommendations_with_scraping = pipeline.generate_recommendations(
                user_id='user_1',
                city='Barcelona',
                num_recommendations=5
            )

        # Kept from the original example; the engine is created but not
        # used by the printing code below.
        explainer = ExplainabilityEngine()

        print("\n=== RECOMENDACIONES PARA MADRID (Solo Clima) ===")
        if recommendations_weather_only:
            weather = recommendations_weather_only[0].get('weather_data', {})
            print(f"Clima actual: {weather.get('description', 'No disponible')}")
            print(f"Temperatura: {weather.get('temperature', 'N/A')}°C")
            print("\nTop 5 actividades recomendadas:")

            for i, rec in enumerate(recommendations_weather_only, 1):
                print(f"\n{i}. {rec['item_id']}")
                print(f"   Score final: {rec.get('final_score', 0):.3f}")
                print(f"   Factor clima: {rec.get('weather_factor', 1.0):.2f}")
                print(f"   Factor temporal: {rec.get('temporal_factor', 1.0):.2f}")

        # Only shown when there is at least one result, so indexing the
        # first element cannot raise IndexError.
        if config['enable_scraping'] and recommendations_with_scraping:
            print("\n=== RECOMENDACIONES PARA BARCELONA (Con Scraping) ===")
            insights = recommendations_with_scraping[0].get('scraping_insights', {})
            if insights:
                print(f"Reviews analizadas: {insights.get('total_reviews', 0)}")
                print(f"Tendencia de sentimiento: {insights.get('sentiment_trend', 'neutral')}")
                print(f"Temas populares: {', '.join(insights.get('recent_topics', []))}")

            print("\nTop 5 actividades recomendadas:")
            for i, rec in enumerate(recommendations_with_scraping[:5], 1):
                print(f"\n{i}. {rec['item_id']}")
                print(f"   Score final: {rec.get('final_score', 0):.3f}")
                print(f"   Boost scraping: {rec.get('scraping_boost', 1.0):.2f}")

    # Usage notes about the scraping mode.
    print("\n" + "="*60)
    print("INFORMACIÓN SOBRE EL SCRAPING:")
    print("="*60)
    print("El sistema puede funcionar de dos formas:")
    print("1. Solo con clima (por defecto): Usa la API de OpenWeatherMap")
    print("2. Con scraping opcional: Lee CSV si existe o hace scraping ligero")
    print("\nPara habilitar scraping:")
    print("  config['enable_scraping'] = True")
    print("  config['scraping_csv_path'] = 'path/al/archivo.csv'")
    print("\nVentajas del modo solo clima:")
    print("  - Más rápido y ligero")
    print("  - No requiere dependencias adicionales")
    print("  - Siempre disponible con API key")
    print("\nVentajas del modo con scraping:")
    print("  - Insights adicionales de reviews recientes")
    print("  - Detección de tendencias y temas populares")
    print("  - Mejor personalización basada en opiniones reales")

    # Uncomment to start the blocking daily scheduler:
    # schedule_daily_updates()

In [None]:
# Script entry point: run the example workflow only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()