In [18]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pickle
from datetime import datetime, timedelta
import os

In [20]:
# Cap the memory TensorFlow may take on the first GPU.
# Device settings can only be changed before the TensorFlow runtime has
# initialised the GPU, so running this cell again in a live kernel makes the
# calls below raise. The error is reported instead of aborting the notebook.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    try:
        # NOTE(review): TensorFlow's GPU guide presents memory growth and a
        # hard memory limit as alternative strategies - confirm that both are
        # really wanted on the same device.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        tf.config.experimental.set_virtual_device_configuration(
            physical_devices[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=3584)]  # limit in MB (~3.5 GB)
        )
        print("GPU configurada para RTX 3050 Ti (3.5GB VRAM limit)")
    except (RuntimeError, ValueError) as config_error:
        # Raised when the device was already configured/initialised.
        print(f"No se pudo configurar la GPU: {config_error}")

In [22]:
class TourismRecommenderModel:
    def __init__(self, embedding_dim=32, dense_units=64):
        """
        Modelo optimizado para RTX 3050 Ti - dimensiones reducidas
        """
        self.embedding_dim = embedding_dim
        self.dense_units = dense_units
        self.model = None
        self.history = None
        
    def build_hybrid_model(self, num_users, num_items, num_cities, 
                          contextual_features_dim=10, weather_features_dim=15):
        """
        Build and compile the compact three-output hybrid recommender.

        NOTE(review): this class defines a second `build_hybrid_model` further
        down. In Python the later definition replaces this one, so confirm
        which variant is meant to be active.

        Args:
            num_users: Number of distinct users (the table gets one extra row).
            num_items: Number of distinct items (the table gets one extra row).
            num_cities: Number of distinct cities (the table gets one extra row).
            contextual_features_dim: Width of the 'contextual_features' input.
            weather_features_dim: Width of the 'weather_sentiment' input.

        Returns:
            The compiled Keras model, which is also stored in `self.model`.
        """
        print("Construyendo modelo híbrido compacto para RTX 3050 Ti...")

        id_width = self.embedding_dim
        half_units = self.dense_units // 2

        def embed(id_tensor, vocabulary_size, width):
            # Embedding table with a light L2 penalty, flattened to a vector.
            table = layers.Embedding(
                vocabulary_size + 1, width,
                embeddings_regularizer=keras.regularizers.l2(1e-6)
            )
            looked_up = table(id_tensor)
            return layers.Flatten()(looked_up)

        # --- Inputs: three scalar IDs, then three dense feature vectors ---
        user_in, item_in, city_in = (
            layers.Input(shape=(), name=key)
            for key in ('user_id', 'item_id', 'city_id')
        )
        context_in = layers.Input(shape=(contextual_features_dim,), name='contextual_features')
        season_in = layers.Input(shape=(4,), name='temporal_features')  # 4-wide temporal vector
        weather_in = layers.Input(shape=(weather_features_dim,), name='weather_sentiment')
        model_inputs = [user_in, item_in, city_in, context_in, season_in, weather_in]

        # --- ID embeddings (the city table is half as wide) ---
        user_vec = embed(user_in, num_users, id_width)
        item_vec = embed(item_in, num_items, id_width)
        city_vec = embed(city_in, num_cities, id_width // 2)

        # --- One small dense branch per feature vector ---
        context_branch = layers.Dense(half_units, activation='relu')(context_in)
        context_branch = layers.Dropout(0.2)(context_branch)

        season_branch = layers.Dense(16, activation='relu')(season_in)

        weather_branch = layers.Dense(32, activation='relu', name='climate_layer')(weather_in)
        weather_branch = layers.Dropout(0.2)(weather_branch)

        # --- Fuse every representation and run the shared trunk ---
        trunk = layers.Concatenate(name='feature_fusion')(
            [user_vec, item_vec, city_vec, context_branch, season_branch, weather_branch]
        )
        for width, layer_name, drop_rate in (
            (self.dense_units, 'main_layer', 0.3),
            (half_units, 'output_layer', 0.2),
        ):
            trunk = layers.Dense(width, activation='relu', name=layer_name)(trunk)
            trunk = layers.Dropout(drop_rate)(trunk)

        # --- Three sigmoid heads on top of the shared trunk ---
        heads = [
            layers.Dense(1, activation='sigmoid', name=head_name)(trunk)
            for head_name in ('rating', 'sentiment', 'interaction')
        ]

        self.model = Model(
            inputs=model_inputs,
            outputs=heads,
            name='TourismRecommenderCompact'
        )

        # Per-head losses, loss weights and metrics are keyed by head name.
        head_losses = {
            'rating': 'mse',
            'sentiment': 'mse',
            'interaction': 'binary_crossentropy'
        }
        head_weights = {'rating': 1.0, 'sentiment': 0.7, 'interaction': 0.3}
        head_metrics = {
            'rating': ['mae'],
            'sentiment': ['mae'],
            'interaction': ['accuracy']
        }
        self.model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=0.002, epsilon=1e-7),
            loss=head_losses,
            loss_weights=head_weights,
            metrics=head_metrics
        )

        # Rough size report: 4 bytes per parameter.
        parameter_count = self.model.count_params()
        print(f"Modelo compacto construido: {parameter_count:,} parámetros")
        print(f"Memoria estimada: ~{parameter_count * 4 / (1024**2):.1f}MB")

        return self.model
    
    def prepare_training_data(self, processed_data):
        """
        Preparación optimizada para GPU rápida
        """
        print("Preparando datos de entrenamiento optimizados para GPU...")
        
        # Usar datos ya optimizados del preprocessor
        matched_reviews_df = processed_data.get('training_sample', pd.DataFrame())
        training_mappings = processed_data.get('training_mappings', {})
        
        if matched_reviews_df.empty:
            raise ValueError("No hay datos de entrenamiento preparados")
        
        self.user_to_idx = training_mappings['user_to_idx']
        self.item_to_idx = training_mappings['item_to_idx'] 
        self.city_to_idx = training_mappings['city_to_idx']
        
        # Crear dataset de entrenamiento compacto
        training_data = []
        
        print(f"Procesando {len(matched_reviews_df)} reviews para entrenamiento...")
        
        for idx, row in matched_reviews_df.iterrows():
            if idx % 5000 == 0:
                print(f"Progreso: {idx}/{len(matched_reviews_df)}")
            
            user_idx = self.user_to_idx.get(row['user_id'], 0)
            item_idx = self.item_to_idx.get(row['item_id'], 0)
            city_idx = self.city_to_idx.get(row['ciudad'], 0)
            
            # Features simplificadas para velocidad
            contextual_features = self._extract_contextual_features_fast(row)
            temporal_features = self._extract_temporal_features_fast(row)
            weather_sentiment_features = self._extract_weather_sentiment_features_fast(row)
            
            # Targets optimizados
            sentiment_cat = row['sentimiento']
            if sentiment_cat == 'negativo':
                rating_target = 0.25
                sentiment_target = 0.0
            elif sentiment_cat == 'positivo':
                rating_target = 0.875
                sentiment_target = 1.0
            else:  # neutro
                rating_target = 0.6
                sentiment_target = 0.5
            
            training_data.append({
                'user_id': user_idx,
                'item_id': item_idx,
                'city_id': city_idx,
                'contextual_features': contextual_features,
                'temporal_features': temporal_features,
                'weather_sentiment': weather_sentiment_features,
                'rating': rating_target,
                'sentiment': sentiment_target,
                'interaction': 1.0,
                'sample_weight': row['confianza']
            })
        
        return pd.DataFrame(training_data)
    
    def _extract_contextual_features_fast(self, review_row):
        """
        Extracción rápida de features contextuales (solo 10 features)
        """
        return [
            0.5,  # Precio normalizado (placeholder)
            0.7,  # Rating normalizado (placeholder)
            0.0,  # Sentiment promedio (placeholder)
            1.0,  # Review count normalizado (placeholder)
            0.8,  # Confianza promedio (placeholder)
            0.5,  # Factor geográfico (placeholder)
            1.0,  # Factor turístico (placeholder)
            0.6,  # Correlación clima (placeholder)
            0.8,  # Factor fuente (placeholder)
            0.5   # Factor general (placeholder)
        ]
    
    def _extract_temporal_features_fast(self, review_row):
        """
        Features temporales mínimas (solo estación)
        """
        try:
            date = pd.to_datetime(review_row.get('fecha'), errors='coerce')
            if pd.isna(date):
                return [0.25, 0.25, 0.25, 0.25]  # Distribución uniforme
            
            month = date.month
            if month in [12, 1, 2]:
                return [1, 0, 0, 0]  # Invierno
            elif month in [3, 4, 5]:
                return [0, 1, 0, 0]  # Primavera
            elif month in [6, 7, 8]:
                return [0, 0, 1, 0]  # Verano
            else:
                return [0, 0, 0, 1]  # Otoño
        except:
            return [0.25, 0.25, 0.25, 0.25]
    
    def _extract_weather_sentiment_features_fast(self, review_row):
        """
        Features climáticas compactas (15 features)
        """
        sentiment_numeric = {'negativo': -1, 'neutro': 0, 'positivo': 1}.get(
            review_row.get('sentimiento', 'neutro'), 0
        )
        
        weather_desc = str(review_row.get('descripcion_sencilla', '')).lower()
        
        features = [
            sentiment_numeric,  # Sentimiento
            review_row.get('confianza', 0.5),  # Confianza
            1 if 'soleado' in weather_desc else 0,  # Soleado
            1 if 'nublado' in weather_desc else 0,  # Nublado
            1 if 'lluvia' in weather_desc else 0,  # Lluvia
            1 if 'calido' in weather_desc or 'cálido' in weather_desc else 0,  # Cálido
            1 if 'frio' in weather_desc or 'frío' in weather_desc else 0,  # Frío
            1 if 'agradable' in weather_desc else 0,  # Agradable
            0.8 if 'soleado' in weather_desc else 0.5,  # Score climático
            1 if sentiment_numeric > 0 and 'lluvia' in weather_desc else 0,  # Resistencia lluvia
            0.7,  # Placeholder 1
            0.5,  # Placeholder 2
            0.6,  # Placeholder 3
            0.4,  # Placeholder 4
            0.8   # Placeholder 5
        ]
        
        return features
    
    def train_model(self, training_data, validation_split=0.2, epochs=30, batch_size=128):
        """
        Fit `self.model` on the prepared records, weighting every sample by
        its 'sample_weight' value.

        NOTE(review): this class defines a second `train_model` further down.
        In Python the later definition replaces this one, so confirm which
        variant is meant to be active.

        Args:
            training_data: DataFrame with the columns 'user_id', 'item_id',
                'city_id', 'contextual_features', 'temporal_features',
                'weather_sentiment', 'rating', 'sentiment', 'interaction'
                and 'sample_weight'.
            validation_split: Fraction of the data Keras holds out for
                validation.
            epochs: Maximum number of epochs (early stopping may end sooner).
            batch_size: Mini-batch size.

        Returns:
            The Keras `History` object, which is also stored in
            `self.history`.
        """
        print("Iniciando entrenamiento optimizado para RTX 3050 Ti...")

        def id_column(name):
            return training_data[name].values.astype('int32')

        def vector_column(name):
            # Each cell holds a list; stack them into a 2-D float matrix.
            return np.array(training_data[name].tolist(), dtype='float32')

        def target_column(name):
            return training_data[name].values.astype('float32')

        # Inputs and targets are keyed by the model's input / output names.
        X = {
            'user_id': id_column('user_id'),
            'item_id': id_column('item_id'),
            'city_id': id_column('city_id'),
            'contextual_features': vector_column('contextual_features'),
            'temporal_features': vector_column('temporal_features'),
            'weather_sentiment': vector_column('weather_sentiment')
        }
        y = {
            'rating': target_column('rating'),
            'sentiment': target_column('sentiment'),
            'interaction': target_column('interaction')
        }
        sample_weights = target_column('sample_weight')

        # All three callbacks watch the validation loss.
        callbacks = [
            keras.callbacks.EarlyStopping(
                monitor='val_loss', patience=5, restore_best_weights=True, verbose=1
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss', factor=0.7, patience=3, min_lr=1e-6, verbose=1
            ),
            keras.callbacks.ModelCheckpoint(
                'best_tourism_model_compact.h5', save_best_only=True, monitor='val_loss', verbose=1
            )
        ]

        print("Configuración de entrenamiento:")
        print(f"  - Batch size: {batch_size} (optimizado para 4GB VRAM)")
        print(f"  - Epochs máximos: {epochs}")
        print(f"  - Samples: {len(training_data):,}")
        print(f"  - Validation split: {validation_split}")

        # `workers` / `use_multiprocessing` are intentionally not passed:
        # Keras documents them as applying to generator / Sequence inputs
        # only, so they had no effect on these in-memory arrays, and newer
        # Keras releases reject them as unknown arguments.
        self.history = self.model.fit(
            X, y,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            sample_weight=sample_weights,
            callbacks=callbacks,
            verbose=1
        )

        print("Entrenamiento completado!")

        # Report the epoch with the lowest validation loss (1-based).
        validation_losses = self.history.history['val_loss']
        best_epoch = np.argmin(validation_losses)
        best_val_loss = min(validation_losses)
        print(f"Mejor época: {best_epoch + 1}")
        print(f"Mejor val_loss: {best_val_loss:.4f}")

        return self.history
        
    def build_hybrid_model(self, num_users, num_items, num_cities, 
                          contextual_features_dim, weather_features_dim=25):
        """
        Build and compile the four-output hybrid recommender with a dedicated
        weather/sentiment branch and a sigmoid gate on that branch.

        NOTE(review): `_extract_weather_sentiment_features` in this class
        returns 10 values and `_extract_contextual_features` returns 20,
        while the weather default here is 25 - confirm that callers pass
        widths matching the extractors they use.

        Args:
            num_users: Number of distinct users (the table gets one extra row).
            num_items: Number of distinct items (the table gets one extra row).
            num_cities: Number of distinct cities (the table gets one extra row).
            contextual_features_dim: Width of the 'contextual_features' input.
            weather_features_dim: Width of the 'weather_sentiment' input.

        Returns:
            The compiled Keras model, which is also stored in `self.model`.
        """
        print("Construyendo modelo híbrido...")

        base_units = self.dense_units
        id_width = self.embedding_dim

        def embed(id_tensor, vocabulary_size, width):
            # L2-regularised embedding table, flattened to a vector.
            table = layers.Embedding(
                vocabulary_size + 1, width,
                embeddings_regularizer=keras.regularizers.l2(1e-5)
            )
            looked_up = table(id_tensor)
            return layers.Flatten()(looked_up)

        # --- Inputs: three scalar IDs, then three dense feature vectors ---
        user_in, item_in, city_in = (
            layers.Input(shape=(), name=key)
            for key in ('user_id', 'item_id', 'city_id')
        )
        context_in = layers.Input(shape=(contextual_features_dim,), name='contextual_features')
        month_in = layers.Input(shape=(12,), name='temporal_features')  # 12-wide month vector
        weather_in = layers.Input(shape=(weather_features_dim,), name='weather_sentiment')
        model_inputs = [user_in, item_in, city_in, context_in, month_in, weather_in]

        # --- ID embeddings (the city table is half as wide) ---
        user_vec = embed(user_in, num_users, id_width)
        item_vec = embed(item_in, num_items, id_width)
        city_vec = embed(city_in, num_cities, id_width // 2)

        # --- Contextual branch ---
        context_branch = layers.Dense(base_units, activation='relu')(context_in)
        context_branch = layers.Dropout(0.3)(context_branch)
        context_branch = layers.Dense(base_units // 2, activation='relu')(context_branch)

        # --- Temporal branch ---
        month_branch = layers.Dense(64, activation='relu')(month_in)
        month_branch = layers.Dropout(0.2)(month_branch)
        month_branch = layers.Dense(32, activation='relu')(month_branch)

        # --- Weather/sentiment branch: 128 -> 64 -> 32 ---
        climate = layers.Dense(128, activation='relu', name='climate_layer_1')(weather_in)
        climate = layers.BatchNormalization()(climate)
        climate = layers.Dropout(0.3)(climate)
        climate = layers.Dense(64, activation='relu', name='climate_layer_2')(climate)
        climate = layers.Dropout(0.2)(climate)
        climate_patterns = layers.Dense(32, activation='relu', name='climate_patterns')(climate)

        # --- Fuse all representations ---
        fused = layers.Concatenate(name='feature_fusion')(
            [user_vec, item_vec, city_vec, context_branch, month_branch, climate_patterns]
        )

        trunk = layers.Dense(base_units * 2, activation='relu', name='deep_layer_1')(fused)
        trunk = layers.BatchNormalization()(trunk)
        trunk = layers.Dropout(0.4)(trunk)

        # Sigmoid gate computed from the trunk, multiplied element-wise onto
        # the weather representation and concatenated back into the trunk.
        gate = layers.Dense(
            climate_patterns.shape[-1], activation='sigmoid', name='weather_attention'
        )(trunk)
        gated_climate = layers.Multiply(name='climate_attention')([climate_patterns, gate])
        trunk = layers.Concatenate(name='climate_enhanced_features')([trunk, gated_climate])

        trunk = layers.Dense(base_units, activation='relu', name='deep_layer_2')(trunk)
        trunk = layers.BatchNormalization()(trunk)
        trunk = layers.Dropout(0.3)(trunk)

        trunk = layers.Dense(base_units // 2, activation='relu', name='deep_layer_3')(trunk)
        trunk = layers.Dropout(0.2)(trunk)

        trunk = layers.Dense(64, activation='relu', name='final_dense')(trunk)
        trunk = layers.Dropout(0.1)(trunk)

        # --- Heads: three on the trunk, one directly on the weather branch ---
        trunk_heads = [
            layers.Dense(1, activation='sigmoid', name=head_name)(trunk)
            for head_name in ('rating', 'sentiment', 'interaction')
        ]
        resilience_head = layers.Dense(
            1, activation='sigmoid', name='climate_resilience'
        )(climate_patterns)

        self.model = Model(
            inputs=model_inputs,
            outputs=trunk_heads + [resilience_head],
            name='TourismRecommenderWithClimateIntelligence'
        )

        # Per-head losses, loss weights and metrics are keyed by head name.
        head_losses = {
            'rating': 'mse',
            'sentiment': 'mse',
            'interaction': 'binary_crossentropy',
            'climate_resilience': 'binary_crossentropy'
        }
        head_weights = {
            'rating': 1.0,
            'sentiment': 0.6,
            'interaction': 0.3,
            'climate_resilience': 0.4
        }
        head_metrics = {
            'rating': ['mae'],
            'sentiment': ['mae'],
            'interaction': ['accuracy'],
            'climate_resilience': ['accuracy']
        }
        self.model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=0.001),
            loss=head_losses,
            loss_weights=head_weights,
            metrics=head_metrics
        )

        print("Modelo con Climate Intelligence construido exitosamente!")
        print(f"Total parámetros: {self.model.count_params():,}")
        return self.model
    
    def prepare_training_data(self, processed_data):
        """
        Prepara datos para entrenamiento del modelo usando sentimientos categóricos
        """
        print("Preparando datos de entrenamiento...")
        
        # Extraer datos matched
        matched_reviews_df = processed_data.get('matched_reviews', pd.DataFrame())
        activities_df = processed_data['raw_datasets']['activities']
        city_features = processed_data['city_features']
        
        if matched_reviews_df.empty:
            raise ValueError("No hay reviews emparejadas con actividades. Verificar proceso de matching.")
        
        # Convertir sentimiento categórico a numérico
        sentiment_to_numeric = {
            'negativo': -1,
            'neutro': 0,
            'positivo': 1
        }
        
        matched_reviews_df['sentimiento_numerico'] = matched_reviews_df['sentimiento'].map(sentiment_to_numeric)
        
        # Verificar que todas las categorías están mapeadas
        unmapped_sentiments = matched_reviews_df[matched_reviews_df['sentimiento_numerico'].isna()]
        if not unmapped_sentiments.empty:
            print(f"⚠️ Encontrados {len(unmapped_sentiments)} sentimientos no mapeados:")
            print(unmapped_sentiments['sentimiento'].value_counts())
            # Asignar neutro a los no mapeados
            matched_reviews_df['sentimiento_numerico'] = matched_reviews_df['sentimiento_numerico'].fillna(0)
        
        # Mostrar distribución de sentimientos
        print("Distribución de sentimientos en datos de entrenamiento:")
        sentiment_dist = matched_reviews_df['sentimiento'].value_counts()
        for sentiment, count in sentiment_dist.items():
            print(f"   {sentiment}: {count} ({count/len(matched_reviews_df)*100:.1f}%)")
        
        # Crear mapeos de IDs
        user_ids = matched_reviews_df['user_id'].unique()
        item_ids = matched_reviews_df['item_id'].unique()
        cities = list(city_features.keys())
        
        self.user_to_idx = {user: idx for idx, user in enumerate(user_ids)}
        self.item_to_idx = {item: idx for idx, item in enumerate(item_ids)}
        self.city_to_idx = {city: idx for idx, city in enumerate(cities)}
        
        # Crear dataset de entrenamiento
        training_data = []
        
        for idx, row in matched_reviews_df.iterrows():
            if idx % 10000 == 0:
                print(f"Procesando review {idx}/{len(matched_reviews_df)}")
            
            user_idx = self.user_to_idx.get(row['user_id'], 0)
            item_idx = self.item_to_idx.get(row['item_id'], 0)
            city_idx = self.city_to_idx.get(row['ciudad'], 0)
            
            # Features contextuales usando datos reales
            contextual_features = self._extract_contextual_features(
                row, row['ciudad'], city_features, activities_df
            )
            
            # Features temporales
            temporal_features = self._extract_temporal_features(row.get('fecha_comentario'))
            
            # Features de clima y sentimiento categórico
            weather_sentiment_features = self._extract_weather_sentiment_features(row)
            
            # Targets usando categorías de sentimiento
            # Rating: convertir sentimiento categórico a escala 0-1
            sentiment_cat = row['sentimiento']
            if sentiment_cat == 'negativo':
                rating_target = 0.25  # Equivalente a rating 1-2
            elif sentiment_cat == 'positivo':
                rating_target = 0.875  # Equivalente a rating 4-5
            else:  # neutro
                rating_target = 0.6  # Equivalente a rating 3
            
            # Sentimiento numérico normalizado [-1,1] -> [0,1]
            sentiment_numeric = row['sentimiento_numerico']
            sentiment_target = (sentiment_numeric + 1) / 2  # De [-1,1] a [0,1]
            
            # Interacción: siempre 1 porque existe la review
            interaction_target = 1.0
            
            # Climate resilience target (tu feature única)
            # 1 si es experiencia positiva con condiciones adversas, 0 otherwise
            weather_desc = str(row.get('descripcion_corta_clima', '')).lower()
            adverse_conditions = any([
                'lluvia intensa' in weather_desc,
                'lluvia moderada' in weather_desc, 
                ('lloviznas' in weather_desc and sentiment_cat == 'positivo'),
                ('muy caluroso' in weather_desc and sentiment_cat == 'positivo'),
                ('frío' in weather_desc and sentiment_cat == 'positivo')
            ])
            
            climate_resilience_target = 1.0 if (adverse_conditions and sentiment_cat == 'positivo') else 0.0
            
            # Pesar por confianza del modelo de sentimiento
            sample_weight = row['confianza']
            
            training_data.append({
                'user_id': user_idx,
                'item_id': item_idx,
                'city_id': city_idx,
                'contextual_features': contextual_features,
                'temporal_features': temporal_features,
                'weather_sentiment': weather_sentiment_features,
                'rating': rating_target,
                'sentiment': sentiment_target,
                'interaction': interaction_target,
                'climate_resilience': climate_resilience_target,
                'sample_weight': sample_weight,
                'sentiment_category': sentiment_cat  # Mantener categoría para análisis
            })
        
        training_df = pd.DataFrame(training_data)
        
        # Análisis de distribución de targets
        print("\nDistribución de targets de entrenamiento:")
        print(f"   Ratings - min: {training_df['rating'].min():.3f}, max: {training_df['rating'].max():.3f}, mean: {training_df['rating'].mean():.3f}")
        print(f"   Sentiments - min: {training_df['sentiment'].min():.3f}, max: {training_df['sentiment'].max():.3f}, mean: {training_df['sentiment'].mean():.3f}")
        print(f"   Sample weights - min: {training_df['sample_weight'].min():.3f}, max: {training_df['sample_weight'].max():.3f}, mean: {training_df['sample_weight'].mean():.3f}")
        
        return training_df
    
    def _extract_contextual_features(self, review_row, city, city_features, activities_df):
        """
        Extrae features contextuales para una review usando datos reales
        """
        # Obtener actividad
        try:
            activity = activities_df[activities_df['item_id'] == review_row['item_id']].iloc[0]
        except:
            # Actividad no encontrada, usar valores por defecto
            activity = pd.Series({
                'precio': 100,
                'rating': 3.5,
                'avg_sentiment': 0,
                'review_count': 0,
                'avg_confidence': 0.5
            })
        
        # Features básicas de la actividad
        features = [
            activity.get('precio', 100) / 1000,  # Normalizar precio
            activity.get('rating', 3.5) / 5.0,   # Normalizar rating
            activity.get('avg_sentiment', 0),     # Sentiment promedio de la actividad
            activity.get('review_count', 0) / 100,  # Normalizar count
            activity.get('avg_confidence', 0.5),    # Confianza promedio
        ]
        
        # Features geográficas
        city_data = city_features.get(city, {})
        geo_data = city_data.get('geo')
        if geo_data is not None and isinstance(geo_data, pd.Series):
            features.extend([
                geo_data.get('densidad_poblacion', 0),
                geo_data.get('densidad_carreteras', 0),
                geo_data.get('win_population', 0) / 1000000,  # Normalizar población
            ])
        else:
            features.extend([0, 0, 0])
        
        # Features de contexto turístico (datos ONU)
        tourism_context = city_data.get('tourism_context', {})
        combined_features = tourism_context.get('combined_features', {})
        
        features.extend([
            combined_features.get('spending_ratio', 1.0),
            combined_features.get('price_sensitivity_factor', 1.0),
            combined_features.get('tourism_volume_ratio', 1.0),
            combined_features.get('demand_pressure_factor', 1.0),
            combined_features.get('domestic_tourism_strength', 0) / 1000000,  # Normalizar
            combined_features.get('local_preference_factor', 1.0),
        ])
        
        # Features de correlación clima-sentimiento (tu análisis único)
        climate_correlation = city_data.get('climate_sentiment_correlation', {})
        features.extend([
            climate_correlation.get('correlation', 0),
            climate_correlation.get('weather_impact_strength', 0),
        ])
        
        # Features de la fuente del review
        source_encoding = {
            'booking': 0.8, 'airbnb': 0.7, 'tripadvisor': 0.9, 
            'google': 0.6, 'expedia': 0.7, 'otros': 0.5
        }
        source_value = source_encoding.get(review_row.get('fuente', 'otros').lower(), 0.5)
        features.append(source_value)
        
        # Padding hasta completar dimensión fija
        while len(features) < 20:  # contextual_features_dim
            features.append(0)
        
        return features[:20]
    
    def _extract_temporal_features(self, date):
        """
        Extrae features temporales usando fecha real del comentario
        """
        if pd.isna(date):
            date = datetime.now()
        elif isinstance(date, str):
            try:
                date = pd.to_datetime(date)
            except:
                date = datetime.now()
        
        # One-hot encoding del mes
        month_features = [0] * 12
        month_features[date.month - 1] = 1
        
        return month_features
    
    def _extract_weather_sentiment_features(self, review_row):
        """
        Extrae features de clima y sentimiento categórico
        """
        # Convertir sentimiento categórico a numérico
        sentiment_to_numeric = {
            'negativo': -1,
            'neutro': 0,
            'positivo': 1
        }
        
        sentiment_numeric = sentiment_to_numeric.get(review_row.get('sentimiento', 'neutro'), 0)
        
        # Tu feature más valiosa: correlación clima-sentimiento
        features = [
            sentiment_numeric,                              # Sentimiento numérico [-1, 0, 1]
            review_row.get('confianza', 0.5),              # Confianza del modelo de sentimiento
        ]
        
        # Encoding de descripción climática
        weather_desc = str(review_row.get('descripcion_corta_clima', '')).lower()
        
        # Features climáticas categóricas (one-hot simplificado)
        weather_features = {
            'soleado': 1 if any(w in weather_desc for w in ['soleado', 'despejado', 'clear']) else 0,
            'nublado': 1 if any(w in weather_desc for w in ['nublado', 'cloudy']) else 0,
            'lluvioso': 1 if any(w in weather_desc for w in ['lluvia', 'rain', 'lluvioso']) else 0,
            'tormentoso': 1 if any(w in weather_desc for w in ['tormenta', 'storm']) else 0,
        }
        
        features.extend(list(weather_features.values()))
        
        # Features derivadas del análisis clima-sentimiento categórico
        # Sentimiento esperado por categoría climática
        expected_sentiment_map = {
            'soleado': 'positivo',
            'nublado': 'neutro', 
            'lluvioso': 'neutro',  # Puede ser neutro o ligeramente negativo
            'tormentoso': 'negativo'
        }
        
        # Determinar clima dominante
        dominant_weather = 'neutro'  # Por defecto
        for weather_type, is_present in weather_features.items():
            if is_present:
                dominant_weather = weather_type
                break
        
        # Sentimiento esperado vs real
        expected_sentiment = expected_sentiment_map.get(dominant_weather, 'neutro')
        actual_sentiment = review_row.get('sentimiento', 'neutro')
        
        # Feature de coincidencia expectativa-realidad
        expectation_match = 1 if expected_sentiment == actual_sentiment else 0
        features.append(expectation_match)
        
        # Feature de sorpresa positiva (mejor de lo esperado)
        positive_surprise = 0
        if expected_sentiment in ['negativo', 'neutro'] and actual_sentiment == 'positivo':
            positive_surprise = 1
        features.append(positive_surprise)
        
        # Feature de sorpresa negativa (peor de lo esperado)
        negative_surprise = 0
        if expected_sentiment in ['positivo', 'neutro'] and actual_sentiment == 'negativo':
            negative_surprise = 1
        features.append(negative_surprise)
        
        # Feature de resistencia climática (positivo a pesar del mal clima)
        climate_resilience = 0
        if weather_features['lluvioso'] or weather_features['tormentoso']:
            if actual_sentiment == 'positivo':
                climate_resilience = 1
        features.append(climate_resilience)
        
        # Padding hasta 10 features
        while len(features) < 10:
            features.append(0)
        
        return features[:10]
    
    def train_model(self, training_data, validation_split=0.2, epochs=100, batch_size=256):
        """
        Train the model, weighting every sample by its 'sample_weight' value.

        Args:
            training_data: Column-indexed table whose columns expose
                ``.values`` (and ``.tolist()`` for the three vector columns).
                Columns read: 'user_id', 'item_id', 'city_id',
                'contextual_features', 'temporal_features',
                'weather_sentiment', 'rating', 'sentiment', 'interaction',
                'climate_resilience' and 'sample_weight'.
            validation_split: Fraction forwarded to ``fit`` as validation split.
            epochs: Maximum number of epochs.
            batch_size: Mini-batch size.

        Returns:
            The history object returned by ``self.model.fit`` (also stored in
            ``self.history``).

        Side effects:
            The checkpoint callback is configured to write
            'best_tourism_model.h5' in the working directory.

        NOTE(review): ``train_model`` is defined a second time further down
        in this class; that later definition is the one Python binds.
        """
        print("Iniciando entrenamiento...")

        # Model inputs, keyed by the names of the model's input layers.
        X = {
            'user_id': training_data['user_id'].values,
            'item_id': training_data['item_id'].values,
            'city_id': training_data['city_id'].values,
            'contextual_features': np.array(training_data['contextual_features'].tolist()),
            'temporal_features': np.array(training_data['temporal_features'].tolist()),
            'weather_sentiment': np.array(training_data['weather_sentiment'].tolist())
        }

        # One target per model output.
        y = {
            'rating': training_data['rating'].values,
            'sentiment': training_data['sentiment'].values,
            'interaction': training_data['interaction'].values,
            'climate_resilience': training_data['climate_resilience'].values
        }

        # Per-sample weights (confidence of the sentiment model).
        sample_weights = training_data['sample_weight'].values

        callbacks = [
            keras.callbacks.EarlyStopping(
                monitor='val_loss', patience=15, restore_best_weights=True
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss', factor=0.5, patience=8, min_lr=1e-7
            ),
            keras.callbacks.ModelCheckpoint(
                'best_tourism_model.h5', save_best_only=True, monitor='val_loss'
            )
        ]

        self.history = self.model.fit(
            X, y,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            sample_weight=sample_weights,
            callbacks=callbacks,
            verbose=1
        )

        print("Entrenamiento completado!")

        # Report the best value of every validation metric that was actually
        # recorded. Which keys exist depends on how the model was compiled and
        # on whether a validation split was used, so indexing them directly
        # could raise KeyError after an otherwise successful training run.
        recorded = self.history.history
        for metric_name in ('val_loss', 'val_rating_mae', 'val_sentiment_mae'):
            values = recorded.get(metric_name)
            if values:
                print(f"Mejor {metric_name}: {min(values):.4f}")

        return self.history
    
    def generate_recommendations(self, user_id, city, num_recommendations=10, 
                               current_date=None, weather_conditions=None):
        """
        Genera recomendaciones para un usuario usando el modelo entrenado
        """
        if current_date is None:
            current_date = datetime.now()
        
        # Obtener todas las actividades de la ciudad
        city_activities = self._get_city_activities(city)  # Implementar según tu estructura
        
        if not city_activities:
            return []
        
        predictions = []
        
        for item_id in city_activities:
            try:
                # Preparar inputs
                user_idx = self.user_to_idx.get(user_id, 0)
                item_idx = self.item_to_idx.get(item_id, 0)
                city_idx = self.city_to_idx.get(city, 0)
                
                # Crear features para predicción
                contextual_features = self._create_prediction_features(
                    item_id, city, current_date, weather_conditions
                )
                
                # Hacer predicción
                pred_input = {
                    'user_id': np.array([user_idx]),
                    'item_id': np.array([item_idx]),
                    'city_id': np.array([city_idx]),
                    'contextual_features': np.array([contextual_features['contextual']]),
                    'temporal_features': np.array([contextual_features['temporal']]),
                    'weather_sentiment': np.array([contextual_features['weather_sentiment']])
                }
                
                pred = self.model.predict(pred_input, verbose=0)
                
                predictions.append({
                    'item_id': item_id,
                    'predicted_rating': float(pred[0][0][0]),
                    'predicted_sentiment': float(pred[1][0][0]),
                    'interaction_probability': float(pred[2][0][0])
                })
                
            except Exception as e:
                print(f"Error prediciendo para item {item_id}: {e}")
                continue
        
        # Calcular score combinado con énfasis en tus features únicas
        for pred in predictions:
            # Dar más peso al sentiment porque es tu feature más confiable
            pred['combined_score'] = (
                pred['predicted_rating'] * 0.3 +
                pred['predicted_sentiment'] * 0.5 +  # Mayor peso al sentimiento
                pred['interaction_probability'] * 0.2
            )
        
        # Ordenar por score combinado
        recommendations = sorted(predictions, key=lambda x: x['combined_score'], reverse=True)
        
        return recommendations[:num_recommendations]
    
    def _get_city_activities(self, city):
        """
        Obtiene actividades disponibles para una ciudad
        """
        # Esta función debería conectar con tu dataset de actividades
        # Por ahora retorno una lista ejemplo
        return list(self.item_to_idx.keys())[:50]  # Limitar para eficiencia
    
    def _create_prediction_features(self, item_id, city, current_date, weather_conditions):
        """
        Crea features para predicción en tiempo real
        """
        # Features contextuales simplificadas para predicción
        contextual_features = [0.5] * 20  # Valores por defecto
        
        # Features temporales basadas en fecha actual
        temporal_features = [0] * 12
        temporal_features[current_date.month - 1] = 1
        
        # Features de clima actual (si se proporcionan)
        weather_sentiment_features = [0] * 10
        if weather_conditions:
            # Procesar condiciones climáticas actuales
            weather_sentiment_features[0] = 0  # Placeholder para sentimiento
            weather_sentiment_features[1] = 0.8  # Confianza alta para predicción
            # Encoding de condiciones actuales...
        
        return {
            'contextual': contextual_features,
            'temporal': temporal_features,
            'weather_sentiment': weather_sentiment_features
        }
    
    def _extract_contextual_features(self, review_row, city, city_features, activities_df):
        """
        Extrae features contextuales para una review
        """
        # Obtener actividad
        activity = activities_df[activities_df['item_id'] == review_row['item_id']].iloc[0]
        
        # Features básicas de la actividad
        features = [
            activity.get('precio', 0) / 1000,  # Normalizar precio
            activity.get('rating', 0) / 5.0,   # Normalizar rating
            activity.get('avg_sentiment', 0),   # Sentiment promedio
            activity.get('review_count', 0) / 100,  # Normalizar count
        ]
        
        # Features geográficas si están disponibles
        city_data = city_features.get(city, {})
        geo_data = city_data.get('geo')
        if geo_data is not None:
            features.extend([
                geo_data.get('densidad_poblacion', 0),
                geo_data.get('densidad_carreteras', 0),
                geo_data.get('win_population', 0) / 1000000,  # Normalizar
            ])
        else:
            features.extend([0, 0, 0])
        
        # Padding hasta completar dimensión fija
        while len(features) < 20:  # contextual_features_dim
            features.append(0)
        
        return features[:20]
    
    def _extract_temporal_features(self, date):
        """
        Extrae features temporales (estacionalidad)
        """
        if isinstance(date, str):
            date = datetime.strptime(date, '%Y-%m-%d')
        elif pd.isna(date):
            date = datetime.now()
        
        # One-hot encoding del mes
        month_features = [0] * 12
        month_features[date.month - 1] = 1
        
        return month_features
    
    def _extract_weather_sentiment_features(self, review_row):
        """
        Extrae features de clima y sentimiento
        """
        features = [
            review_row.get('sentiment_score', 0),
            review_row.get('temperature', 20) / 40,  # Normalizar temperatura
            review_row.get('humidity', 50) / 100,    # Normalizar humedad
            review_row.get('wind_speed', 5) / 20,    # Normalizar viento
            review_row.get('precipitation', 0) / 10, # Normalizar precipitación
        ]
        
        # Padding hasta 10 features
        while len(features) < 10:
            features.append(0)
        
        return features[:10]
    
    def train_model(self, training_data, validation_split=0.2, epochs=100, batch_size=256):
        """
        Fit ``self.model`` on the prepared training table.

        Args:
            training_data: Column-indexed table whose columns expose
                ``.values`` (and ``.tolist()`` for the three vector columns).
                Columns read: 'user_id', 'item_id', 'city_id',
                'contextual_features', 'temporal_features',
                'weather_sentiment', 'rating', 'sentiment' and 'interaction'.
            validation_split: Fraction forwarded to ``fit`` as validation split.
            epochs: Maximum number of epochs.
            batch_size: Mini-batch size.

        Returns:
            The history object returned by ``self.model.fit`` (also stored in
            ``self.history``).

        Side effects:
            The checkpoint callback is configured to write
            'best_tourism_model.h5' in the working directory.

        NOTE(review): this definition overrides the earlier ``train_model``
        in this class, which additionally passes per-sample weights and a
        'climate_resilience' target. Confirm which variant is intended.
        """
        print("Iniciando entrenamiento...")

        id_columns = ('user_id', 'item_id', 'city_id')
        vector_columns = ('contextual_features', 'temporal_features', 'weather_sentiment')
        target_columns = ('rating', 'sentiment', 'interaction')

        # Inputs keyed by input-layer name: id columns are passed as-is,
        # per-row feature lists are stacked into 2-D arrays.
        inputs = {}
        for name in id_columns:
            inputs[name] = training_data[name].values
        for name in vector_columns:
            inputs[name] = np.array(training_data[name].tolist())

        # One target array per model output.
        targets = {}
        for name in target_columns:
            targets[name] = training_data[name].values

        stop_early = keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True
        )
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7
        )
        checkpoint = keras.callbacks.ModelCheckpoint(
            'best_tourism_model.h5', save_best_only=True, monitor='val_loss'
        )

        self.history = self.model.fit(
            inputs, targets,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[stop_early, reduce_lr, checkpoint],
            verbose=1
        )

        print("Entrenamiento completado!")
        return self.history
    
    def generate_recommendations(self, user_id, city, num_recommendations=10, 
                               current_date=None, weather_conditions=None):
        """
        Genera recomendaciones para un usuario
        """
        if current_date is None:
            current_date = datetime.now()
        
        # Obtener todas las actividades de la ciudad
        city_items = []  # Implementar según tu estructura de datos
        
        # Preparar features para predicción
        predictions = []
        
        for item_id in city_items:
            # Preparar inputs
            user_idx = self.user_to_idx.get(user_id, 0)
            item_idx = self.item_to_idx.get(item_id, 0)
            city_idx = self.city_to_idx.get(city, 0)
            
            # Crear features contextuales, temporales y de clima
            contextual_features = self._create_prediction_features(
                item_id, city, current_date, weather_conditions
            )
            
            # Hacer predicción
            pred_input = {
                'user_id': np.array([user_idx]),
                'item_id': np.array([item_idx]),
                'city_id': np.array([city_idx]),
                'contextual_features': np.array([contextual_features['contextual']]),
                'temporal_features': np.array([contextual_features['temporal']]),
                'weather_sentiment': np.array([contextual_features['weather_sentiment']])
            }
            
            pred = self.model.predict(pred_input, verbose=0)
            
            predictions.append({
                'item_id': item_id,
                'predicted_rating': pred[0][0][0],
                'predicted_sentiment': pred[1][0][0],
                'interaction_probability': pred[2][0][0]
            })
        
        # Ordenar por score combinado
        for pred in predictions:
            pred['combined_score'] = (
                pred['predicted_rating'] * 0.4 +
                pred['predicted_sentiment'] * 0.3 +
                pred['interaction_probability'] * 0.3
            )
        
        recommendations = sorted(predictions, key=lambda x: x['combined_score'], reverse=True)
        
        return recommendations[:num_recommendations]
    
    def save_model(self, model_path):
        """
        Guarda el modelo entrenado
        """
        self.model.save(model_path)
        
        # Guardar mapeos
        mappings = {
            'user_to_idx': self.user_to_idx,
            'item_to_idx': self.item_to_idx,
            'city_to_idx': self.city_to_idx
        }
        
        with open(model_path.replace('.h5', '_mappings.pkl'), 'wb') as f:
            pickle.dump(mappings, f)
        
        print(f"Modelo guardado en {model_path}")

In [24]:
class ExplainabilityEngine:
    """Builds short, human-readable explanations for scored recommendations."""

    # (flag key in the recommendation dict, key in 'boost_factors', message)
    _BOOSTS = (
        ('weather_boost', 'weather', "Recomendado por buen clima actual"),
        ('temporal_boost', 'temporal', "Horario ideal para esta actividad"),
        ('trending_boost', 'trending', "Actividad popular últimamente"),
        ('seasonal_boost', 'seasonal', "Temporada alta - experiencia óptima"),
    )

    def __init__(self, reranking_system):
        """Keep a reference to the re-ranking system that produced the scores."""
        self.reranking_system = reranking_system

    def generate_explanation_fast(self, recommendation, user_context=None):
        """
        Produce a quick, simple explanation for one recommendation.

        Args:
            recommendation: Mapping exposing ``.get``. Keys read:
                'original_score' (default 0), 'final_score' (defaults to the
                original score) and the boolean flags 'weather_boost',
                'temporal_boost', 'trending_boost' and 'seasonal_boost'.
            user_context: Accepted for interface compatibility; not used.

        Returns:
            dict with:
                'final_score':   the final (or original) score, unmodified
                'explanations':  list of messages, base score first, then one
                                 message per active boost
                'confidence':    the final score clamped to [0, 1]
                'boost_factors': the four boost flags keyed by short name
        """
        base_score = recommendation.get('original_score', 0)
        final_score = recommendation.get('final_score', base_score)

        explanations = [f"Puntuación base: {base_score:.2f}"]
        boost_factors = {}
        for flag_key, label, message in self._BOOSTS:
            flag = recommendation.get(flag_key, False)
            boost_factors[label] = flag
            if flag:
                explanations.append(message)

        return {
            'final_score': final_score,
            'explanations': explanations,
            # Clamp on both sides: a negative score must not be reported as
            # a negative confidence.
            'confidence': max(0.0, min(1.0, final_score)),
            'boost_factors': boost_factors
        }
