In [None]:
import os
import json
import zipfile
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import kagglehub
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import mean_squared_error, mean_absolute_error
from prophet import Prophet

In [None]:

# Download the latest version of the dataset with kagglehub. The tables are
# read from the directory kagglehub returns, so the CSV files do not have to be
# uploaded by hand and the cell is not tied to the "/kaggle/input" prefix that
# only exists inside Kaggle kernels.
path = kagglehub.dataset_download("amanmehra23/travel-recommendation-dataset")

print("Path to dataset files:", path)

destinations_df = pd.read_csv(os.path.join(path, "Expanded_Destinations.csv"))
reviews_df = pd.read_csv(os.path.join(path, "Final_Updated_Expanded_Reviews.csv"))
userhistory_df = pd.read_csv(os.path.join(path, "Final_Updated_Expanded_UserHistory.csv"))
users_df = pd.read_csv(os.path.join(path, "Final_Updated_Expanded_Users.csv"))

Path to dataset files: /kaggle/input/travel-recommendation-dataset


In [None]:
# Preview the first rows of the user-history table, then list its columns,
# non-null counts and dtypes.
display(userhistory_df.head())
userhistory_df.info()

In [None]:
# Uniform typing: cast the join keys to str in every table that carries them so
# the merges below compare like with like.
# The loop variable is intentionally NOT called `df`: a `for` variable stays
# bound after the loop, and `df` is the name used later for the merged table,
# so re-running this cell would silently rebind `df` to `reviews_df`.
for frame in [users_df, userhistory_df, reviews_df]:
    frame['UserID'] = frame['UserID'].astype(str)
for frame in [destinations_df, userhistory_df, reviews_df]:
    frame['DestinationID'] = frame['DestinationID'].astype(str)


# Number of distinct values per column
print("\nValores únicos por columna:")
print(userhistory_df.nunique())

# Number of missing values per column
print("\nValores nulos por columna:")
print(userhistory_df.isnull().sum())


Valores únicos por columna:
HistoryID           999
UserID              642
DestinationID       638
VisitDate             3
ExperienceRating      5
dtype: int64

Valores nulos por columna:
HistoryID           0
UserID              0
DestinationID       0
VisitDate           0
ExperienceRating    0
dtype: int64


In [None]:
# Build the analysis table: every history row enriched with the user's
# attributes and then with the destination's attributes (left joins, so no
# history row is lost). Both the left side and destinations_df contribute a
# `Name` column to the second join, so pandas suffixes them: `Name_x` comes
# from the left (user) side and `Name_y` from destinations_df.
# The reviews table is not joined in.
merged_df = (
    userhistory_df
    .merge(users_df, on='UserID', how='left')
    .merge(destinations_df, on='DestinationID', how='left')
)

# `df` is an alias of merged_df (same object, not a copy).
df = merged_df
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   HistoryID         999 non-null    int64  
 1   UserID            999 non-null    object 
 2   DestinationID     999 non-null    object 
 3   VisitDate         999 non-null    object 
 4   ExperienceRating  999 non-null    int64  
 5   Name_x            999 non-null    object 
 6   Email             999 non-null    object 
 7   Preferences       999 non-null    object 
 8   Gender            999 non-null    object 
 9   NumberOfAdults    999 non-null    int64  
 10  NumberOfChildren  999 non-null    int64  
 11  Name_y            999 non-null    object 
 12  State             999 non-null    object 
 13  Type              999 non-null    object 
 14  Popularity        999 non-null    float64
 15  BestTimeToVisit   999 non-null    object 
dtypes: float64(1), int64(4), object(11)
memory u

In [None]:
# Rebuild the merged table, persist it to disk and read it back.
merged_df = (
    userhistory_df
    .merge(users_df, on='UserID', how='left')
    .merge(destinations_df, on='DestinationID', how='left')
)
merged_df.to_csv("Merged_Travel_Data.csv", index=False)

# read_csv infers the dtypes again from the file contents, so the frame read
# back may not keep the str typing applied to the ID columns in memory.
m = pd.read_csv("Merged_Travel_Data.csv")

# Compare the round-tripped frame with the in-memory one.
m.info()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   HistoryID         999 non-null    int64  
 1   UserID            999 non-null    int64  
 2   DestinationID     999 non-null    int64  
 3   VisitDate         999 non-null    object 
 4   ExperienceRating  999 non-null    int64  
 5   Name_x            999 non-null    object 
 6   Email             999 non-null    object 
 7   Preferences       999 non-null    object 
 8   Gender            999 non-null    object 
 9   NumberOfAdults    999 non-null    int64  
 10  NumberOfChildren  999 non-null    int64  
 11  Name_y            999 non-null    object 
 12  State             999 non-null    object 
 13  Type              999 non-null    object 
 14  Popularity        999 non-null    float64
 15  BestTimeToVisit   999 non-null    object 
dtypes: float64(1), int64(6), object(9)
memory us

In [None]:
# Review size, duplicated rows and missing values.
# Only the last expression of a cell is echoed, and this cell ends with an
# assignment, so each check is printed explicitly.
print("Shape:", df.shape)
print("Duplicated rows:", df.duplicated().sum())
print("Null values per column:")
print(df.isnull().sum())

# Drop columns that are not needed for the analysis and parse the visit date.
df_clean = df.drop(columns=['HistoryID','Name_x','Email'])
df_clean['VisitDate'] = pd.to_datetime(df_clean['VisitDate'])

In [None]:
# Horizontal bar chart of destination popularity, sorted by ascending score.
destinos_ordenados = destinations_df.sort_values(by='Popularity', ascending=True)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=destinos_ordenados,
    x='Popularity',
    y='Name',
    hue='Name',
    palette='coolwarm',
    ax=ax,
)
ax.set_title('Most Popular Destinations')
ax.set_xlabel('Popularity Score')
ax.set_ylabel('Destination')
plt.show()

In [None]:
# Count how many history records each user has, largest count first.
userhistory_df.groupby('UserID').size().sort_values(ascending=False)

In [None]:
# Work on a separate copy of the merged table whose VisitDate column is parsed
# as datetime; values that cannot be parsed become NaT (errors='coerce').
df_copia = df.assign(VisitDate=pd.to_datetime(df['VisitDate'], errors='coerce'))
df_copia.info()

In [None]:
# Preview the first rows of the date-parsed copy of the merged table.
df_copia.head()

In [None]:
# Aggregated time series: number of trips per day and destination.
# In the merged table `Name_x` is the *user's* name (it sits among the user
# columns) and `Name_y` is the destination's name, so the count has to be
# grouped by `Name_y`. The result keeps the column label `Name_x` only because
# the following cells select the destination through that label.
df_demand = (
    df_copia
    .groupby(['VisitDate', 'Name_y'])
    .size()
    .reset_index(name='num_viajes')
    .rename(columns={'Name_y': 'Name_x'})
    .sort_values('VisitDate')  # chronological order
)

# Most visited destinations overall
top_destinos = df_demand.groupby('Name_x')['num_viajes'].sum().sort_values(ascending=False)
print("Destinos más visitados:")
print(top_destinos.head(10))


In [None]:
# Pick the destination with the most trips and isolate its daily series.
destino_top = top_destinos.index[0]
df_top = df_demand.loc[df_demand['Name_x'] == destino_top].copy()

# Continuous daily calendar between the first and the last visit.
rango_fechas = pd.date_range(
    start=df_top['VisitDate'].min(),
    end=df_top['VisitDate'].max(),
)

# Reindex onto that calendar; dates without visits get 0 in every column.
df_top = (
    df_top
    .set_index('VisitDate')
    .reindex(rango_fechas, fill_value=0)
    .rename_axis('VisitDate')
    .reset_index()
)

# Discard the zero-trip rows again (and any row with missing values), so only
# the dates that actually had trips remain.
df_top = df_top.loc[df_top['num_viajes'] != 0].dropna()

# Show the resulting series
print(f"\nSerie de tiempo para destino {destino_top}")
display(df_top.groupby(['Name_x','VisitDate']).sum())

In [None]:
# Choose the entry of `Name_x` with the largest total number of trips.
viajes_por_ciudad = df_demand.groupby('Name_x')['num_viajes'].sum()
ciudad_top = viajes_por_ciudad.sort_values(ascending=False).index[0]

# Keep only that entry, use the column names Prophet expects (ds = date,
# y = value) and make sure the rows are in chronological order.
df_ciudad = (
    df_demand.loc[df_demand['Name_x'] == ciudad_top]
    .rename(columns={'VisitDate': 'ds', 'num_viajes': 'y'})
    .sort_values('ds')
)

# Show the series that will be modelled
print(f"Serie de tiempo para la ciudad: {ciudad_top}")
display(df_ciudad)

In [None]:
# Create the Prophet model (daily seasonality enabled) and fit it on the
# selected series.
modelo = Prophet(daily_seasonality=True)
modelo.fit(df_ciudad)

# Extend the calendar 30 days past the training data and predict over it.
future = modelo.make_future_dataframe(periods=30)
forecast = modelo.predict(future)

# Plot the forecast and label the axes Prophet has just drawn on.
modelo.plot(forecast)
ax_actual = plt.gca()
ax_actual.set_title(f"Predicción de demanda para {ciudad_top}")
ax_actual.set_xlabel("Fecha")
ax_actual.set_ylabel("Número de viajes")
plt.show()

In [None]:
# The five entries of `Name_x` with the most trips in total.
demanda_total = df_demand.groupby('Name_x')['num_viajes'].sum()
top_5_ciudades = demanda_total.sort_values(ascending=False).head(5).index

# One forecast per city, keyed by city name.
predicciones_por_ciudad = {}

for ciudad in top_5_ciudades:
    # Daily series of this city in Prophet's (ds, y) layout, oldest first.
    df_ciudad = (
        df_demand.loc[df_demand['Name_x'] == ciudad, ['VisitDate', 'num_viajes']]
        .rename(columns={'VisitDate': 'ds', 'num_viajes': 'y'})
        .sort_values('ds')
    )

    modelo = Prophet(daily_seasonality=True)
    modelo.fit(df_ciudad)

    # Training dates plus the next 30 days.
    future = modelo.make_future_dataframe(periods=30)
    forecast = modelo.predict(future)

    # Keep only date and point forecast, tagged with the city they belong to.
    predicciones_por_ciudad[ciudad] = forecast[['ds', 'yhat']].assign(ciudad=ciudad)

In [None]:
# Stack every per-city forecast into a single frame.
df_predicciones_total = pd.concat(predicciones_por_ciudad.values())

# Keep only the rows after the last observed date.
ultima_fecha_real = df_demand['VisitDate'].max()
df_futuro = df_predicciones_total.loc[df_predicciones_total['ds'] > ultima_fecha_real]

# One line per city on a shared set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
for ciudad in top_5_ciudades:
    datos = df_futuro.loc[df_futuro['ciudad'] == ciudad]
    ax.plot(datos['ds'], datos['yhat'], label=ciudad)

ax.set_title("Proyección de demanda para los próximos 30 días")
ax.set_xlabel("Fecha")
ax.set_ylabel("Número de viajes")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Evaluation metrics (RMSE, MAE). The forecasts are compared with the same
# observations the models were fitted on, i.e. this is an in-sample check.
print("Métricas de evaluación:\n")

for ciudad in top_5_ciudades:
    # Observed series of the city in (ds, y) layout, oldest first.
    df_real = (
        df_demand.loc[df_demand['Name_x'] == ciudad, ['VisitDate', 'num_viajes']]
        .rename(columns={'VisitDate': 'ds', 'num_viajes': 'y'})
        .sort_values('ds')
    )

    # The inner join keeps only the dates that have an observed value.
    forecast = predicciones_por_ciudad[ciudad]
    forecast_real = forecast.merge(df_real, on='ds', how='inner')

    observado = forecast_real['y']
    estimado = forecast_real['yhat']
    rmse = np.sqrt(mean_squared_error(observado, estimado))
    mae = mean_absolute_error(observado, estimado)

    print(f"{ciudad} - RMSE: {rmse:.2f} | MAE: {mae:.2f}")

In [None]:
# Prediction vs. observed demand, one panel per city.
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 12))
axes = axes.flatten()

for i, ciudad in enumerate(top_5_ciudades):
    forecast = predicciones_por_ciudad[ciudad]
    df_real = df_demand[df_demand['Name_x'] == ciudad][['VisitDate', 'num_viajes']]

    ax = axes[i]
    ax.plot(df_real['VisitDate'], df_real['num_viajes'], 'o', label='Real')
    ax.plot(forecast['ds'], forecast['yhat'], '-', label='Predicción')
    ax.set_title(f"Predicción vs Real - {ciudad}")
    ax.set_xlabel("Fecha")
    ax.set_ylabel("Número de viajes")
    ax.legend()

# The grid has more panels than cities (3x2 for five cities); hide the ones
# that were not used so no empty axes are drawn.
for ax_sobrante in axes[len(top_5_ciudades):]:
    ax_sobrante.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Refit one model per city, this time keeping the fitted model and the full
# forecast frame, which plot_components needs.
predicciones_por_ciudad = {}
modelos_por_ciudad = {}

for ciudad in top_5_ciudades:
    df_ciudad = df_demand[df_demand['Name_x'] == ciudad][['VisitDate', 'num_viajes']].copy()
    df_ciudad = df_ciudad.rename(columns={'VisitDate': 'ds', 'num_viajes': 'y'}).sort_values('ds')

    modelo = Prophet(daily_seasonality=True)
    modelo.fit(df_ciudad)

    future = modelo.make_future_dataframe(periods=30)
    forecast = modelo.predict(future)

    # This cell replaces `predicciones_por_ciudad`, which the combined-forecast
    # plot filters through a `ciudad` column. Keep that column here as well so
    # that cell still works when it is re-run after this one.
    forecast['ciudad'] = ciudad

    predicciones_por_ciudad[ciudad] = forecast
    modelos_por_ciudad[ciudad] = modelo

# Show the fitted components for each city
for ciudad in top_5_ciudades:
    print(f"\nComponentes del modelo para {ciudad}:")
    modelos_por_ciudad[ciudad].plot_components(predicciones_por_ciudad[ciudad])
    plt.show()

Aunque el dataset tiene un número muy limitado de fechas (solo 3 valores distintos de `VisitDate`), se observan algunos indicios de variaciones diarias que podrían interpretarse como patrones estacionales si tuviéramos más datos. Prophet permite visualizar las componentes de tendencia, estacionalidad y efectos semanales o anuales. En este caso, no se observan componentes significativas debido a la escasez de datos, pero el modelo está correctamente estructurado para captarlas si se amplía la serie temporal.

In [None]:
###################### PART 3 ################################

# Working copy of the merged table for the recommender. In the merged table
# `Name_y` is the destination's name and `Name_x` the user's, so `Name_y` is
# the column exposed as `Name` in the recommendations. (`rename` returns a new
# frame, so `df` itself is left untouched.)
dt_df = df.rename(columns={'Name_y': 'Name'})

# One text document per row: destination attributes plus the preferences of
# the user who made the visit.
dt_df['features'] = dt_df['Type'] + ' ' + dt_df['State'] + ' ' + dt_df['BestTimeToVisit'] + " " + dt_df['Preferences']

# Bag-of-words encoding of the feature text. The rows of feature_matrix (and
# of cosine_sim) follow the rows of dt_df one to one.
vectorizer = CountVectorizer(stop_words='english')
feature_matrix = vectorizer.fit_transform(dt_df['features'])
destination_features = dt_df  # alias of the frame itself, not of the matrix

# Row-to-row content similarity.
cosine_sim = cosine_similarity(feature_matrix, feature_matrix)

# User x destination matrix with the mean rating of each pair; 0 marks a pair
# without any rating.
user_item_matrix = userhistory_df.pivot_table(index='UserID', columns='DestinationID', values='ExperienceRating',aggfunc='mean')
user_item_matrix = user_item_matrix.fillna(0)

# User-to-user similarity; rows and columns follow user_item_matrix.index.
user_similarity = cosine_similarity(user_item_matrix)

In [None]:
def hybrid_recommend(user_id, user_item_matrix, user_similarity, destinations_df,feature_matrix, vectorizer,
                     user_preferences=None, top_n=5, k_neighbors=5):
    """
    Recommend destinations with collaborative filtering, falling back to
    content-based matching for users without history.

    Args:
        user_id: ID of the user. It is looked up in ``user_item_matrix.index``
            as given and, if that fails, as ``str(user_id)``, so an int ID
            still matches an index of string IDs.
        user_item_matrix (pd.DataFrame): users x destinations ratings, where 0
            means "not rated".
        user_similarity (ndarray): user-to-user similarity whose rows/columns
            follow ``user_item_matrix.index``.
        destinations_df (pd.DataFrame): destination details. Must contain the
            returned columns. For the new-user branch its rows must be aligned
            with the rows of ``feature_matrix``.
        feature_matrix: vectorized features (used only for new users).
        vectorizer: fitted vectorizer exposing ``transform`` (new users only).
        user_preferences (dict): optional 'Type', 'State', 'BestTimeToVisit'
            and 'Preferences' strings; required for new users.
        top_n (int): number of recommendations.
        k_neighbors (int): number of neighbours for collaborative filtering.

    Returns:
        pd.DataFrame with at most ``top_n`` distinct destinations sorted by
        'PredictedRating' (descending), including a 'Reason' column.

    Raises:
        ValueError: if the user is unknown and ``user_preferences`` is None, or
            if ``feature_matrix`` and ``destinations_df`` differ in row count.
    """
    output_columns = ['DestinationID', 'Name', 'Type', 'State', 'BestTimeToVisit',
                      'Popularity', 'PredictedRating', 'Reason']

    # Find the label under which this user appears in the matrix, if any.
    user_key = None
    for candidate in (user_id, str(user_id)):
        if candidate in user_item_matrix.index:
            user_key = candidate
            break

    # Case 1: known user -> collaborative filtering
    if user_key is not None:
        user_idx = user_item_matrix.index.get_loc(user_key)
        similarities = np.asarray(user_similarity[user_idx], dtype=float)

        # Most similar users, excluding the user explicitly: with ties the user
        # is not guaranteed to be ranked first.
        ranked_users = np.argsort(similarities)[::-1]
        neighbor_idx = ranked_users[ranked_users != user_idx][:k_neighbors]

        ratings = user_item_matrix.to_numpy(dtype=float)
        neighbor_ratings = ratings[neighbor_idx]
        neighbor_sims = similarities[neighbor_idx]

        # Similarity-weighted mean over the neighbours that rated each item.
        weighted_ratings = neighbor_sims @ neighbor_ratings
        similarity_sum = neighbor_sims @ (neighbor_ratings > 0).astype(float)
        predicted_ratings = np.divide(
            weighted_ratings,
            similarity_sum,
            out=np.zeros_like(weighted_ratings),
            where=similarity_sum > 0,  # items no neighbour rated score 0
        )

        # Only destinations the user has not rated are candidates.
        unrated_mask = ratings[user_idx] == 0
        unrated_ratings = predicted_ratings[unrated_mask]
        unrated_destinations = user_item_matrix.columns[unrated_mask]

        top_indices = np.argsort(unrated_ratings)[::-1][:top_n]
        # IDs are compared as strings so int/str typing differences between
        # the matrix and destinations_df do not empty the result.
        score_by_id = {
            str(dest_id): score
            for dest_id, score in zip(unrated_destinations[top_indices], unrated_ratings[top_indices])
        }

        dest_keys = destinations_df['DestinationID'].astype(str)
        recommendations = destinations_df[dest_keys.isin(list(score_by_id)).to_numpy()].copy()
        recommendations['PredictedRating'] = (
            recommendations['DestinationID'].astype(str).map(score_by_id)
        )
        # destinations_df may hold one row per visit; keep one per destination.
        recommendations = recommendations.drop_duplicates(subset='DestinationID')
        recommendations['Reason'] = "Similar users rated it highly"

    # Case 2: new user -> match the stated preferences against the features
    else:
        if user_preferences is None:
            raise ValueError("User preferences must be provided for new users.")
        if feature_matrix.shape[0] != len(destinations_df):
            raise ValueError(
                "feature_matrix and destinations_df must have the same number of rows."
            )

        pref_str = ' '.join(
            user_preferences.get(key, '')
            for key in ('Type', 'State', 'BestTimeToVisit', 'Preferences')
        ).lower()
        pref_vector = vectorizer.transform([pref_str])
        similarities = cosine_similarity(pref_vector, feature_matrix).flatten()

        # Rank every row, then keep the best row of each distinct destination.
        ranked_rows = np.argsort(similarities)[::-1]
        recommendations = destinations_df.iloc[ranked_rows].copy()
        recommendations['PredictedRating'] = similarities[ranked_rows]
        recommendations = recommendations.drop_duplicates(subset='DestinationID').head(top_n)
        recommendations['Reason'] = "Matches your preferences"

    # Sort and return the relevant columns
    recommendations = recommendations.sort_values(by='PredictedRating', ascending=False)
    return recommendations[output_columns]
# Example 1: collaborative recommendations for a user with history.
# The IDs were cast to str earlier in the notebook, so the lookup key is a
# string. If that user has no history, fall back to the first user of the
# matrix so the example always exercises the collaborative branch.
known_user_id = '234'
if known_user_id not in user_item_matrix.index:
    known_user_id = user_item_matrix.index[0]

collaborative_recommendations = hybrid_recommend(
    user_id=known_user_id,
    user_item_matrix=user_item_matrix,
    user_similarity=user_similarity,
    destinations_df=dt_df,  # has the 'Name' column the function returns
    feature_matrix=feature_matrix,
    vectorizer=vectorizer,
    top_n=5,
    k_neighbors=10
)
# Display recommendations
print(collaborative_recommendations)

# Example 2: new user described only by preferences.
user_prefs = {'Type': 'Mountain', 'State': 'Antioquia', 'BestTimeToVisit': 'Dry Season', 'Preferences': 'City,Historical'}

hybrid_recommend(
    user_id='new_user',  # an ID that cannot be present in user_item_matrix
    user_item_matrix=user_item_matrix,
    user_similarity=user_similarity,
    destinations_df=dt_df,
    feature_matrix=feature_matrix,
    vectorizer=vectorizer,
    user_preferences=user_prefs,
    top_n=5,
    k_neighbors=10
)

In [None]:
def recommend_destinations(user_id, userhistory_df, destinations_df, cosine_sim, top_n=5):
    """
    Recommend destinations similar to the ones a user has already visited.

    Args:
    - user_id: ID of the user; compared with 'UserID' as a string, so int and
      str IDs both match.
    - userhistory_df: history DataFrame with 'UserID' and 'DestinationID'.
    - destinations_df: DataFrame with destination details. Its rows must be
      aligned with the rows/columns of cosine_sim.
    - cosine_sim: square similarity matrix, one row per row of destinations_df.
    - top_n: number of destinations to return (default 5).

    Returns:
    - DataFrame with up to top_n distinct, not yet visited destinations, best
      match first. Of the columns 'DestinationID', 'Name', 'State', 'Type',
      'Popularity', 'BestTimeToVisit' and 'ExperienceRating', those present in
      destinations_df are returned. Empty if the user has no history.

    Raises:
    - ValueError: if cosine_sim and destinations_df differ in row count.
    """
    wanted_columns = ['DestinationID', 'Name', 'State', 'Type', 'Popularity',
                      'BestTimeToVisit', 'ExperienceRating']
    columns = [col for col in wanted_columns if col in destinations_df.columns]

    similarity = np.asarray(cosine_sim)
    if similarity.shape[0] != len(destinations_df):
        raise ValueError("cosine_sim and destinations_df must have the same number of rows.")

    # Destinations the user has visited, as strings.
    is_user = (userhistory_df['UserID'].astype(str) == str(user_id)).to_numpy()
    visited = set(userhistory_df.loc[is_user, 'DestinationID'].astype(str))

    # Locate the visited destinations by row position instead of assuming that
    # "DestinationID - 1" is a valid row number.
    dest_ids = destinations_df['DestinationID'].astype(str)
    visited_rows = np.flatnonzero(dest_ids.isin(visited).to_numpy())
    if visited_rows.size == 0:
        # No history to compare against: nothing can be recommended.
        return pd.DataFrame(columns=columns)

    # Total similarity of every row to the visited destinations.
    similar_scores = similarity[visited_rows].sum(axis=0)

    # Walk the ranking, skipping visited and already selected destinations.
    dest_id_values = dest_ids.to_numpy()
    seen = set(visited)
    selected_rows = []
    for row in np.argsort(similar_scores)[::-1]:
        dest_id = dest_id_values[row]
        if dest_id in seen:
            continue
        seen.add(dest_id)
        selected_rows.append(row)
        if len(selected_rows) >= top_n:
            break

    return destinations_df.iloc[selected_rows][columns].reset_index(drop=True)

# Example: recommend destinations for user '100'. IDs were cast to str earlier
# in the notebook, and dt_df is the frame cosine_sim was computed from, so its
# rows line up with the similarity matrix.
recommended_destinations = recommend_destinations('100', userhistory_df, dt_df, cosine_sim)

# Display recommendations
recommended_destinations

In [None]:
def collaborative_recommend(user_id, user_similarity, user_item_matrix, destinations_df, top_n=5,k_neighbors=5):
    """
    Recommend destinations based on collaborative filtering.

    Args:
    - user_id: ID of the user. Looked up in user_item_matrix.index as given
      and, failing that, as str(user_id).
    - user_similarity: user-to-user similarity matrix whose rows/columns follow
      user_item_matrix.index.
    - user_item_matrix: user x destination ratings, 0 meaning "not rated".
    - destinations_df: DataFrame containing destination details.
    - top_n: number of destinations to recommend.
    - k_neighbors: number of most similar users taken into account.

    Returns:
    - DataFrame with one row per recommended destination, in destinations_df
      order. Of the columns 'DestinationID', 'Name', 'State', 'Type',
      'Popularity', 'BestTimeToVisit' and 'ExperienceRating', those present in
      destinations_df are returned.

    Raises:
    - KeyError: if the user is not present in user_item_matrix.
    """
    user_key = next(
        (cand for cand in (user_id, str(user_id)) if cand in user_item_matrix.index),
        None,
    )
    if user_key is None:
        raise KeyError(f"User {user_id!r} is not present in user_item_matrix")

    # Row position of the user; the similarity matrix is positional, so
    # "user_id - 1" is not a reliable index.
    user_pos = user_item_matrix.index.get_loc(user_key)

    # Destinations already rated by the user
    user_ratings = user_item_matrix.iloc[user_pos]
    visited = user_ratings[user_ratings > 0].index

    # Most similar users, excluding the user explicitly (ties may keep the
    # user from being ranked first).
    similar_users = np.asarray(user_similarity[user_pos], dtype=float)
    ranked_users = np.argsort(similar_users)[::-1]
    similar_users_idx = ranked_users[ranked_users != user_pos][:k_neighbors]

    # Similarity-weighted ratings of the neighbours
    similar_weights = similar_users[similar_users_idx]
    weighted_ratings = user_item_matrix.iloc[similar_users_idx].T.dot(similar_weights)
    total_weight = similar_weights.sum()
    if total_weight > 0:  # avoid 0/0 when no neighbour is similar at all
        weighted_ratings = weighted_ratings / total_weight

    # Drop what the user already knows and keep the best-scored destinations
    weighted_ratings = weighted_ratings.drop(labels=visited, errors='ignore')
    recommended_destinations_ids = weighted_ratings.sort_values(ascending=False).head(top_n).index

    # Destination details; IDs are compared as strings so typing differences
    # between the matrix and destinations_df do not empty the result.
    wanted_columns = ['DestinationID', 'Name', 'State', 'Type', 'Popularity',
                      'BestTimeToVisit', 'ExperienceRating']
    columns = [col for col in wanted_columns if col in destinations_df.columns]
    recommended_keys = [str(dest_id) for dest_id in recommended_destinations_ids]
    is_recommended = destinations_df['DestinationID'].astype(str).isin(recommended_keys).to_numpy()
    recommendations = destinations_df.loc[is_recommended, columns].drop_duplicates(subset='DestinationID')

    return recommendations

# Example: collaborative recommendations for user '100'. IDs were cast to str
# earlier in the notebook; if that user has no history, use the first user of
# the matrix so the example always runs.
example_user_id = '100'
if example_user_id not in user_item_matrix.index:
    example_user_id = user_item_matrix.index[0]

collaborative_recommendations = collaborative_recommend(example_user_id, user_similarity, user_item_matrix, destinations_df)

# Display recommendations
collaborative_recommendations

In [None]:
def hybrid_recommend(
    user_id,
    user_item_matrix,
    user_similarity,
    destinations_df,
    feature_matrix,
    vectorizer,
    user_preferences=None,
    top_n=5,
    k_neighbors=5
):
    """
    Recommend destinations with collaborative filtering, falling back to
    content-based matching for users without history.

    NOTE: this redefines the `hybrid_recommend` declared in an earlier cell;
    after this cell runs, this version is the one in effect.

    Args:
        user_id: ID of the user. Looked up in ``user_item_matrix.index`` as
            given and, failing that, as ``str(user_id)``.
        user_item_matrix (pd.DataFrame): users x destinations ratings, where 0
            means "not rated".
        user_similarity (ndarray): user-to-user similarity whose rows/columns
            follow ``user_item_matrix.index``.
        destinations_df (pd.DataFrame): destination details. For the new-user
            branch its rows must be aligned with ``feature_matrix``.
        feature_matrix: vectorized features (used only for new users).
        vectorizer: fitted vectorizer exposing ``transform`` (new users only).
        user_preferences (dict): optional 'Type', 'State' and 'BestTimeToVisit'
            strings; required for new users.
        top_n (int): number of recommendations.
        k_neighbors (int): number of neighbours for collaborative filtering.

    Returns:
        pd.DataFrame with the recommended destinations. Of the detail columns
        ('DestinationID', 'Name', 'State', 'Type', 'Popularity',
        'BestTimeToVisit', 'ExperienceRating') those present in
        ``destinations_df`` are returned.

    Raises:
        ValueError: if the user is unknown and ``user_preferences`` is None, or
            if ``feature_matrix`` and ``destinations_df`` differ in row count.
    """
    def _available(columns):
        # Only request columns the caller's frame actually has.
        return [col for col in columns if col in destinations_df.columns]

    # Decide the branch by membership rather than by raising/catching KeyError,
    # which would also hide genuine KeyErrors raised further down.
    user_key = next(
        (cand for cand in (user_id, str(user_id)) if cand in user_item_matrix.index),
        None,
    )

    if user_key is not None:
        # --- Collaborative filtering ---
        # The similarity matrix is positional, so use the user's row position
        # instead of "user_id - 1".
        user_pos = user_item_matrix.index.get_loc(user_key)

        # Destinations already rated by the user
        user_ratings = user_item_matrix.iloc[user_pos]
        visited = user_ratings[user_ratings > 0].index

        # Most similar users, excluding the user explicitly
        similar_users = np.asarray(user_similarity[user_pos], dtype=float)
        ranked_users = np.argsort(similar_users)[::-1]
        similar_users_idx = ranked_users[ranked_users != user_pos][:k_neighbors]

        # Similarity-weighted ratings of the neighbours
        similar_weights = similar_users[similar_users_idx]
        weighted_ratings = user_item_matrix.iloc[similar_users_idx].T.dot(similar_weights)
        total_weight = similar_weights.sum()
        if total_weight > 0:  # avoid 0/0 when no neighbour is similar at all
            weighted_ratings = weighted_ratings / total_weight

        # Drop what the user already knows and keep the best-scored ones
        weighted_ratings = weighted_ratings.drop(labels=visited, errors='ignore')
        recommended_destinations_ids = weighted_ratings.sort_values(ascending=False).head(top_n).index

        # Destination details; IDs are compared as strings.
        recommended_keys = [str(dest_id) for dest_id in recommended_destinations_ids]
        is_recommended = destinations_df['DestinationID'].astype(str).isin(recommended_keys).to_numpy()
        columns = _available([
            'DestinationID', 'Name', 'State', 'Type', 'Popularity', 'BestTimeToVisit', 'ExperienceRating'
        ])
        return destinations_df.loc[is_recommended, columns].drop_duplicates(subset='DestinationID')

    # --- New user: content-based recommendation ---
    if user_preferences is None:
        raise ValueError("Usuario nuevo: se requieren preferencias para recomendación basada en contenido.")
    if feature_matrix.shape[0] != len(destinations_df):
        raise ValueError(
            "feature_matrix and destinations_df must have the same number of rows."
        )

    preference_str = ' '.join(
        user_preferences.get(key, '') for key in ('Type', 'State', 'BestTimeToVisit')
    )
    preference_vector = vectorizer.transform([preference_str])
    # Compare against the vectorized features passed in, not against a
    # DataFrame held in a global variable.
    similarity_scores = cosine_similarity(preference_vector, feature_matrix).flatten()

    # Rank every row, then keep the best row of each distinct destination.
    ranked_rows = similarity_scores.argsort()[::-1]
    best_matches = (
        destinations_df.iloc[ranked_rows]
        .drop_duplicates(subset='DestinationID')
        .head(top_n)
    )
    return best_matches[_available([
        'DestinationID', 'Name', 'Type', 'State', 'BestTimeToVisit', 'Popularity','ExperienceRating'
    ])]
# Example 1: collaborative recommendations for a user with history.
# The IDs were cast to str earlier in the notebook, so the lookup key is a
# string. If that user has no history, fall back to the first user of the
# matrix so the example always exercises the collaborative branch.
known_user_id = '5'
if known_user_id not in user_item_matrix.index:
    known_user_id = user_item_matrix.index[0]

collaborative_recommendations = hybrid_recommend(
    user_id=known_user_id,
    user_item_matrix=user_item_matrix,
    user_similarity=user_similarity,
    destinations_df=dt_df,  # has the 'Name' column; rows match feature_matrix
    feature_matrix=feature_matrix,
    vectorizer=vectorizer,
    top_n=5,
    k_neighbors=10
)
# Display recommendations
print(collaborative_recommendations)

# Example 2: new user described only by preferences.
user_prefs = {'Type': 'Mountain', 'State': 'Antioquia', 'BestTimeToVisit': 'Dry Season'}

hybrid_recommend(
    user_id='new_user',  # an ID that cannot be present in user_item_matrix
    user_item_matrix=user_item_matrix,
    user_similarity=user_similarity,
    destinations_df=dt_df,
    feature_matrix=feature_matrix,
    vectorizer=vectorizer,
    user_preferences=user_prefs,
    top_n=5,
    k_neighbors=10
)