In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import re

In [None]:
# Fetch the NLTK resources this notebook asks for (tokenizer models and stop-word lists).
# NOTE(review): the cells visible here rely on TfidfVectorizer's built-in English stop
# words and never call word_tokenize/stopwords — confirm these downloads are still needed.
for nltk_resource in ('punkt', 'stopwords'):
    nltk.download(nltk_resource)

In [15]:
class TravelRecommender:
    """
    Hybrid travel-destination recommender.

    Blends two rankers over the rows of one dataset:

    * content-based: TF-IDF cosine similarity between a free-text query and
      each row's ``attractions`` text, restricted to a budget range;
    * neighbour-based: K-Nearest Neighbors over each row's
      ``user_preferences`` text plus its ``age`` and ``budget``.

    Columns read by the methods below: ``attractions``, ``user_preferences``,
    ``budget``, ``age``, ``destination_id`` and ``popularity``.
    """

    # Blend weights used by hybrid_recommendations (content vs. neighbour score).
    CONTENT_WEIGHT = 0.6
    COLLAB_WEIGHT = 0.4

    def __init__(self, data_path):
        """
        Initialize the recommendation system with the dataset.

        Args:
            data_path: Anything ``pandas.read_csv`` accepts, or an already
                loaded ``pandas.DataFrame``. A DataFrame is re-indexed into a
                new frame so the caller's object is not modified.
        """
        if isinstance(data_path, pd.DataFrame):
            self.df = data_path.reset_index(drop=True)
        else:
            self.df = pd.read_csv(data_path)
        self.preprocess_data()

    def preprocess_data(self):
        """
        Preprocess the dataset for both content-based and collaborative filtering.

        Fills missing text with empty strings, adds a min-max scaled
        ``normalized_budget`` column, and builds the TF-IDF matrices.
        """
        # Clean text data
        self.df['attractions'] = self.df['attractions'].fillna('')
        self.df['user_preferences'] = self.df['user_preferences'].fillna('')

        # Normalize budget (ravel: fit_transform returns an (n, 1) array)
        scaler = MinMaxScaler()
        self.df['normalized_budget'] = scaler.fit_transform(self.df[['budget']]).ravel()

        # The vocabulary is learned from the attraction texts only; preference
        # texts (and later queries) are projected onto that same vocabulary so
        # all vectors share one feature space.
        self.tfidf = TfidfVectorizer(stop_words='english')
        self.attractions_matrix = self.tfidf.fit_transform(self.df['attractions'])
        self.preferences_matrix = self.tfidf.transform(self.df['user_preferences'])

    def content_based_search(self, user_preferences, budget_range, top_n=5):
        """
        Content-based filtering using TF-IDF and cosine similarity.

        Args:
            user_preferences: Free-text description of what the user wants.
            budget_range: ``(low, high)`` inclusive bounds on the ``budget`` column.
            top_n: Maximum number of results.

        Returns:
            List of ``(row, similarity)`` tuples, best match first, containing
            only rows whose budget lies inside ``budget_range``.
        """
        user_vec = self.tfidf.transform([user_preferences])
        similarity = cosine_similarity(user_vec, self.attractions_matrix)[0]

        # Positional boolean mask: independent of whatever labels the frame's
        # index carries (the per-label lookup used before was not).
        low, high = budget_range[0], budget_range[1]
        in_budget = ((self.df['budget'] >= low) & (self.df['budget'] <= high)).to_numpy()
        candidates = np.flatnonzero(in_budget)

        # Stable sort on the negated scores keeps ties in dataset order.
        ranked = candidates[np.argsort(-similarity[candidates], kind='stable')]
        return [(self.df.iloc[pos], similarity[pos]) for pos in ranked[:top_n]]

    def collaborative_filtering(self, user_preferences, age, budget, top_n=5):
        """
        Collaborative filtering using K-Nearest Neighbors.

        Each dataset row is described by its TF-IDF preference vector plus its
        min-max scaled ``age`` and ``budget``; the query is encoded the same way
        and the ``top_n`` rows with the smallest cosine distance are returned.

        Args:
            user_preferences: Free-text description of what the user wants.
            age: Age of the querying user.
            budget: Single budget figure for the querying user.
            top_n: Maximum number of neighbours to return.

        Returns:
            List of ``(row, similarity)`` tuples, closest first, where
            ``similarity = 1 - cosine_distance``. Empty when the dataset has no
            rows or ``top_n`` is not positive.
        """
        # kneighbors cannot return more neighbours than there are fitted rows.
        neighbour_count = min(top_n, len(self.df))
        if neighbour_count <= 0:
            return []

        # Scale age and budget to [0, 1] before mixing them with TF-IDF
        # weights; left raw, the budget magnitude alone would decide the
        # cosine distance and the preference text would have no influence.
        demographic_scaler = MinMaxScaler()
        row_demographics = demographic_scaler.fit_transform(
            self.df[['age', 'budget']].to_numpy(dtype=float)
        )
        query_demographics = demographic_scaler.transform(
            np.array([[age, budget]], dtype=float)
        )

        feature_matrix = np.hstack([self.preferences_matrix.toarray(), row_demographics])
        user_features = np.hstack([
            self.tfidf.transform([user_preferences]).toarray(),
            query_demographics,
        ])

        knn = NearestNeighbors(n_neighbors=neighbour_count, metric='cosine')
        knn.fit(feature_matrix)
        distances, indices = knn.kneighbors(user_features)

        # The query is not one of the fitted rows, so the nearest neighbour is
        # a genuine match and must be kept (it was previously skipped).
        return [
            (self.df.iloc[pos], 1 - dist)
            for dist, pos in zip(distances[0], indices[0])
        ]

    @staticmethod
    def _best_per_destination(recs):
        """Collapse ``(row, score)`` pairs to the highest-scoring pair per ``destination_id``."""
        best = {}
        for row, score in recs:
            dest_id = row['destination_id']
            if dest_id not in best or score > best[dest_id][1]:
                best[dest_id] = (row, score)
        return best

    def hybrid_recommendations(self, user_preferences, age, budget_range, top_n=5):
        """
        Combine both recommendation methods for better results.

        Each destination's score is ``CONTENT_WEIGHT * content_score +
        COLLAB_WEIGHT * neighbour_score``, using the best score the destination
        achieved in each method (a missing method contributes 0).

        Args:
            user_preferences: Free-text description of what the user wants.
            age: Age of the querying user.
            budget_range: ``(low, high)`` budget bounds; the midpoint is the
                budget figure passed to the neighbour search.
            top_n: Maximum number of results.

        Returns:
            List of ``(row, score)`` tuples, best first, one per destination.
            The row is the one that actually matched (the content-based row
            when both methods found the destination).
        """
        content_recs = self.content_based_search(user_preferences, budget_range, top_n)
        budget_midpoint = (budget_range[0] + budget_range[1]) / 2
        collab_recs = self.collaborative_filtering(user_preferences, age, budget_midpoint, top_n)

        # De-duplicate first: several rows may share a destination_id, and a
        # later, weaker row must not overwrite or double-count the score.
        blended = {}
        for dest_id, (row, score) in self._best_per_destination(content_recs).items():
            blended[dest_id] = (row, score * self.CONTENT_WEIGHT)
        for dest_id, (row, score) in self._best_per_destination(collab_recs).items():
            if dest_id in blended:
                kept_row, partial = blended[dest_id]
                blended[dest_id] = (kept_row, partial + score * self.COLLAB_WEIGHT)
            else:
                blended[dest_id] = (row, score * self.COLLAB_WEIGHT)

        ranked = sorted(blended.values(), key=lambda pair: pair[1], reverse=True)
        return ranked[:top_n]

    def evaluate_recommendations(self, test_users, relevance_threshold=4.0):
        """
        Evaluate the recommendation system using precision and satisfaction proxies.

        For every test user the hybrid recommender is queried with a budget
        range of +/-20% around the user's budget. "Precision" is the share of
        returned rows whose ``popularity`` exceeds ``relevance_threshold``;
        "satisfaction" is the mean hybrid score of the returned rows.

        Args:
            test_users: Iterable of mappings with ``'preferences'``, ``'age'``
                and ``'budget'`` keys.
            relevance_threshold: Popularity above which a row counts as relevant.

        Returns:
            Dict with ``'average_precision'`` and ``'average_satisfaction'``.
            A user with no recommendations contributes 0 to both; with no test
            users at all, both values are 0.0.
        """
        precision_scores = []
        satisfaction_scores = []

        for user in test_users:
            budget = user['budget']
            recs = self.hybrid_recommendations(
                user['preferences'],
                user['age'],
                (budget * 0.8, budget * 1.2)
            )

            # No recommendations: score the user as 0 instead of dividing by zero.
            if not recs:
                precision_scores.append(0.0)
                satisfaction_scores.append(0.0)
                continue

            relevant_count = sum(
                1 for rec, _ in recs if rec['popularity'] > relevance_threshold
            )
            precision_scores.append(relevant_count / len(recs))
            satisfaction_scores.append(float(np.mean([score for _, score in recs])))

        return {
            'average_precision': float(np.mean(precision_scores)) if precision_scores else 0.0,
            'average_satisfaction': float(np.mean(satisfaction_scores)) if satisfaction_scores else 0.0
            }

In [None]:
# Mount Google Drive (if dataset is stored there).
# The import is guarded so this cell also runs on a non-Colab kernel, where the
# google.colab package does not exist.
try:
    from google.colab import drive
except ImportError:
    drive = None

if drive is not None:
    drive.mount('/content/drive')

# Initialize the recommender system
# NOTE(review): this is a path relative to the working directory, not a location
# under /content/drive — confirm where Travel.csv actually lives.
DATA_PATH = 'data/Travel.csv'
recommender = TravelRecommender(DATA_PATH)

# Example user
user_preferences = "mountain hiking nature photography cultural experiences"
age = 28
budget_range = (2000,10000)

# Get recommendations
recommendations = recommender.hybrid_recommendations(user_preferences, age, budget_range)

# Print recommendations
for destination, score in recommendations:
    print(f"Destination: {destination['name']}")
    print(f"Region: {destination['region']}")
    print(f"Budget: {destination['budget']}")
    print(f"Best Season: {destination['best_season']}")
    print(f"Match Score: {score:.2f}")
    print("---")
