In [2]:
import pandas as pd

In [4]:
# Load the places dataset (relative path) that the later cells filter and model.
df = pd.read_csv("model_final_data.csv")


In [5]:
# Governorate values to exclude from the dataset before modelling.
# NOTE(review): 'Minya' is also the name of an Egyptian governorate -- confirm
# that dropping it is intended.
values_to_drop = [
    'New Brunswick', 'Hama', 'Dubai', 'South Island', 'Falls Church',
    'New South Wales', 'Singapore', 'Fort Worth', 'Ontario',
    'Hail', 'Doha', 'Queens', 'Burnsville', 'Arlington',
    'Istanbul', 'Allentown', 'Beirut', 'Ho Chi Minh City', 'Victoria',
    'Puerto Rico', 'Anaheim', 'Zurich', 'Emirate of Abu Dhabi', 'Alentejo',
    'Marion', 'Vienna', 'Nairobi', 'Custer',
    'Queensland', 'Makkah', 'Mount Prospect', 'Broward County', 'Addis Ababa',
    'New York City', 'San Francisco', 'Boise',
    'Central', 'Maharashtra', 'Western', 'Piedmont', 'Washington DC',
    'Appingedam', 'Northern Portugal', 'Bohemia', 'Eastern',
    'Seoul', 'Transylvania', 'Lazio', 'El Cajon', 'Cream Ridge',
    'Zhejiang', 'Clarksville', 'Vientiane Prefecture',
    'Southern District', 'Berlin', 'Seattle', 'Chicago', 'Quebec',
    'Jerusalem District', 'San Jose', 'Eastern Norway',
    'Salt Lake City', 'Boston', 'Philadelphia', 'Central Portugal', 'Raleigh',
    'Izmir', 'Tel Aviv District', 'Amman',
    'Sofia Region', 'Central Hungary', 'Atlanta', 'Wichita', 'Island of Gozo',
    'Nova Scotia', 'Las Vegas', 'Escalante',
    'Bavaria', 'Khartoum State', 'Stockholm', 'Northern District', 'Lima Region',
    'Gauteng', 'Aqabah', 'Houston',
    'Lake County', 'Marrakech-Safi', 'Detroit', 'Minya', 'Ann Arbor',
    'Littleton', 'Greater Accra', 'Rajasthan',
    'Brevard County', 'Greater Palm Springs', 'National Capital Territory of Delhi',
    'Managua Department', 'Karnataka',
    'Denver',
]

# Keep only the rows whose Governorate is not in the exclusion list.
df = df.loc[~df['Governorate'].isin(values_to_drop)]


Giza
Cairo
Fayyum
Red Sea and Sinai
Suez
Ismailia
Alexandria
Luxor
Aswan
Naama
Matrouh
Said
New Egypt
Fayetteville
Nile River Valley
River Nile State


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import random

# Restrict the frame to the columns used below and drop rows missing any of them,
# then cast the free-text columns to str so they can be joined into one document.
selected_columns = [
    'Title', 'Tag', 'Review', 'Comment', 'Address',
    'Country', 'Price', 'Rating', 'tags', 'Governorate',
]
df = (
    df[selected_columns]
    .dropna()
    .astype({'Tag': str, 'Review': str, 'Comment': str})
)

# Fit TF-IDF on one space-separated document per place: "<Tag> <Review> <Comment>"
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(
    df['Tag'].str.cat([df['Review'], df['Comment']], sep=' ')
)

# Function to get recommendations based on user preferences, location, and price
def get_recommendations_with_budget(country, governorates, survey_responses, df, num_days, budget):
    """Build and print a day-by-day itinerary of places under a price ceiling.

    The trip's days are split as evenly as possible across ``governorates``;
    the leftover days go to the earliest entries. A place is eligible when its
    ``Price`` is at most ``budget / num_days`` (the ceiling applies to each
    place individually, not to the sum of a day's picks). For every day the
    function picks, uniformly at random among eligible places of the current
    governorate:

    * one hotel (``tags`` contains "hotel"), only on the first day spent in
      that governorate,
    * one restaurant (``tags`` contains "restaurant"),
    * one place per survey response whose ``tags`` contain that response.

    Tag matching is a case-insensitive substring test. A category with no
    eligible place is skipped for that day. The day split and every
    recommendation are also printed.

    Args:
        country: Value compared for equality with the ``Country`` column.
        governorates: Sequence of ``Governorate`` values to visit, in order.
        survey_responses: Sequence of interest keywords to look for in ``tags``.
        df: DataFrame providing the columns ``Title``, ``Price``, ``tags``,
            ``Governorate`` and ``Country``.
        num_days: Total trip length in days; must be positive.
        budget: Total budget, divided by ``num_days`` to get the price ceiling.

    Returns:
        DataFrame with columns ``Title``, ``Price``, ``tags``, ``Governorate``
        and ``Day`` (1-based day number), one row per recommendation in the
        order printed, keeping the row labels of ``df``. The frame is empty
        when no place matches.

    Raises:
        ValueError: If ``num_days`` is not positive.
    """
    # Fail with a clear message instead of a ZeroDivisionError (num_days == 0)
    # or a meaningless day split (num_days < 0).
    if num_days <= 0:
        raise ValueError("num_days must be a positive number of days.")

    output_columns = ['Title', 'Price', 'tags', 'Governorate', 'Day']
    place_columns = output_columns[:-1]

    # Filter places that match the user's country and any of the governorates
    filtered_df = df[(df['Country'] == country) & (df['Governorate'].isin(governorates))]

    # Check if there is data available
    if filtered_df.empty:
        print("No data found for the specified country and governorates.")
        return pd.DataFrame(columns=output_columns)

    # Price ceiling applied to every individual recommendation
    max_price_per_day = budget / num_days

    # Spread the days evenly; the first `extra_days` governorates get one more
    days_per_governorate, extra_days = divmod(num_days, len(governorates))
    governorate_days = {gov: days_per_governorate for gov in governorates}
    for i in range(extra_days):
        governorate_days[governorates[i]] += 1

    # Print the number of days in each governorate
    print("\nNumber of days in each governorate:")
    for gov, days in governorate_days.items():
        print(f"{gov}: {days} days")

    # One-row frames are collected here and concatenated once at the end,
    # rather than growing a DataFrame inside the loop.
    collected = []

    day_counter = 1
    for gov in governorates:
        # The candidate pools depend only on the governorate, so build them
        # once instead of once per day.
        governorate_df = filtered_df[filtered_df['Governorate'] == gov]
        affordable = governorate_df[governorate_df['Price'] <= max_price_per_day]
        tags_lower = affordable['tags'].str.lower()

        hotels = affordable[tags_lower.str.contains('hotel', regex=False, na=False)]
        restaurants = affordable[tags_lower.str.contains('restaurant', regex=False, na=False)]
        # regex=False keeps this a literal substring test even if a response
        # contains regex metacharacters.
        survey_pools = [
            affordable[tags_lower.str.contains(response.lower(), regex=False, na=False)]
            for response in survey_responses
        ]

        for day in range(governorate_days[gov]):
            daily_picks = []

            # A hotel is only suggested on the first day in each governorate
            if day == 0 and not hotels.empty:
                daily_picks.append(hotels.sample(1))

            # One restaurant per day
            if not restaurants.empty:
                daily_picks.append(restaurants.sample(1))

            # One place per survey response that has an eligible match
            for pool in survey_pools:
                if not pool.empty:
                    daily_picks.append(pool.iloc[[random.randrange(len(pool))]])

            # assign() returns new frames, so slices of `df` are never mutated
            daily_recommendations = [
                pick[place_columns].assign(Day=day_counter) for pick in daily_picks
            ]
            collected.extend(daily_recommendations)

            # Print recommendations for the day
            print(f"\nRecommendations for Day {day_counter}:")
            for number, recommendation in enumerate(daily_recommendations, start=1):
                print(f"\nRecommendation {number}:")
                print(f"Title: {recommendation['Title'].iloc[0]}")
                print(f"Price: {recommendation['Price'].iloc[0]}")
                print(f"Tags: {recommendation['tags'].iloc[0]}")
                print(f"Governorate: {recommendation['Governorate'].iloc[0]}")

            day_counter += 1

    if not collected:
        return pd.DataFrame(columns=output_columns)
    return pd.concat(collected)

# Demo run: a trip across several Egyptian governorates. The trip length and
# the total budget are read interactively from the user.
user_country = "Egypt"
user_governorates = [
    "Alexandria", "Cairo", "Giza", "Luxor",
    "Aswan", "Fayyum", "Red Sea and Sinai",
]
user_survey_responses = [
    "Museums", "water places", "for fun",
    "Archaeological tourism", "Religious Tourism", "malls",
    "parks", "Tours", "Natural views",
]
num_days = int(input("Enter the number of days you want to spend: "))
budget = float(input("Enter your budget: "))

recommendations = get_recommendations_with_budget(
    country=user_country,
    governorates=user_governorates,
    survey_responses=user_survey_responses,
    df=df,
    num_days=num_days,
    budget=budget,
)
