In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
from tkinter import font
from turtle import color
from cairo import FontWeight
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
# Helpers and entry point for printing the cheapest / most expensive listings, one row per title.
def _top_unique_titles(listings, cheapest, limit):
    """Return up to `limit` rows of `listings`, one per title, ranked by price.

    Args:
        listings: DataFrame with at least 'title', 'price', 'review_quantity'
            and 'score' columns.
        cheapest: True to keep each title's lowest price and rank ascending;
            False to keep each title's highest price and rank descending.
        limit: maximum number of rows to return.

    Returns:
        A DataFrame holding at most `limit` rows with unique titles.
    """
    # One sort does the per-title selection: price decides first; equal prices
    # are broken by review count and then score (both descending), so the most
    # reviewed offer wins. This avoids groupby().apply(), whose handling of the
    # grouping column is deprecated in recent pandas releases.
    ranked = listings.dropna(subset=['title']).sort_values(
        by=['price', 'review_quantity', 'score'],
        ascending=[cheapest, False, False],
    )
    one_per_title = ranked.drop_duplicates(subset='title', keep='first')

    # Titles are unique now, so (price, title) gives a fully deterministic order.
    return one_per_title.sort_values(
        by=['price', 'title'],
        ascending=[cheapest, True],
    ).head(limit)


def _format_listing_rows(rows):
    """Render each row as one fixed-width text line; lines are joined by newlines.

    Column order: title, review count, price, score, start date, link.
    """
    return '\n'.join(
        f"{row['title']:<30} {row['review_quantity']:>5} {row['price']:>10.2f} "
        f"{row['score']:>5.1f} {row['start_date'].strftime('%Y-%m-%d')} {row['link']}"
        for _, row in rows.iterrows()
    )


def display_unique_title_results(df, month, max_price, top_n=10):
    """Print the cheapest and the most expensive listings for one month.

    Rows are kept when 'start_month' equals `month` and 'price' is not above
    `max_price`. Each title appears at most once per section.

    Args:
        df: DataFrame with 'start_month', 'price', 'title', 'review_quantity',
            'score', 'start_date' (datetime-like) and 'link' columns.
        month: month number compared against 'start_month'.
        max_price: inclusive upper bound for 'price'.
        top_n: how many titles to print per section (defaults to 10).

    Returns:
        None. Everything is written to standard output.
    """
    # Filter data for the selected month and max price
    month_data = df[(df['start_month'] == month) & (df['price'] <= max_price)]

    if month_data.empty:
        print("No data available for the selected month and price range.")
        return

    # Build both tables before printing anything.
    sections = (
        ("Cheapest", _top_unique_titles(month_data, cheapest=True, limit=top_n)),
        ("Most Expensive", _top_unique_titles(month_data, cheapest=False, limit=top_n)),
    )

    # Display the results with links
    for label, rows in sections:
        print(f"\nTop {top_n} {label} Unique Titles for Month {month}:")
        if rows.empty:
            print("No data available.")
        else:
            print(_format_listing_rows(rows))

# Load data
LISTINGS_CSV = 'booking_list_in_vilnius_review_more_than_10.csv'
df = pd.read_csv(LISTINGS_CSV, encoding='utf-8')

# Separating date interval: split on the first three hyphens, so pieces 0-2 form
# the start date and piece 3 holds everything that is left.
# NOTE(review): presumably 'date' looks like 'YYYY-MM-DD-YYYY-MM-DD' — confirm against the CSV.
split_dates = df['date'].str.split('-', n=3, expand=True)
end_part = split_dates[3]
df['start_date'] = split_dates[0] + '-' + split_dates[1] + '-' + split_dates[2]
# Re-assemble the remainder by character position with '-' separators.
df['end_date'] = end_part.str[:4] + '-' + end_part.str[5:7] + '-' + end_part.str[8:]

# Converting dates to a datetime format (unparseable values become NaT)
df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')
df['end_date'] = pd.to_datetime(df['end_date'], errors='coerce')

# Extracting month from 'start date'
df['start_month'] = df['start_date'].dt.month

# Converting price column to a numeric value. A raw-string pattern avoids the
# invalid '\€' escape sequence; one pass strips the euro sign, commas and any
# whitespace (including non-breaking spaces). astype(str) keeps this working
# even when read_csv already parsed the column as numbers.
df['price'] = df['price'].astype(str).str.replace(r'[€,\s]', '', regex=True)
df['price'] = pd.to_numeric(df['price'], errors='coerce')  # unparseable values become NaN

# Convert review quantity to numeric
df['review_quantity'] = pd.to_numeric(df['review_quantity'], errors='coerce')

# Converting score to numeric: take the first 'digits,digits' token and swap
# the decimal comma for a dot.
# NOTE(review): the pattern requires a comma, so a score written as a whole
# number (e.g. '10') becomes NaN — confirm whether that is intended.
df['score'] = df['score'].str.extract(r'(\d+\,\d+)')[0]
df['score'] = df['score'].str.replace(',', '.', regex=False).astype(float)

# Clean up surrounding whitespace in the existing 'link' column
df['link'] = df['link'].str.strip()

# Prompt the user for inputs. Only the parsing sits inside the try block, so a
# ValueError raised while building the report is not misreported as bad input.
try:
    month = int(input("Enter the month (1-12) you want to travel: "))
    max_price = float(input("Enter the maximum price per night: "))
except ValueError:
    print("Invalid input. Please enter numeric values for month and price.")
else:
    if 1 <= month <= 12:
        display_unique_title_results(df, month, max_price)
    else:
        print("Invalid month. Please enter a value between 1 and 12.")


Top 10 Cheapest Unique Titles for Month 5:
Vilnius Old Town accommodation   519      54.00   9.4 2025-05-23 https://www.booking.com/hotel/lt/old-town-guest-house.lt.html?aid=304142&label=gen173nr-1FCAQoggJCDnNlYXJjaF92aWxuaXVzSBlYBGiIAYgBAZgBGbgBGcgBDNgBAegBAfgBA4gCAagCA7gCoKegtQbAAgHSAiQ4OGQ1YTg2ZS0xMGRjLTQyZWQtYjE4My1lZTUyM2RkNzJkMjPYAgXgAgE&ucfs=1&arphpl=1&checkin=2025-05-23&checkout=2025-05-24&group_adults=2&req_adults=2&no_rooms=1&group_children=0&req_children=0&hpos=13&hapos=38&sr_order=review_score_and_price&nflt=distance%3D1000%3Breview_score%3D80&srpvid=d6ef9c1042940029&srepoch=1722291118&all_sr_blocks=286939101_108234912_2_0_0&highlighted_blocks=286939101_108234912_2_0_0&matching_block_id=286939101_108234912_2_0_0&sr_pri_blocks=286939101_108234912_2_0_0__5400&from_sustainable_property_sr=1&from=searchresults
Sweet home6                       67      59.00   9.7 2025-05-01 https://www.booking.com/hotel/lt/sweet-home6.lt.html?aid=304142&label=gen173nr-1FCAQoggJCDnNlYXJjaF92aWx