In [1]:
## libraries
import pandas as pd
import numpy as np
import pyarrow.parquet as pq
import pyarrow as pa
import matplotlib.pyplot as plt
from math import radians


In [2]:
## let's read the data
import os

## Root folder of the prepared data. The default is the original Dropbox location;
## set the HOTELS_DATA_DIR environment variable to run the notebook on another machine.
DATA_DIR = os.environ.get(
    'HOTELS_DATA_DIR',
    'C:/Users/P-SCHULT/Dropbox/ZEW Material/hotels_and_9eur_ticket/data/prepared_data',
)

## booking dataset (only the columns listed here are loaded)
path = f'{DATA_DIR}/booking_scrape.parquet'
columns_to_select = ['scrape_day', 'days_until_check_in', 'hotel_id', 'country', 'region', 'hotel_location', 'hotel_rating', 'hotel_price', 'occupancy_info', 'hotels_in_area', 'check_in_date']

booking = pd.read_parquet(path, engine='pyarrow', columns=columns_to_select)

## geocode info (used below for its hotel_id, latitude and longitude columns)
path = f'{DATA_DIR}/geocode/booking/booking_scrape_addresses.parquet'
booking_geocode = pd.read_parquet(path, engine='pyarrow')


In [3]:
## parse the two date columns (strings formatted as YYYY-MM-DD)
booking['scrape_day'] = pd.to_datetime(booking['scrape_day'], format='%Y-%m-%d')
booking['check_in_date'] = pd.to_datetime(booking['check_in_date'], format='%Y-%m-%d')

## review score group: 'bad' (<= 5), 'average' (<= 8), 'good' (> 8).
## Hotels without a rating are kept as missing: every comparison with NaN is False,
## so the nested np.where on its own would label them 'good'.
rating = booking['hotel_rating']
rating_group = np.where(rating <= 5, 'bad', np.where(rating <= 8, 'average', 'good'))
booking['hotel_rating_group'] = pd.Series(rating_group, index=booking.index).where(rating.notna())

## make sure hotel_price is numeric
booking['hotel_price'] = pd.to_numeric(booking['hotel_price'])

# calendar helpers derived from the check-in date
booking['month'] = booking['check_in_date'].dt.month_name()
booking['week_number'] = booking['check_in_date'].dt.isocalendar().week

# Look up coordinates by hotel_id; hotels absent from booking_geocode get NaN from .map()
geocode_by_hotel = booking_geocode.set_index('hotel_id')
hotel_id_to_lat = geocode_by_hotel['latitude'].to_dict()
hotel_id_to_long = geocode_by_hotel['longitude'].to_dict()
booking['latitude'] = booking['hotel_id'].map(hotel_id_to_lat)
booking['longitude'] = booking['hotel_id'].map(hotel_id_to_long)

# Ordered categorical so that months sort chronologically in tables and plots;
# month names outside this list become missing values
month_order = ['August', 'September', 'October']
booking['month'] = pd.Categorical(booking['month'], categories=month_order, ordered=True)

booking.head()

Unnamed: 0,scrape_day,days_until_check_in,hotel_id,country,region,hotel_location,hotel_rating,hotel_price,occupancy_info,hotels_in_area,check_in_date,hotel_rating_group,month,week_number,latitude,longitude
0,2022-08-09,1,0,Others,Wien,"07. Neubau, Wien",8.3,125.0,Basierend auf Ihren Filtern sind 84% der Unter...,370.0,2022-08-10,good,August,32,48.198454,16.340475
1,2022-08-09,1,2,Others,Wien,"10. Favoriten, Wien",7.7,180.0,Basierend auf Ihren Filtern sind 84% der Unter...,370.0,2022-08-10,average,August,32,48.158762,16.345044
2,2022-08-09,1,3,Others,Wien,"07. Neubau, Wien",8.7,149.0,Basierend auf Ihren Filtern sind 84% der Unter...,370.0,2022-08-10,good,August,32,48.206371,16.354678
3,2022-08-09,1,4,Others,Wien,"10. Favoriten, Wien",8.6,80.0,Basierend auf Ihren Filtern sind 84% der Unter...,370.0,2022-08-10,good,August,32,48.165903,16.34808
4,2022-08-09,1,7,Others,Wien,"01. Innere Stadt, Wien",8.4,125.0,Basierend auf Ihren Filtern sind 84% der Unter...,370.0,2022-08-10,good,August,32,48.211079,16.383851


In [None]:
## basic descriptive statistics: number of distinct hotels, countries and regions
for label, column in [('hotels', 'hotel_id'), ('countries', 'country'), ('regions', 'region')]:
    print(f'Number of {label} in the dataset: ', len(booking[column].unique()))

## bar chart: number of distinct hotels per country
hotels_per_country = booking.groupby('country')['hotel_id'].nunique()
hotels_per_country.plot(kind='bar', figsize=(15, 5), title='Number of hotels in each country')
plt.show()

In [None]:
## more plots, let's compare price and rating

## average price per rating, with ratings binned to the nearest 0.5
## (doubling, rounding and halving snaps e.g. 8.3 -> 8.5 and 7.7 -> 7.5;
## round(1) would leave one-decimal ratings unchanged)
rating_bin = (booking['hotel_rating'] * 2).round() / 2
booking.groupby(rating_bin)['hotel_price'].mean().plot(kind='bar', figsize=(15, 5), title='Average price per rating')
plt.show()



In [None]:

# Mean price by month (x axis), one bar per country
monthly_country_price = booking.groupby(['country', 'month'])['hotel_price'].mean().unstack(level=0)

fig, ax = plt.subplots(figsize=(15, 5))
monthly_country_price.plot(kind='bar', ax=ax, title='Average price per country per month')

# Cosmetics: y label, legend, no x label, dashed horizontal grid, upright tick labels
ax.set_ylabel('Average Price')
ax.legend()
ax.set_xlabel('')
ax.grid(axis='y', linestyle='--')
plt.xticks(rotation=0)

plt.show()

In [None]:
# Mean and standard deviation of the price for every country/month pair
grouped_data = booking.groupby(['country', 'month'])['hotel_price'].agg(['mean', 'std'])

# Ratio of the mean to the standard deviation
grouped_data['mean_divided_by_sd'] = grouped_data['mean'].div(grouped_data['std'])

# Months on the x axis, one bar per country
ratio_by_month = grouped_data['mean_divided_by_sd'].unstack(level=0)

fig, ax = plt.subplots(figsize=(15, 5))
ratio_by_month.plot(kind='bar', ax=ax, title='Mean divided by Standard Deviation per country per month')

# Cosmetics: y label, legend, no x label, dashed horizontal grid, upright tick labels
ax.set_ylabel('Mean divided by Standard Deviation')
ax.legend()
ax.set_xlabel('')
ax.grid(axis='y', linestyle='--')
plt.xticks(rotation=0)

plt.show()


In [None]:
# Daily mean price, one line per country
daily_country_price = booking.groupby(['country', 'check_in_date'])['hotel_price'].mean().unstack(level=0)

fig, ax = plt.subplots(figsize=(15, 5))
daily_country_price.plot(ax=ax, title='Average price per country per day')

# the dates on the axis speak for themselves, so drop the x label
ax.set_xlabel('')

In [None]:
## tables of the price per country (columns) and month (rows)
price_by_country_month = booking.groupby(['country', 'month'])['hotel_price']

# means
df = price_by_country_month.mean().unstack(level=0)
print(df)

# standard deviations, separated from the first table by blank lines
df_sd = price_by_country_month.std().unstack(level=0)
print('\n')
print(df_sd)

## same comparison split by review score group ('bad', 'average', 'good'),
## using the hotel_rating_group column created earlier in the notebook
rating_group_means = booking.groupby(['country', 'month', 'hotel_rating_group'])['hotel_price'].mean()

# one bar per country for every (month, rating group) combination
fig, ax = plt.subplots(figsize=(20, 5))
rating_group_means.unstack(level=0).plot(kind='bar', ax=ax, title='Average price per country per month per rating group')

# legend, no x label, upright tick labels, dashed horizontal grid
ax.legend()
ax.set_xlabel('')
plt.xticks(rotation=0)
ax.grid(axis='y', linestyle='--')

## average price per country and month, split by days_until_check_in
lead_time_means = booking.groupby(['country', 'month', 'days_until_check_in'])['hotel_price'].mean()

# one bar per country for every (month, days_until_check_in) combination
fig, ax = plt.subplots(figsize=(20, 5))
lead_time_means.unstack(level=0).plot(kind='bar', ax=ax, title='Average price per country per month per days until check in')

# legend, no x label, upright tick labels, dashed horizontal grid
ax.legend()
ax.set_xlabel('')
plt.xticks(rotation=0)
ax.grid(axis='y', linestyle='--')

# Average price per month and days_until_check_in (rows) for each country (columns).
# Computed once; the second identical groupby was redundant.
df = booking.groupby(['country', 'month', 'days_until_check_in'])['hotel_price'].mean().unstack(level=0)

# Keep only the August and September rows
df_aug_sep = df[df.index.get_level_values('month').isin(['August', 'September'])]

# September minus August, aligned on days_until_check_in
difference = df_aug_sep.loc['September'] - df_aug_sep.loc['August']
print('\n', difference)

# plot difference
difference.plot(kind='bar', title='Difference in average price between August and September per country per days until check in')


In [None]:
# Average price per region (rows) and month (columns); no hotel-level filtering happens here
price_by_region = booking.groupby(['region', 'month'])['hotel_price'].mean()
df = (
    price_by_region
    .unstack(level=1)
    .drop('October', axis=1)  # compare August and September only
    .dropna()                 # keep regions that have a value in both months
)

# grouped bars, dashed horizontal grid, no x label
df.plot(kind='bar', figsize=(15, 5), title='Average price per region per month')
plt.grid(axis='y', linestyle='--')
plt.xlabel('')

In [None]:
# Balanced comparison: average price per hotel and month, keeping only hotels that
# have a price in both August and September
df = booking.groupby(['hotel_id', 'month', 'region'])['hotel_price'].mean().unstack(level=1)
# drop october
df = df.drop('October', axis=1)
# hotels missing either August or September are removed here
df = df.dropna()


# average the remaining hotels per city
df2 = df.groupby('region').mean()

# The chart shows the August and September levels side by side (not their difference),
# so the title says so
df2.plot(kind='bar', figsize=(15, 5), title='Average price in August and September per city (hotels observed in both months)')
# grid
plt.grid(axis='y', linestyle='--')
# remove xlabel
plt.xlabel('')





In [None]:
# Per-hotel price change (September minus August), using the hotel-level table `df`
# built in the previous cell
hotel_change = df['September'] - df['August']

# mean change per city
difference = hotel_change.groupby('region').mean()

# bar chart with dashed horizontal grid and no x label
difference.plot(kind='bar', figsize=(15, 5), title='Difference in average price between August and September per city')
plt.grid(axis='y', linestyle='--')
plt.xlabel('')


In [None]:
## average price per country per ISO week number of the check-in date
## (no rating-group split is applied here, so the title does not mention one)
df = booking.groupby(['country', 'week_number'])['hotel_price'].mean().unstack(level=0)
# line plot
df.plot(kind='line', figsize=(15, 5), title='Average price per country per week number')
# grid
plt.grid(axis='y', linestyle='--')
# remove xlabel
plt.xlabel('')

# add a vertical line at week 35 # from August 29th, which was a Monday, to September 4th which was a Sunday
plt.axvline(x=35, color='red', linestyle='--')


## smoother view: daily country means averaged over consecutive 3-day bins
daily_means = booking.groupby(['country', 'check_in_date'])['hotel_price'].mean().unstack(level=0)
df = daily_means.resample('3D').mean()

# line chart with dashed horizontal grid and no x label
df.plot(kind='line', figsize=(15, 5), title='Average price per country per 3 days')
plt.grid(axis='y', linestyle='--')
plt.xlabel('')

# mark 2022-09-01
plt.axvline(x='2022-09-01', color='red', linestyle='--')



In [None]:
# Germany and France only, August/September rows with check-in between
# 2022-08-15 and 2022-09-15 (both inclusive)
in_window = (
    booking['month'].isin(['August', 'September'])
    & booking['country'].isin(['Germany', 'France'])
    & (booking['check_in_date'] >= '2022-08-15')
    & (booking['check_in_date'] <= '2022-09-15')
)
df = booking[in_window]

# Daily mean price per country (dates as rows, countries as columns)
df = df.groupby(['check_in_date', 'country'])['hotel_price'].mean().unstack(level=1)

# Line chart with dashed horizontal grid, no x label and a marker at 2022-09-01
df.plot(kind='line', figsize=(15, 5), title='Average price per country per day')
plt.grid(axis='y', linestyle='--')
plt.xlabel('')
plt.axvline(x='2022-09-01', color='red', linestyle='--')

# Mean of the daily values before (15-31 August) and after (1-14 September) the marker
days = df.index.get_level_values('check_in_date')
df_aug = df[(days >= '2022-08-15') & (days < '2022-09-01')].mean()
df_sep = df[(days >= '2022-09-01') & (days < '2022-09-15')].mean()

# Draw each mean as a dashed horizontal segment; xmin/xmax are fractions of the axes
# width, with 0.55 used as the position of the marker
for window_mean, (start, stop) in [(df_aug, (0, 0.55)), (df_sep, (0.55, 1))]:
    for country, colour in [('France', 'blue'), ('Germany', 'orange')]:
        plt.axhline(y=window_mean[country], color=colour, linestyle='--', xmin=start, xmax=stop)

plt.show()


In [None]:
import matplotlib.pyplot as plt

# Germany and France only, August/September rows with check-in between
# 2022-08-15 and 2022-09-15 (both inclusive)
df = booking[booking['month'].isin(['August', 'September'])]
df = df[df['country'].isin(['Germany', 'France'])]
df = df[(df['check_in_date'] >= '2022-08-15') & (df['check_in_date'] <= '2022-09-15')]

### removing outliers
# keep only prices strictly below the overall 99th percentile
df = df[df['hotel_price'] < df['hotel_price'].quantile(0.99)]

# Daily mean price per region, with one column per country
df_grouped = df.groupby(['region', 'check_in_date', 'country'])['hotel_price'].mean().unstack(level=2)
df_grouped = df_grouped.reset_index()

# Regional means before (15-31 August) and after (1-14 September) the marker.
# They do not depend on the loop variable, so they are computed once up front.
df_aug = df[(df['check_in_date'] >= '2022-08-15') & (df['check_in_date'] < '2022-09-01')]
df_aug = df_aug.groupby('region')['hotel_price'].mean()
df_sep = df[(df['check_in_date'] >= '2022-09-01') & (df['check_in_date'] < '2022-09-15')]
df_sep = df_sep.groupby('region')['hotel_price'].mean()

# Two panels per row; at least the original 5x2 grid, more rows if there are more
# than 10 regions (a fixed grid would raise an IndexError in that case)
regions = df_grouped['region'].unique()
n_rows = max(5, (len(regions) + 1) // 2)
fig, axs = plt.subplots(n_rows, 2, figsize=(25, 8 * n_rows))
axs = axs.flatten()

for i, region in enumerate(regions):
    region_data = df_grouped[df_grouped['region'] == region]
    # legend=False: the panels are drawn without a legend, as before
    region_data.plot(x='check_in_date', y=['France', 'Germany'], kind='line', ax=axs[i], title=region, legend=False)

    axs[i].grid(axis='y', linestyle='--')
    axs[i].set_xlabel('')

    # Add a vertical line at 2022-09-01
    axs[i].axvline(x='2022-09-01', color='red', linestyle='--')

    # Dashed segments at the regional mean of each window; xmin/xmax are fractions of
    # the axes width. A region without observations in a window gets no segment.
    if region in df_aug.index:
        axs[i].axhline(y=df_aug[region], color='red', linestyle='--', xmin=0, xmax=0.55)
    if region in df_sep.index:
        axs[i].axhline(y=df_sep[region], color='red', linestyle='--', xmin=0.55, xmax=1)

# Hide panels that were not used
for unused_ax in axs[len(regions):]:
    unused_ax.set_visible(False)

# Adjust the spacing between subplots
plt.tight_layout()
# Show the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Filter bookings for August and September
df = booking[booking['month'].isin(['August', 'September'])]

# keep only prices scraped 7 days before check-in
df = df[df['days_until_check_in'] == 7]

# keep only hotels rated between 9 and 10 (both inclusive)
df = df[df['hotel_rating'].between(9, 10)]

# Select hotels in Germany and France
df = df[df['country'].isin(['Germany', 'France'])]

# keep check-in dates from 2022-08-20 to 2022-09-10 (both inclusive)
df = df[(df['check_in_date'] >= '2022-08-20') & (df['check_in_date'] <= '2022-09-10')]

### removing outliers
# Keep prices strictly below the 90th percentile of their own region. transform() returns
# the regional cut-off aligned to each row, so the 'region' column is preserved; the
# previous groupby.apply relied on apply operating on the grouping column, which is
# deprecated in pandas.
price_cutoff = df.groupby('region')['hotel_price'].transform(lambda prices: prices.quantile(0.90))
df = df[df['hotel_price'] < price_cutoff].reset_index(drop=True)

# Daily mean price per region, with one column per country
df_grouped = df.groupby(['region', 'check_in_date', 'country'])['hotel_price'].mean().unstack(level=2)
df_grouped = df_grouped.reset_index()

# Regional means before and after 2022-09-01. The date bounds are wider than the
# 08-20..09-10 filter above, so they cover all remaining August / September rows.
# They do not depend on the loop variable, so they are computed once up front.
df_aug = df[(df['check_in_date'] >= '2022-08-15') & (df['check_in_date'] < '2022-09-01')]
df_aug = df_aug.groupby('region')['hotel_price'].mean()
df_sep = df[(df['check_in_date'] >= '2022-09-01') & (df['check_in_date'] < '2022-09-15')]
df_sep = df_sep.groupby('region')['hotel_price'].mean()

# Two panels per row; at least the original 5x2 grid, more rows if there are more
# than 10 regions (a fixed grid would raise an IndexError in that case)
regions = df_grouped['region'].unique()
n_rows = max(5, (len(regions) + 1) // 2)
fig, axs = plt.subplots(n_rows, 2, figsize=(25, 8 * n_rows))
axs = axs.flatten()

for i, region in enumerate(regions):
    region_data = df_grouped[df_grouped['region'] == region]
    # legend=False: the panels are drawn without a legend, as before
    region_data.plot(x='check_in_date', y=['France', 'Germany'], kind='line', ax=axs[i], title=region, legend=False)

    axs[i].grid(axis='y', linestyle='--')
    axs[i].set_xlabel('')

    # Add a vertical line at 2022-09-01
    axs[i].axvline(x='2022-09-01', color='red', linestyle='--')

    # Dashed segments at the regional mean of each window; xmin/xmax are fractions of
    # the axes width. A region without observations in a window gets no segment.
    if region in df_aug.index:
        axs[i].axhline(y=df_aug[region], color='red', linestyle='--', xmin=0, xmax=0.57)
    if region in df_sep.index:
        axs[i].axhline(y=df_sep[region], color='red', linestyle='--', xmin=0.57, xmax=1)

# Hide panels that were not used
for unused_ax in axs[len(regions):]:
    unused_ax.set_visible(False)

# Adjust the spacing between subplots
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import radians

# Requires the `booking` DataFrame prepared in the cells above.

# Paris hotels with a check-in in August or September
paris_rows = booking['month'].isin(['August', 'September']) & (booking['region'] == 'Paris')
df = booking[paris_rows]

# Reference point used as the centre of Paris (latitude, longitude in decimal degrees)
lat, lon = 48.8566, 2.3522

# Let's calculate the distance from the center of Paris for each hotel
def haversine_distance(lat1, lon1, lat2, lon2):
    """Great-circle (haversine) distance in kilometres between two points.

    All four arguments are latitudes/longitudes in decimal degrees. Each may be a
    scalar, a NumPy array or a pandas Series; array-likes are combined element-wise,
    so a whole coordinate column can be passed in one call.

    Returns the distance(s) in kilometres on a sphere of radius 6371 km.
    """
    r = 6371  # Radius of the Earth in kilometers
    # np.radians works on scalars and arrays alike (math.radians is scalar-only)
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = np.sin(delta_phi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1]; clamp so the square roots stay real
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distance = r * c
    return distance

# Keep only the prices scraped 7 days before check-in. Filtering first avoids computing
# distances for rows that are discarded anyway.
df = df[df['days_until_check_in'] == 7]

# Distance (km) of every hotel from the centre of Paris. assign() returns a new frame,
# so the new columns are not written into a slice of `booking`.
df = df.assign(distance=df.apply(lambda x: haversine_distance(x['latitude'], x['longitude'], lat, lon), axis=1))

# Flag hotels closer than 1 km to the centre (1 = center, 0 = non-center;
# a missing distance compares False and therefore yields 0)
df['is_center'] = (df['distance'] < 1).astype(int)

# Let's plot the mean average price per day for center and non-center hotels in Paris
df.groupby(['check_in_date', 'is_center'])['hotel_price'].mean().unstack().plot(kind='line', figsize=(10, 5), title='Average price per day for center and non-center hotels in Paris')

plt.show()


In [None]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """Great-circle (haversine) distance in kilometres between two points.

    All four arguments are latitudes/longitudes in decimal degrees. Each may be a
    scalar, a NumPy array or a pandas Series; array-likes are combined element-wise,
    so a whole coordinate column can be passed in one call.

    Returns the distance(s) in kilometres on a sphere of radius 6371 km.
    """
    r = 6371  # Radius of the Earth in kilometers
    # np.radians works on scalars and arrays alike (math.radians is scalar-only)
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = np.sin(delta_phi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1]; clamp so the square roots stay real
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distance = r * c
    return distance


In [None]:

# Requires the `booking` DataFrame and haversine_distance() from the cells above.

# Filter bookings for August and September
df = booking[booking['month'].isin(['August', 'September'])]

# Let's take the hotels from Köln
df = df[df['region'] == 'Köln']

# Coordinates used as the centre of Köln (latitude, longitude in decimal degrees)
lat = 50.94382251183968
lon = 6.958902111350515

# Distance (km) of every hotel from the centre of Köln. assign() returns a new frame,
# so the new columns are not written into a slice of `booking`.
df = df.assign(distance=df.apply(lambda x: haversine_distance(x['latitude'], x['longitude'], lat, lon), axis=1))

# No outlier, rating, distance or lead-time filter is applied in this cell; the
# interactive plot defined further down exposes those filters as controls.

# Flag hotels closer than 3 km to the centre (1 = center, 0 = non-center;
# a missing distance compares False and therefore yields 0)
df['is_center'] = (df['distance'] < 3).astype(int)

# Mean price per check-in day for center and non-center hotels in Köln
df.groupby(['check_in_date', 'is_center'])['hotel_price'].mean().unstack().plot(kind='line', figsize=(10, 5), title='Average price per day for center and non-center hotels in Köln')
# vertical line at 2022-09-01
plt.axvline(x='2022-09-01', color='red', linestyle='--')
plt.show()


## Next: an interactive version of the Köln plot in which the outlier cut-off, days until check-in, rating range, distance range and centre radius can be adjusted



In [7]:
import ipywidgets as widgets
from IPython.display import display

# Requires the `booking` DataFrame prepared in the cells above.

# Köln hotels with a check-in in August or September
koeln_rows = booking['month'].isin(['August', 'September']) & (booking['region'] == 'Köln')
df = booking[koeln_rows]

# Reference point used as the centre of Köln (latitude, longitude in decimal degrees)
center_lat, center_lon = 50.94382251183968, 6.958902111350515

# Let's calculate the distance from the center of Köln for each hotel
def haversine_distance(lat1, lon1, lat2, lon2):
    """Great-circle (haversine) distance in kilometres between two points.

    All four arguments are latitudes/longitudes in decimal degrees. Each may be a
    scalar, a NumPy array or a pandas Series; array-likes are combined element-wise,
    so a whole coordinate column can be passed in one call.

    Returns the distance(s) in kilometres on a sphere of radius 6371 km.
    """
    r = 6371  # Radius of the Earth in kilometers
    # np.radians works on scalars and arrays alike (math.radians is scalar-only)
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = np.sin(delta_phi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1]; clamp so the square roots stay real
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distance = r * c
    return distance

# Distance (km) of every hotel from the centre of Köln. assign() returns a new frame,
# so the column is not written into a filtered slice of `booking`.
df = df.assign(distance=df.apply(lambda x: haversine_distance(x['latitude'], x['longitude'], center_lat, center_lon), axis=1))

# Define a function to create the interactive plot
def interactive_plot(outlier_percentile, days_until_check_in, min_rating, max_rating, min_distance, max_distance, center_distance):
    """Plot the daily mean price of center vs. non-center hotels for the current filters.

    Reads the module-level frame `df`, which must contain the columns 'hotel_price',
    'days_until_check_in', 'hotel_rating', 'distance' and 'check_in_date'.

    Parameters
    ----------
    outlier_percentile : number
        Prices strictly below this percentile of `df['hotel_price']` are kept;
        at 100 or above nothing is trimmed.
    days_until_check_in : 'All' or a value of the column
        'All' keeps every row, otherwise only rows with this exact value are kept.
    min_rating, max_rating : float
        Inclusive bounds on 'hotel_rating'.
    min_distance, max_distance : float
        Inclusive bounds on 'distance'.
    center_distance : float
        Hotels with 'distance' strictly below this value are flagged as center (1),
        all others as non-center (0).

    Returns None; the figure is shown as a side effect. If no row passes the filters
    a short message is printed instead of raising a plotting error.
    """
    price = df['hotel_price']
    cutoff = price.quantile(outlier_percentile / 100)
    # A strict "<" at the 100th percentile would still drop the most expensive rows,
    # so the top setting keeps everything up to and including the maximum.
    keep = price <= cutoff if outlier_percentile >= 100 else price < cutoff

    if days_until_check_in != 'All':
        keep &= df['days_until_check_in'] == days_until_check_in
    keep &= df['hotel_rating'].between(min_rating, max_rating)
    keep &= df['distance'].between(min_distance, max_distance)

    df_filtered = df[keep]
    if df_filtered.empty:
        # e.g. min_rating above max_rating: nothing to draw
        print('No observations match the selected filters.')
        return

    # assign() builds a new frame, so no column is written into a slice of `df`
    df_filtered = df_filtered.assign(is_center=(df_filtered['distance'] < center_distance).astype(int))

    # Plot the mean average price per day for center and non-center hotels in Köln
    plt.figure(figsize=(10, 5))
    df_filtered.groupby(['check_in_date', 'is_center'])['hotel_price'].mean().unstack().plot(kind='line', ax=plt.gca())
    plt.title('Average price per day for center and non-center hotels in Köln')
    plt.axvline(x='2022-09-01', color='red', linestyle='--')
    plt.show()



In [11]:
# One control per argument of interactive_plot
outlier_percentile_slider = widgets.IntSlider(description='Outliers %:', value=99, min=1, max=100)
days_until_check_in_dropdown = widgets.Dropdown(description='Days until check-in:', value='All', options=['All', 1, 3, 7])
min_rating_slider = widgets.FloatSlider(description='Min Rating:', value=7, min=0, max=10, step=0.1)
max_rating_slider = widgets.FloatSlider(description='Max Rating:', value=10, min=0, max=10, step=0.1)
min_distance_slider = widgets.FloatSlider(description='Min Distance:', value=0, min=0, max=100, step=0.1)
max_distance_slider = widgets.FloatSlider(description='Max Distance:', value=20, min=0, max=100, step=0.1)
center_distance_slider = widgets.FloatSlider(description='Center Distance:', value=3, min=0, max=100, step=0.1)

# Map each control to the keyword argument it drives
controls = {
    'outlier_percentile': outlier_percentile_slider,
    'days_until_check_in': days_until_check_in_dropdown,
    'min_rating': min_rating_slider,
    'max_rating': max_rating_slider,
    'min_distance': min_distance_slider,
    'max_distance': max_distance_slider,
    'center_distance': center_distance_slider,
}

# Bind the controls to interactive_plot and render the resulting widget
interactive_plot_widget = widgets.interactive(interactive_plot, **controls)
display(interactive_plot_widget)


interactive(children=(IntSlider(value=99, description='Outliers %:', min=1), Dropdown(description='Days until …