In [1]:
import pandas as pd
import plotly.express as px

In [2]:

# Read the Kayak hotels + weather table; pandas fetches the CSV directly
# from the S3 URL below.
url = 'https://kayak-booking-bucket-12-12-2022.s3.eu-west-3.amazonaws.com/kayak.csv'
kayak_df = pd.read_csv(filepath_or_buffer=url)

For each destination, the table contains daily forecasts for the next 7 days. Let us group the forecasts by city to obtain the mean values of the different weather characteristics over the next 7 days.

In [3]:
# Average every numeric column per city, then move city_name back from the
# index into an ordinary column so it can be used for filtering and plotting.
per_city_groups = kayak_df.groupby(by="city_name")
per_city_means = per_city_groups.mean(numeric_only=True)
weekly_forecast_by_city = per_city_means.reset_index()

# Preview the first rows of the aggregated table.
weekly_forecast_by_city.head()

Unnamed: 0,city_name,city_id,city_latitude,city_longtitude,score,hotel_latitude,hotel_longtitude,weather_id,pressure,humidity,...,temp_day,temp_min,temp_max,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn
0,Aigues-Mortes,298018344.0,43.565823,4.191284,8.769565,43.566611,4.190647,663.142857,1011.142857,75.0,...,9.617143,6.437143,10.471429,7.922857,9.142857,7.092857,7.555714,5.272857,6.948571,4.598571
1,Aix-en-Provence,297906924.0,43.529842,5.447474,8.153333,43.527318,5.44623,646.571429,1009.714286,62.285714,...,10.554286,6.537143,11.868571,7.798571,9.584286,7.084286,9.402857,6.341429,8.368571,5.612857
2,Amiens,297534793.0,49.894171,2.295695,8.53913,49.89503,2.29915,715.142857,1018.714286,57.0,...,6.948571,1.47,7.471429,2.774286,4.358571,1.95,3.37,-0.874286,0.625714,-1.874286
3,Annecy,298516920.0,45.899235,6.128885,8.204348,45.899733,6.126764,601.285714,1011.428571,65.428571,...,8.252857,3.221429,8.954286,3.892857,4.951429,3.814286,6.355714,2.918571,3.515714,2.278571
4,Ariège,297389050.0,42.945537,1.406554,8.526667,42.821752,1.643028,605.0,1012.857143,71.857143,...,1.298571,-3.047143,2.007143,-1.992857,-1.228571,-2.63,-0.601429,-3.222857,-2.362857,-3.667143


I think that the perfect weather for visiting a city is about 25°C, so I will choose the five cities whose mean daytime temperature over the next 7 days is closest to 25°C.

In [4]:
# Tunable parameters for the city shortlist: the daytime temperature (°C)
# considered ideal, and how many of the closest cities to keep.
OPTIMAL_TEMP_C = 25
N_BEST_CITIES = 5

# Absolute gap between each city's mean daytime temperature and the optimum.
# The column is stored on weekly_forecast_by_city itself because later cells
# read it from that frame.
weekly_forecast_by_city["dif_from_optimum"] = (
    OPTIMAL_TEMP_C - weekly_forecast_by_city["temp_day"]
).abs()

# Keep only the columns needed for the ranking, order by the smallest gap
# first and take the top N_BEST_CITIES rows.
best_cities_df = (
    weekly_forecast_by_city[["city_name", "temp_day", "dif_from_optimum"]]
    .sort_values(by="dif_from_optimum")
    .head(N_BEST_CITIES)
)
display(best_cities_df)

Unnamed: 0,city_name,temp_day,dif_from_optimum
10,Bormes-les-Mimosas,11.664286,13.335714
14,Collioure,11.298571,13.701429
12,Cassis,11.118571,13.881429
1,Aix-en-Provence,10.554286,14.445714
0,Aigues-Mortes,9.617143,15.382857


In [5]:
# Names of the shortlisted cities as a plain Python list, kept in the same
# order as best_cities_df (smallest temperature gap first).
best_cities = best_cities_df.loc[:, "city_name"].to_list()
best_cities

['Bormes-les-Mimosas',
 'Collioure',
 'Cassis',
 'Aix-en-Provence',
 'Aigues-Mortes']

In [6]:
# Show the full averaged forecast rows for the shortlisted cities only.
is_best_city = weekly_forecast_by_city["city_name"].isin(best_cities)
weekly_forecast_by_city[is_best_city]

Unnamed: 0,city_name,city_id,city_latitude,city_longtitude,score,hotel_latitude,hotel_longtitude,weather_id,pressure,humidity,...,temp_min,temp_max,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,dif_from_optimum
0,Aigues-Mortes,298018344.0,43.565823,4.191284,8.769565,43.566611,4.190647,663.142857,1011.142857,75.0,...,6.437143,10.471429,7.922857,9.142857,7.092857,7.555714,5.272857,6.948571,4.598571,15.382857
1,Aix-en-Provence,297906924.0,43.529842,5.447474,8.153333,43.527318,5.44623,646.571429,1009.714286,62.285714,...,6.537143,11.868571,7.798571,9.584286,7.084286,9.402857,6.341429,8.368571,5.612857,14.445714
10,Bormes-les-Mimosas,297749145.0,43.150697,6.341928,8.364706,43.149056,6.341032,587.285714,1009.571429,69.142857,...,7.807143,12.06,8.587143,9.978571,8.774286,9.548571,6.188571,7.774286,6.338571,13.335714
12,Cassis,298072685.0,43.214036,5.539632,8.404167,43.215186,5.537372,630.571429,1009.571429,67.285714,...,8.581429,11.998571,9.56,10.85,9.227143,8.992857,7.941429,9.371429,7.178571,13.881429
14,Collioure,298221742.0,42.52505,3.083155,8.272,42.524811,3.08269,617.714286,1010.857143,55.571429,...,7.082857,11.782857,8.444286,9.335714,7.592857,9.257143,5.751429,6.914286,4.77,13.701429


Creating a map that will show where the cities with the best weather are situated.

In [7]:
# Rows of the per-city averages that belong to the shortlisted cities.
best_cities_forecast = weekly_forecast_by_city[
    weekly_forecast_by_city["city_name"].isin(best_cities)
]

# One marker per city: marker size follows the mean daytime temperature and
# marker colour follows the gap to the optimum (reversed Bluered scale).
fig = px.scatter_mapbox(
    best_cities_forecast,
    lat="city_latitude",
    lon="city_longtitude",
    hover_name="city_name",
    size="temp_day",
    color="dif_from_optimum",
    color_continuous_scale=px.colors.sequential.Bluered_r,
    zoom=4,
    mapbox_style="open-street-map",
)
fig.show()

Among the hotels situated in the 5 best cities, let us choose the 20 hotels with the best scores.

In [8]:
# Restrict the raw table to hotels located in the shortlisted cities.
hotels_in_best_cities = kayak_df[kayak_df["city_name"].isin(best_cities)]

# Collapse to one row per hotel name (mean of the numeric columns), rank by
# score from highest to lowest and keep the first 20 rows.
# NOTE(review): grouping on hotel_name alone would merge same-named hotels
# from different cities into one row -- confirm names are unique here.
best_hotels_df = (
    hotels_in_best_cities
    .groupby(by="hotel_name")
    .mean(numeric_only=True)
    .sort_values(by="score", ascending=False)
    .reset_index()
    .head(20)
)
best_hotels_df.head()

Unnamed: 0,hotel_name,city_id,city_latitude,city_longtitude,score,hotel_latitude,hotel_longtitude,weather_id,pressure,humidity,...,temp_day,temp_min,temp_max,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn
0,Lilie House,297749145.0,43.150697,6.341928,10.0,43.150903,6.341933,587.285714,1009.571429,69.142857,...,11.664286,7.807143,12.06,8.587143,9.978571,8.774286,9.548571,6.188571,7.774286,6.338571
1,CALANQUE DE PORT PIN,298072685.0,43.214036,5.539632,10.0,43.214591,5.537868,630.571429,1009.571429,67.285714,...,11.118571,8.581429,11.998571,9.56,10.85,9.227143,8.992857,7.941429,9.371429,7.178571
2,Marcelle en Camargue,298018344.0,43.565823,4.191284,9.9,43.566156,4.192345,663.142857,1011.142857,75.0,...,9.617143,6.437143,10.471429,7.922857,9.142857,7.092857,7.555714,5.272857,6.948571,4.598571
3,Sublime Apt With Wifi In The Heart Of The City!,297906924.0,43.529842,5.447474,9.7,43.527615,5.447049,646.571429,1009.714286,62.285714,...,10.554286,6.537143,11.868571,7.798571,9.584286,7.084286,9.402857,6.341429,8.368571,5.612857
4,Allées Provençales,297906924.0,43.529842,5.447474,9.7,43.526634,5.442878,646.571429,1009.714286,62.285714,...,10.554286,6.537143,11.868571,7.798571,9.584286,7.084286,9.402857,6.341429,8.368571,5.612857


In [9]:
# Hotel names from the ranked top-20 table as a plain Python list,
# best score first.
best_hotels = best_hotels_df.loc[:, "hotel_name"].to_list()

# Show the names of the five best-scored hotels.
best_hotels[:5]

['Lilie House',
 'CALANQUE DE PORT PIN',
 'Marcelle en Camargue',
 'Sublime Apt With Wifi In The Heart Of The City!',
 'Allées Provençales']

Creating a map that shows the top 20 hotels in the area.

In [10]:
# kayak_df repeats each hotel once per forecast day and also covers cities
# outside the shortlist. Filtering on hotel_name alone would therefore draw
# stacked duplicate markers and could pull in same-named hotels from other
# cities. Restrict to the best cities and keep one row per (city, hotel).
in_best_city = kayak_df["city_name"].isin(best_cities)
is_best_hotel = kayak_df["hotel_name"].isin(best_hotels)
best_hotels_map_df = kayak_df.loc[in_best_city & is_best_hotel].drop_duplicates(
    subset=["city_name", "hotel_name"]
)

# One marker per hotel; both marker colour and marker size follow the score.
fig = px.scatter_mapbox(
    best_hotels_map_df,
    lat="hotel_latitude",
    lon="hotel_longtitude",
    hover_name="hotel_name",
    color="score",
    size="score",
    color_continuous_scale=px.colors.sequential.Bluered,
    zoom=4,
    mapbox_style="open-street-map",
)
fig.show()