In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:

# Read the Kayak hotel/weather CSV straight from its S3 bucket into a DataFrame
url = 'https://kayak-booking-bucket-12-12-2022.s3.eu-west-3.amazonaws.com/kayak.csv'
kayak_df = pd.read_csv(filepath_or_buffer=url)

For each destination, the table contains daily forecasts for the next 7 days. Let us group the forecasts by city to obtain the mean values of the different weather characteristics over the next 7 days.

In [3]:
# Collapse the table to one row per city by averaging every numeric column;
# the city name comes back as a regular column after reset_index().
weekly_forecast_by_city = (
    kayak_df
    .groupby("city_name")
    .mean(numeric_only=True)
    .reset_index()
)
weekly_forecast_by_city.head()

Unnamed: 0,city_name,city_id,city_latitude,city_longtitude,score,hotel_latitude,hotel_longtitude,weather_id,pressure,humidity,...,temp_day,temp_min,temp_max,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn
0,Aigues-Mortes,298018344.0,43.565823,4.191284,8.769565,43.566611,4.190647,672.285714,1014.285714,62.714286,...,12.987143,8.914286,13.67,10.494286,11.688571,9.661429,11.98,8.767143,10.367143,8.034286
1,Aix-en-Provence,297906924.0,43.529842,5.447474,8.153333,43.527318,5.44623,672.428571,1012.571429,54.714286,...,14.225714,8.384286,15.204286,10.165714,12.64,8.924286,13.131429,9.304286,11.615714,8.064286
2,Amiens,297534793.0,49.894171,2.295695,8.53913,49.89503,2.29915,672.428571,1018.857143,67.714286,...,8.785714,3.125714,9.16,4.674286,6.56,3.25,6.48,1.917143,3.87,0.704286
3,Annecy,298516920.0,45.899235,6.128885,8.204348,45.899733,6.126764,629.714286,1014.714286,66.428571,...,10.76,5.052857,11.647143,6.501429,8.431429,6.044286,9.225714,5.264286,6.832857,5.207143
4,Ariège,297389050.0,42.945537,1.406554,8.526667,42.821752,1.643028,605.285714,1015.428571,76.285714,...,4.642857,0.255714,5.694286,1.324286,2.555714,0.778571,3.495714,0.334286,1.824286,-0.541429


I think that the perfect weather for visiting a city is about 25°C, so I will choose the five cities where the mean daytime temperature for the next 7 days is closest to 25°C.

In [4]:
# Absolute gap between each city's mean daytime temperature and the 25°C target.
# Stored on the per-city frame itself because later cells read this column from it.
weekly_forecast_by_city["dif_from_optimum"] = (25 - weekly_forecast_by_city["temp_day"]).abs()

# Five cities with the smallest gap, closest to the target first
best_cities_df = (
    weekly_forecast_by_city
    .loc[:, ["city_name", "temp_day", "dif_from_optimum"]]
    .sort_values(by="dif_from_optimum")
    .head(5)
)
display(best_cities_df)

Unnamed: 0,city_name,temp_day,dif_from_optimum
12,Cassis,14.448571,10.551429
1,Aix-en-Provence,14.225714,10.774286
10,Bormes-les-Mimosas,13.577143,11.422857
27,Nîmes,13.138571,11.861429
0,Aigues-Mortes,12.987143,12.012857


In [5]:
# Plain Python list of the selected city names, in ranking order
best_cities = best_cities_df.loc[:, "city_name"].to_list()
best_cities

['Cassis', 'Aix-en-Provence', 'Bormes-les-Mimosas', 'Nîmes', 'Aigues-Mortes']

In [6]:
# Full per-city averages, restricted to the selected cities
weekly_forecast_by_city[weekly_forecast_by_city["city_name"].isin(best_cities)]

Unnamed: 0,city_name,city_id,city_latitude,city_longtitude,score,hotel_latitude,hotel_longtitude,weather_id,pressure,humidity,...,temp_min,temp_max,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,dif_from_optimum
0,Aigues-Mortes,298018344.0,43.565823,4.191284,8.769565,43.566611,4.190647,672.285714,1014.285714,62.714286,...,8.914286,13.67,10.494286,11.688571,9.661429,11.98,8.767143,10.367143,8.034286,12.012857
1,Aix-en-Provence,297906924.0,43.529842,5.447474,8.153333,43.527318,5.44623,672.428571,1012.571429,54.714286,...,8.384286,15.204286,10.165714,12.64,8.924286,13.131429,9.304286,11.615714,8.064286,10.774286
10,Bormes-les-Mimosas,297749145.0,43.150697,6.341928,8.364706,43.149056,6.341032,716.285714,1013.0,70.714286,...,9.422857,14.071429,10.071429,11.348571,10.307143,12.511429,9.275714,10.545714,9.371429,11.422857
12,Cassis,298072685.0,43.214036,5.539632,8.404167,43.215186,5.537372,716.0,1012.857143,64.571429,...,10.757143,14.788571,11.907143,13.16,11.472857,13.637143,11.357143,12.548571,10.745714,10.551429
27,Nîmes,297997049.0,43.837425,4.360069,8.309524,43.837314,4.360027,672.142857,1014.0,59.0,...,7.292857,14.515714,9.167143,11.547143,8.112857,12.047143,7.36,9.681429,6.37,11.861429


Creating a map that will show where the cities with the best weather are situated.

In [7]:
# Per-city averages for the selected cities only
best_cities_forecast = weekly_forecast_by_city.loc[
    weekly_forecast_by_city["city_name"].isin(best_cities)
]

# One marker per city: size follows the mean daytime temperature,
# colour follows the distance from the temperature target.
fig = px.scatter_mapbox(
    best_cities_forecast,
    lat="city_latitude",
    lon="city_longtitude",
    hover_name="city_name",
    size="temp_day",
    color="dif_from_optimum",
    color_continuous_scale=px.colors.sequential.Bluered_r,
    zoom=4,
    mapbox_style="open-street-map",
)
fig.show()

Among the hotels situated in the 5 best cities, let us choose the 20 hotels with the best scores.

In [8]:
# Keep only the rows for hotels located in the selected cities
hotels_in_best_cities = kayak_df.loc[kayak_df["city_name"].isin(best_cities)]

# One row per hotel name (numeric columns averaged), best score first, top 20 kept.
# NOTE(review): grouping is by name only, so two different hotels sharing a name
# would be averaged into a single row — confirm names are unique in the data.
best_hotels_df = (
    hotels_in_best_cities
    .groupby("hotel_name")
    .mean(numeric_only=True)
    .sort_values(by="score", ascending=False)
    .reset_index()
    .head(20)
)
best_hotels_df.head()

Unnamed: 0,hotel_name,city_id,city_latitude,city_longtitude,score,hotel_latitude,hotel_longtitude,weather_id,pressure,humidity,...,temp_day,temp_min,temp_max,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn
0,Lilie House,297749145.0,43.150697,6.341928,10.0,43.150903,6.341933,716.285714,1013.0,70.714286,...,13.577143,9.422857,14.071429,10.071429,11.348571,10.307143,12.511429,9.275714,10.545714,9.371429
1,"Crocoloft, proche arènes, loft cosy dans l'écu...",297997049.0,43.837425,4.360069,10.0,43.836461,4.359274,672.142857,1014.0,59.0,...,13.138571,7.292857,14.515714,9.167143,11.547143,8.112857,12.047143,7.36,9.681429,6.37
2,CALANQUE DE PORT PIN,298072685.0,43.214036,5.539632,10.0,43.214591,5.537868,716.0,1012.857143,64.571429,...,14.448571,10.757143,14.788571,11.907143,13.16,11.472857,13.637143,11.357143,12.548571,10.745714
3,Marcelle en Camargue,298018344.0,43.565823,4.191284,9.9,43.566156,4.192345,672.285714,1014.285714,62.714286,...,12.987143,8.914286,13.67,10.494286,11.688571,9.661429,11.98,8.767143,10.367143,8.034286
4,Le Domus Arena quartier arènes avec garage,297997049.0,43.837425,4.360069,9.8,43.836014,4.359911,672.142857,1014.0,59.0,...,13.138571,7.292857,14.515714,9.167143,11.547143,8.112857,12.047143,7.36,9.681429,6.37


In [9]:
# Names of the selected hotels as a plain list, best score first
best_hotels = best_hotels_df.loc[:, "hotel_name"].to_list()

# Show the five highest-scoring hotel names
best_hotels[:5]

['Lilie House',
 "Crocoloft, proche arènes, loft cosy dans l'écusson",
 'CALANQUE DE PORT PIN',
 'Marcelle en Camargue',
 'Le Domus Arena quartier arènes avec garage']

Creating a map that shows the top 20 hotels in the area.

In [10]:
# Rows to plot for the top-20 hotels.
# - The hotels were ranked within the selected cities only, so the lookup is
#   restricted to those cities too; a name-only match over the whole table
#   could also pick up a same-named hotel elsewhere.
# - kayak_df carries several rows per hotel (one per forecast row), so keep a
#   single row per hotel location; otherwise identical markers are stacked on
#   top of each other on the map.
best_hotels_points = (
    kayak_df
    .loc[
        kayak_df["hotel_name"].isin(best_hotels)
        & kayak_df["city_name"].isin(best_cities)
    ]
    .drop_duplicates(subset=["hotel_name", "hotel_latitude", "hotel_longtitude"])
)

# One marker per hotel; both colour and size encode the hotel's score
fig = px.scatter_mapbox(
    best_hotels_points,
    lat="hotel_latitude",
    lon="hotel_longtitude",
    hover_name="hotel_name",
    color="score",
    size="score",
    color_continuous_scale=px.colors.sequential.Bluered,
    zoom=4,
    mapbox_style="open-street-map",
)
fig.show()