In [1]:
# Installing dotenv library to manage confidential keys saved as environment variables
#!pip install python-dotenv

In [2]:
import json
import pandas as pd
import re
import datetime as dt

import boto3

import os
from urllib.request import urlopen
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
# Loading .json file with hotel urls
url = "https://kayak-booking-bucket-12-12-2022.s3.eu-west-3.amazonaws.com/booking_search_page.json"
response_hotel_urls = urlopen(url)
hotel_urls = json.loads(response_hotel_urls.read())

In [4]:
# Creating a dataframe from .json file
hotel_urls_df = pd.DataFrame.from_records(hotel_urls)

In [5]:
hotel_urls_df.head()

Unnamed: 0,hotel_name,hotel_url,booking_city_url
0,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,<HtmlResponse 200 https://www.booking.com/sear...
1,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,<HtmlResponse 200 https://www.booking.com/sear...
2,Les gîtes de Beille,https://www.booking.com/hotel/fr/les-gites-de-...,<HtmlResponse 200 https://www.booking.com/sear...
3,Le petit nid,https://www.booking.com/hotel/fr/le-petit-nid-...,<HtmlResponse 200 https://www.booking.com/sear...
4,Appartement Fontargente,https://www.booking.com/hotel/fr/appartement-f...,<HtmlResponse 200 https://www.booking.com/sear...


In [6]:
# Checking the length of the dataframe
len(hotel_urls_df)

1000

In [7]:
hotel_urls_df["city_name"] = hotel_urls_df["booking_city_url"].apply(lambda x: re.split(r"=|&", x)[1])

In [8]:
hotel_urls_df["city_name"] = hotel_urls_df["city_name"].apply(lambda x: x.replace("+", " "))

In [9]:
hotel_urls_df = hotel_urls_df.drop("booking_city_url", axis=1)

In [10]:
hotel_urls_df.head()

Unnamed: 0,hotel_name,hotel_url,city_name
0,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,Ariege
1,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,Annecy
2,Les gîtes de Beille,https://www.booking.com/hotel/fr/les-gites-de-...,Ariege
3,Le petit nid,https://www.booking.com/hotel/fr/le-petit-nid-...,Ariege
4,Appartement Fontargente,https://www.booking.com/hotel/fr/appartement-f...,Ariege


In [11]:
hotel_urls_df["city_name"].unique()

array(['Ariege', 'Annecy', 'Avignon', 'Aigues Mortes', 'Bayeux',
       'Aix en Provence', 'Bayonne', 'Amiens', 'Bormes les Mimosas',
       'Besancon', 'Carcassonne', 'Chateau du Haut Koenigsbourg',
       'Biarritz', 'Cassis', 'Collioure', 'Colmar', 'Eguisheim',
       'Grenoble', 'Dijon', 'Gorges du Verdon', 'La Rochelle', 'Lyon',
       'Lille', 'Le Havre', 'Marseille', 'Montauban', 'Nimes',
       'Mont Saint Michel', 'Paris', 'Rouen', 'Strasbourg', 'St Malo',
       'Saintes Maries de la mer', 'Uzes', 'Toulouse'], dtype=object)

In [12]:
hotel_urls_df.columns

Index(['hotel_name', 'hotel_url', 'city_name'], dtype='object')

In [13]:
hotel_urls_df = hotel_urls_df[['city_name', 'hotel_name', 'hotel_url']]

In [14]:
hotel_urls_df.head()

Unnamed: 0,city_name,hotel_name,hotel_url
0,Ariege,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...
1,Annecy,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...
2,Ariege,Les gîtes de Beille,https://www.booking.com/hotel/fr/les-gites-de-...
3,Ariege,Le petit nid,https://www.booking.com/hotel/fr/le-petit-nid-...
4,Ariege,Appartement Fontargente,https://www.booking.com/hotel/fr/appartement-f...


In [15]:
# Checking number of unique urls, normaly each hotel has a unique url addres on booking:
hotel_urls_df["hotel_url"].nunique()

1000

In [16]:
len(hotel_urls_df)

1000

In [17]:
# Looking at the rows that contain duplicate urls
hotel_urls_df[hotel_urls_df.duplicated(['hotel_url'], keep=False)]


Unnamed: 0,city_name,hotel_name,hotel_url


In [18]:
# It turns that some hotels found for Colmar are the same as hotels found for Chateau du Haut Koenigsbourg.
# It is normal because Chateau du Haut Koenigsbourg is situated near Colmar.

In [19]:
# Loading .json file with hotels data
url = "https://kayak-booking-bucket-12-12-2022.s3.eu-west-3.amazonaws.com/booking_hotels_data.json"
response_hotels_data = urlopen(url)
hotels = json.loads(response_hotels_data.read())

In [20]:
hotels_df = pd.DataFrame.from_records(hotels)

In [21]:
hotels_df["score"][hotels_df["score"].isnull()].index

Int64Index([ 41,  47,  57,  62,  67,  72,  91, 100, 113, 114, 159, 161, 163,
            164, 169, 188, 193, 222, 223, 230, 231, 234, 238, 244, 247, 249,
            251, 252, 257, 272, 273, 283, 285, 286, 321, 345, 346, 351, 357,
            363, 365, 366, 371, 373, 375, 398, 405, 414, 424, 428, 495, 502,
            563, 583, 591, 592, 601, 664, 669, 690, 692, 732, 740, 746, 749,
            754, 796, 800, 819, 836, 838, 878, 886, 901, 906, 909, 915, 917,
            918, 925, 926, 929, 930, 934, 935, 937, 939, 940, 941, 943, 944,
            954, 959, 982, 984],
           dtype='int64')

In [22]:
hotels_df.head(10)

Unnamed: 0,hotel_name,score,description,location,latitide,longtitude,hotel_url
0,GuestReady - Cozy perfection in the city centre,8.0,"Located in the centre of Annecy, 37 km from Ro...","\n8 Rue Camille Dunant, 74000 Annecy, France\n",45.8999407,6.1270546,<HtmlResponse 200 https://www.booking.com/hote...
1,Appartement Fontargente,9.2,Appartement Fontargente is located in Ax-les-T...,\npremier étage 19 Avenue Docteur François Gom...,42.71847547,1.84064515,<HtmlResponse 200 https://www.booking.com/hote...
2,Résidence Néméa Les Balcons d'Ax,7.1,Résidence Néméa Les Balcons d'Ax is located in...,"\nStation De Bonascre, 09110 Ax-les-Thermes, F...",42.70237131,1.81530745,<HtmlResponse 200 https://www.booking.com/hote...
3,Lagrange Vacances Les Chalets d’Ax,7.6,You're eligible for a Genius discount at Lagra...,\nQuartier De Castel Maou - Chemin d'Aouredou ...,42.71383156,1.83920681,<HtmlResponse 200 https://www.booking.com/hote...
4,Terres de France - Domaine du Palais,7.5,Located in Saint-Lizier in the Midi-Pyrénées r...,"\nChemin du Parc Le Palais des Evêques, 09190 ...",43.003051,1.13713,<HtmlResponse 200 https://www.booking.com/hote...
5,Chalet bois au milieu des Pyrénées,9.1,You're eligible for a Genius discount at Chale...,"\n3 Las Planos, 09390 LʼHospitalet-près-lʼAndo...",42.58990385,1.79836833,<HtmlResponse 200 https://www.booking.com/hote...
6,Le petit nid,9.2,You're eligible for a Genius discount at Le pe...,"\nRDC 1 Rue de la Place, 09330 Montgaillard, F...",42.93227802,1.63516787,<HtmlResponse 200 https://www.booking.com/hote...
7,Les gîtes de Beille,8.1,You're eligible for a Genius discount at Les g...,"\n25 Quartier la Bexane, 09310 Les Cabannes, F...",42.7855571,1.68166541,<HtmlResponse 200 https://www.booking.com/hote...
8,"Grange rénovée, Pyrénées Ariégeoises",9.3,You're eligible for a Genius discount at Grang...,"\nlieu dit LAUJOU, 09220 Auzat, France\n",42.72477458,1.45421439,<HtmlResponse 200 https://www.booking.com/hote...
9,"The Originals Access, Hôtel Foix (P'tit Dej-Ho...",7.9,You're eligible for a Genius discount at The O...,\nAncienne Rn 20 Lieu dit Le Couloumié - Dépar...,42.9921119,1.61583245,<HtmlResponse 200 https://www.booking.com/hote...


In [23]:
# Renaming columns for clarity
hotels_df.rename(columns = {'latitide':'hotel_latitude'}, inplace = True)
hotels_df.rename(columns = {'longtitude':'hotel_longtitude'}, inplace = True)

In [24]:
# the address of hotels contains html tags /n, we will use .strip() in  order to get rid of them
hotels_df["location"] = hotels_df["location"].apply(lambda x: str.strip(x))


In [25]:
hotels_df.head()

Unnamed: 0,hotel_name,score,description,location,hotel_latitude,hotel_longtitude,hotel_url
0,GuestReady - Cozy perfection in the city centre,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546,<HtmlResponse 200 https://www.booking.com/hote...
1,Appartement Fontargente,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515,<HtmlResponse 200 https://www.booking.com/hote...
2,Résidence Néméa Les Balcons d'Ax,7.1,Résidence Néméa Les Balcons d'Ax is located in...,"Station De Bonascre, 09110 Ax-les-Thermes, France",42.70237131,1.81530745,<HtmlResponse 200 https://www.booking.com/hote...
3,Lagrange Vacances Les Chalets d’Ax,7.6,You're eligible for a Genius discount at Lagra...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,<HtmlResponse 200 https://www.booking.com/hote...
4,Terres de France - Domaine du Palais,7.5,Located in Saint-Lizier in the Midi-Pyrénées r...,"Chemin du Parc Le Palais des Evêques, 09190 Sa...",43.003051,1.13713,<HtmlResponse 200 https://www.booking.com/hote...


In [26]:
hotels_df["location"] = hotels_df["location"].apply(lambda x: str.strip(x))

In [27]:
hotels_df.head()

Unnamed: 0,hotel_name,score,description,location,hotel_latitude,hotel_longtitude,hotel_url
0,GuestReady - Cozy perfection in the city centre,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546,<HtmlResponse 200 https://www.booking.com/hote...
1,Appartement Fontargente,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515,<HtmlResponse 200 https://www.booking.com/hote...
2,Résidence Néméa Les Balcons d'Ax,7.1,Résidence Néméa Les Balcons d'Ax is located in...,"Station De Bonascre, 09110 Ax-les-Thermes, France",42.70237131,1.81530745,<HtmlResponse 200 https://www.booking.com/hote...
3,Lagrange Vacances Les Chalets d’Ax,7.6,You're eligible for a Genius discount at Lagra...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,<HtmlResponse 200 https://www.booking.com/hote...
4,Terres de France - Domaine du Palais,7.5,Located in Saint-Lizier in the Midi-Pyrénées r...,"Chemin du Parc Le Palais des Evêques, 09190 Sa...",43.003051,1.13713,<HtmlResponse 200 https://www.booking.com/hote...


In [28]:
hotels_df["description"] = hotels_df["description"].apply(lambda x: x.split("To save at this property, all you have to do is  .")[1] if 'Genius discount' in x else x)

In [29]:
hotels_df["hotel_url"] = hotels_df["hotel_url"].apply(lambda x: x.strip("<HtmlResponse 200 ").split("?")[0])

In [30]:
hotels_df.head()

Unnamed: 0,hotel_name,score,description,location,hotel_latitude,hotel_longtitude,hotel_url
0,GuestReady - Cozy perfection in the city centre,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546,https://www.booking.com/hotel/fr/guestready-to...
1,Appartement Fontargente,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515,https://www.booking.com/hotel/fr/appartement-f...
2,Résidence Néméa Les Balcons d'Ax,7.1,Résidence Néméa Les Balcons d'Ax is located in...,"Station De Bonascre, 09110 Ax-les-Thermes, France",42.70237131,1.81530745,https://www.booking.com/hotel/fr/residence-les...
3,Lagrange Vacances Les Chalets d’Ax,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,https://www.booking.com/hotel/fr/residence-les...
4,Terres de France - Domaine du Palais,7.5,Located in Saint-Lizier in the Midi-Pyrénées r...,"Chemin du Parc Le Palais des Evêques, 09190 Sa...",43.003051,1.13713,https://www.booking.com/hotel/fr/terres-de-fra...


In [31]:
hotels_df.head()

Unnamed: 0,hotel_name,score,description,location,hotel_latitude,hotel_longtitude,hotel_url
0,GuestReady - Cozy perfection in the city centre,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546,https://www.booking.com/hotel/fr/guestready-to...
1,Appartement Fontargente,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515,https://www.booking.com/hotel/fr/appartement-f...
2,Résidence Néméa Les Balcons d'Ax,7.1,Résidence Néméa Les Balcons d'Ax is located in...,"Station De Bonascre, 09110 Ax-les-Thermes, France",42.70237131,1.81530745,https://www.booking.com/hotel/fr/residence-les...
3,Lagrange Vacances Les Chalets d’Ax,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,https://www.booking.com/hotel/fr/residence-les...
4,Terres de France - Domaine du Palais,7.5,Located in Saint-Lizier in the Midi-Pyrénées r...,"Chemin du Parc Le Palais des Evêques, 09190 Sa...",43.003051,1.13713,https://www.booking.com/hotel/fr/terres-de-fra...


In [32]:
hotels_df["hotel_url"] = hotels_df["hotel_url"].apply(lambda x: x.replace("en-gb.", "") if "en-gb." in x else x)

In [33]:
hotels_df.head()

Unnamed: 0,hotel_name,score,description,location,hotel_latitude,hotel_longtitude,hotel_url
0,GuestReady - Cozy perfection in the city centre,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546,https://www.booking.com/hotel/fr/guestready-to...
1,Appartement Fontargente,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515,https://www.booking.com/hotel/fr/appartement-f...
2,Résidence Néméa Les Balcons d'Ax,7.1,Résidence Néméa Les Balcons d'Ax is located in...,"Station De Bonascre, 09110 Ax-les-Thermes, France",42.70237131,1.81530745,https://www.booking.com/hotel/fr/residence-les...
3,Lagrange Vacances Les Chalets d’Ax,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,https://www.booking.com/hotel/fr/residence-les...
4,Terres de France - Domaine du Palais,7.5,Located in Saint-Lizier in the Midi-Pyrénées r...,"Chemin du Parc Le Palais des Evêques, 09190 Sa...",43.003051,1.13713,https://www.booking.com/hotel/fr/terres-de-fra...


In [34]:
# Looking at the rows that contain duplicate urls
hotels_df[hotels_df.duplicated(['hotel_url'], keep=False)]

Unnamed: 0,hotel_name,score,description,location,hotel_latitude,hotel_longtitude,hotel_url


In [35]:
hotels_df.columns

Index(['hotel_name', 'score', 'description', 'location', 'hotel_latitude',
       'hotel_longtitude', 'hotel_url'],
      dtype='object')

In [36]:
hotels_df = hotels_df[['hotel_url', 'hotel_name', 'score', 'description', 'location', 'hotel_latitude',
       'hotel_longtitude']]

In [37]:
hotels_df.head()

Unnamed: 0,hotel_url,hotel_name,score,description,location,hotel_latitude,hotel_longtitude
0,https://www.booking.com/hotel/fr/guestready-to...,GuestReady - Cozy perfection in the city centre,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546
1,https://www.booking.com/hotel/fr/appartement-f...,Appartement Fontargente,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515
2,https://www.booking.com/hotel/fr/residence-les...,Résidence Néméa Les Balcons d'Ax,7.1,Résidence Néméa Les Balcons d'Ax is located in...,"Station De Bonascre, 09110 Ax-les-Thermes, France",42.70237131,1.81530745
3,https://www.booking.com/hotel/fr/residence-les...,Lagrange Vacances Les Chalets d’Ax,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
4,https://www.booking.com/hotel/fr/terres-de-fra...,Terres de France - Domaine du Palais,7.5,Located in Saint-Lizier in the Midi-Pyrénées r...,"Chemin du Parc Le Palais des Evêques, 09190 Sa...",43.003051,1.13713


In [38]:
hotel_info_df = hotel_urls_df.merge(hotels_df, how="left")

In [39]:
hotel_info_df["city_name"] = hotel_info_df["city_name"].apply(lambda x: x.replace("St ", "Saint ") if "St " in x else x)

In [40]:
hotel_info_df

Unnamed: 0,city_name,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude
0,Ariege,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
1,Annecy,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.89994070,6.12705460
2,Ariege,Les gîtes de Beille,https://www.booking.com/hotel/fr/les-gites-de-...,8.1,Located in Les Cabannes in the Midi-Pyrénées ...,"25 Quartier la Bexane, 09310 Les Cabannes, France",42.78555710,1.68166541
3,Ariege,Le petit nid,https://www.booking.com/hotel/fr/le-petit-nid-...,9.2,"Offering garden views, Le petit nid is an acc...","RDC 1 Rue de la Place, 09330 Montgaillard, France",42.93227802,1.63516787
4,Ariege,Appartement Fontargente,https://www.booking.com/hotel/fr/appartement-f...,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515
...,...,...,...,...,...,...,...,...
995,Toulouse,Les Toits du Capitole - Loft de standing 65 m2...,https://www.booking.com/hotel/fr/les-toits-du-...,9.2,Les Toits du Capitole is a spacious and moder...,"6, rue des Lois, 31000 Toulouse, France",43.60510201,1.44236863
996,Toulouse,La Parenthèse cosy - Appartement climatisé en ...,https://www.booking.com/hotel/fr/le-lafayette-...,8.3,"Ideally situated in the centre of Toulouse, L...","20 Rue Lafayette, 31000 Toulouse, France",43.60484808,1.44617052
997,Toulouse,T2 COSY - CAPITOLE - CALME - Wi-Fi - NETFLIX,https://www.booking.com/hotel/fr/studio-cosy-h...,8.7,"Situated in the centre of Toulouse, 3.7 km fro...","1er étage 44 Rue Saint-Rome, 31000 Toulouse, F...",43.60330820,1.44347870
998,Toulouse,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.60333730,1.44330860


In [41]:
hotel_info_df["city_name"].value_counts()

Paris                           35
Besancon                        35
La Rochelle                     35
Mont Saint Michel               35
Biarritz                        35
Le Havre                        35
Aix en Provence                 35
Bayeux                          35
Bayonne                         30
Saint Malo                      30
Lille                           30
Saintes Maries de la mer        30
Collioure                       30
Colmar                          30
Dijon                           30
Uzes                            30
Ariege                          30
Strasbourg                      25
Rouen                           25
Nimes                           25
Montauban                       25
Marseille                       25
Grenoble                        25
Lyon                            25
Gorges du Verdon                25
Annecy                          25
Eguisheim                       25
Cassis                          25
Chateau du Haut Koen

In [42]:
hotel_info_df["hotel_url"].nunique()

1000

In [43]:
# Checking for duplicate rows
hotel_info_df[hotel_info_df.duplicated(['hotel_url'], keep=False)]

Unnamed: 0,city_name,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude


In [44]:
hotel_info_df["city_name"].unique()

array(['Ariege', 'Annecy', 'Avignon', 'Aigues Mortes', 'Bayeux',
       'Aix en Provence', 'Bayonne', 'Amiens', 'Bormes les Mimosas',
       'Besancon', 'Carcassonne', 'Chateau du Haut Koenigsbourg',
       'Biarritz', 'Cassis', 'Collioure', 'Colmar', 'Eguisheim',
       'Grenoble', 'Dijon', 'Gorges du Verdon', 'La Rochelle', 'Lyon',
       'Lille', 'Le Havre', 'Marseille', 'Montauban', 'Nimes',
       'Mont Saint Michel', 'Paris', 'Rouen', 'Strasbourg', 'Saint Malo',
       'Saintes Maries de la mer', 'Uzes', 'Toulouse'], dtype=object)

In [45]:
# As seen above, 

In [46]:
len(hotels_df)

1000

In [47]:
len(hotel_urls)

1000

In [48]:
len(hotel_info_df)

1000

In [49]:
hotel_info_df.isnull().sum()

city_name            0
hotel_name           0
hotel_url            0
score               95
description          0
location             0
hotel_latitude       0
hotel_longtitude     0
dtype: int64

In [50]:
hotel_info_df[hotel_info_df["score"].isnull()]

Unnamed: 0,city_name,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude
41,Annecy,Le Factory : Duplex en vieille ville côté lac,https://www.booking.com/hotel/fr/le-factory-du...,,"Offering free WiFi, Le Factory: Duplex en viei...","9 Rue Grenette, 74000 Annecy, France",45.89910140,6.12708550
48,Annecy,Appartement de 2 chambres avec wifi a Annecy,https://www.booking.com/hotel/fr/apartment-rue...,,"Located in the centre of Annecy, 37 km from Ro...","23 Rue Notre Dame Haute-Savoie, 74000 Annecy, ...",45.89997080,6.12629850
56,Avignon,Le Saboly,https://www.booking.com/hotel/fr/le-saboly-avi...,,Situated less than 1 km from Avignon Central S...,"2B Place Nicolas Saboly, 84000 Avignon, France",43.94877820,4.80732660
62,Avignon,"Les Logis de Halley, au cœur de la cité des Papes",https://www.booking.com/hotel/fr/au-coeur-de-l...,,"Set in the centre of Avignon, just 400 m from ...","2ème étage 2 Rue Edmond Halley, 84000 Avignon,...",43.94883313,4.80700983
66,Avignon,Loft hyper centre,https://www.booking.com/hotel/fr/loft-hyper-ce...,,"Situated in the centre of Avignon, just 400 m ...","4 Rue de la Rappe, 84000 Avignon, France",43.94848300,4.80745830
...,...,...,...,...,...,...,...,...
944,Saintes Maries de la mer,"Appartement Saintes-Maries-de-la-Mer, 2 pièces...",https://www.booking.com/hotel/fr/appartement-s...,,"Situated 600 m from Amphora Beach, 600 m from ...","Les Impériaux II n°9 rue du vibre, 13460 Saint...",43.45381260,4.42963600
956,Uzes,"Charme et confort au calme, au coeur historiqu...",https://www.booking.com/hotel/fr/charme-et-con...,,"Charme et confort au calme, au coeur historiqu...","2 Rue de la Calade, 30700 Uzès, France",44.01248800,4.42089160
960,Uzes,Le 6 Bis by Les Cailloux Dorés,https://www.booking.com/hotel/fr/le-6-bis-uzes...,,"Located in Uzès, 31 km from Parc Expo Nîmes, 3...","6 bis Rue Paul Foussat, 30700 Uzès, France",44.01120400,4.42000740
981,Toulouse,Exceptionnel Appartement Terasse,https://www.booking.com/hotel/fr/exceptionnel-...,,"Conveniently located in the heart of Toulouse,...","23 Rue Lafayette, 31000 Toulouse, France",43.60496740,1.44538820


In [51]:
none_score_indexes = hotel_info_df[hotel_info_df["score"].isnull()].index

In [52]:
for index in none_score_indexes:
    print(hotel_info_df["description"].iloc[index])
    print()
# it turns out 

Offering free WiFi, Le Factory: Duplex en vieille ville côté lac is set in the centre of Annecy within 37 km of Rochexpo and 38 km from Bourget Lake. This apartment is 46 km from Gare de Cornavin and 46 km from St. Pierre Cathedral. The 2-bedroom apartment has a living room with a flat-screen TV, a fully equipped kitchen with fridge and oven, and 1 bathroom with a hairdryer. Towels and bed linen are offered in the apartment. Stade de Genève is 43 km from the apartment, while Jet d'Eau is 46 km from the property. The nearest airport is Chambéry-Savoie Airport, 43 km from Le Factory: Duplex en vieille ville côté lac. 

Located in the centre of Annecy, 37 km from Rochexpo and 38 km from Bourget Lake, Appartement de 2 chambres avec wifi a Annecy offers free WiFi. This apartment is 43 km from Stade de Genève and 46 km from Jet d'Eau. The apartment includes 2 bedrooms, a kitchen with a fridge and an oven, as well as a kettle. Gare de Cornavin is 46 km from the apartment, while St. Pierre Cat

As can be seen by descriptions, the properties without a score are most often private apartments or villas. If they have appeared on the site not long ago, it is normal that they don't have a score yet.

In [53]:
hotel_info_df['hotel_url'].nunique()

1000

In [54]:
hotel_info_df = hotel_info_df.drop_duplicates().reset_index(drop=True)

In [55]:
len(hotel_info_df)

1000

In [56]:
hotel_info_df.to_csv("hotels.csv")

In [57]:
len(hotel_info_df)

1000

In [58]:
# Loading .csv file with city coordinates from S3 bucket
url = 'https://kayak-booking-bucket-12-12-2022.s3.eu-west-3.amazonaws.com/city_coordinates.csv'
city_coord = pd.read_csv(url)
city_coord.head(5)

Unnamed: 0,place_id,city_name,lat,lon
0,156094680,Mont Saint-Michel,48.635954,-1.51146
1,297756747,Saint-Malo,48.649518,-2.026041
2,297981358,Bayeux,49.276462,-0.702474
3,298137491,Le Havre,49.493898,0.107973
4,297518815,Rouen,49.440459,1.093966


In [59]:
city_coord["city_name"].unique()

array(['Mont Saint-Michel', 'Saint-Malo', 'Bayeux', 'Le Havre', 'Rouen',
       'Paris', 'Amiens', 'Lille', 'Strasbourg',
       'Château du Haut-Kœnigsbourg', 'Colmar', 'Eguisheim', 'Besançon',
       'Dijon', 'Annecy', 'Grenoble', 'Lyon', 'Gorges du Verdon',
       'Bormes-les-Mimosas', 'Cassis', 'Marseilla', 'Aix-en-Provence',
       'Avignon', 'Uzès', 'Nîmes', 'Aigues-Mortes',
       'Saintes-Maries-de-la-Mer', 'Collioure', 'Carcassonne', 'Ariège',
       'Toulouse', 'Montauban', 'Biarritz', 'Bayonne', 'La Rochelle'],
      dtype=object)

In [60]:
len(city_coord["city_name"].unique())

35

In [61]:
city_names_coord= city_coord["city_name"].tolist()

In [62]:
city_names_coord.sort()

In [63]:
len(city_names_coord)

35

In [64]:
hotel_info_df["city_name"].unique().tolist()

['Ariege',
 'Annecy',
 'Avignon',
 'Aigues Mortes',
 'Bayeux',
 'Aix en Provence',
 'Bayonne',
 'Amiens',
 'Bormes les Mimosas',
 'Besancon',
 'Carcassonne',
 'Chateau du Haut Koenigsbourg',
 'Biarritz',
 'Cassis',
 'Collioure',
 'Colmar',
 'Eguisheim',
 'Grenoble',
 'Dijon',
 'Gorges du Verdon',
 'La Rochelle',
 'Lyon',
 'Lille',
 'Le Havre',
 'Marseille',
 'Montauban',
 'Nimes',
 'Mont Saint Michel',
 'Paris',
 'Rouen',
 'Strasbourg',
 'Saint Malo',
 'Saintes Maries de la mer',
 'Uzes',
 'Toulouse']

In [65]:
city_names_hotel = hotel_info_df["city_name"].unique().tolist()

In [66]:
len(city_names_hotel)

35

In [67]:
city_names_hotel.sort()

In [68]:
len(city_names_hotel)

35

In [69]:
print(city_names_coord)

print(city_names_hotel)

['Aigues-Mortes', 'Aix-en-Provence', 'Amiens', 'Annecy', 'Ariège', 'Avignon', 'Bayeux', 'Bayonne', 'Besançon', 'Biarritz', 'Bormes-les-Mimosas', 'Carcassonne', 'Cassis', 'Château du Haut-Kœnigsbourg', 'Collioure', 'Colmar', 'Dijon', 'Eguisheim', 'Gorges du Verdon', 'Grenoble', 'La Rochelle', 'Le Havre', 'Lille', 'Lyon', 'Marseilla', 'Mont Saint-Michel', 'Montauban', 'Nîmes', 'Paris', 'Rouen', 'Saint-Malo', 'Saintes-Maries-de-la-Mer', 'Strasbourg', 'Toulouse', 'Uzès']
['Aigues Mortes', 'Aix en Provence', 'Amiens', 'Annecy', 'Ariege', 'Avignon', 'Bayeux', 'Bayonne', 'Besancon', 'Biarritz', 'Bormes les Mimosas', 'Carcassonne', 'Cassis', 'Chateau du Haut Koenigsbourg', 'Collioure', 'Colmar', 'Dijon', 'Eguisheim', 'Gorges du Verdon', 'Grenoble', 'La Rochelle', 'Le Havre', 'Lille', 'Lyon', 'Marseille', 'Mont Saint Michel', 'Montauban', 'Nimes', 'Paris', 'Rouen', 'Saint Malo', 'Saintes Maries de la mer', 'Strasbourg', 'Toulouse', 'Uzes']


In [70]:
city_names_coord = [city.replace("Marseilla", "Marseille") for city in city_names_coord]


In [71]:
print(city_names_coord)

print(city_names_hotel)

['Aigues-Mortes', 'Aix-en-Provence', 'Amiens', 'Annecy', 'Ariège', 'Avignon', 'Bayeux', 'Bayonne', 'Besançon', 'Biarritz', 'Bormes-les-Mimosas', 'Carcassonne', 'Cassis', 'Château du Haut-Kœnigsbourg', 'Collioure', 'Colmar', 'Dijon', 'Eguisheim', 'Gorges du Verdon', 'Grenoble', 'La Rochelle', 'Le Havre', 'Lille', 'Lyon', 'Marseille', 'Mont Saint-Michel', 'Montauban', 'Nîmes', 'Paris', 'Rouen', 'Saint-Malo', 'Saintes-Maries-de-la-Mer', 'Strasbourg', 'Toulouse', 'Uzès']
['Aigues Mortes', 'Aix en Provence', 'Amiens', 'Annecy', 'Ariege', 'Avignon', 'Bayeux', 'Bayonne', 'Besancon', 'Biarritz', 'Bormes les Mimosas', 'Carcassonne', 'Cassis', 'Chateau du Haut Koenigsbourg', 'Collioure', 'Colmar', 'Dijon', 'Eguisheim', 'Gorges du Verdon', 'Grenoble', 'La Rochelle', 'Le Havre', 'Lille', 'Lyon', 'Marseille', 'Mont Saint Michel', 'Montauban', 'Nimes', 'Paris', 'Rouen', 'Saint Malo', 'Saintes Maries de la mer', 'Strasbourg', 'Toulouse', 'Uzes']


In [72]:
print(list(zip(city_names_hotel, city_names_coord)))

[('Aigues Mortes', 'Aigues-Mortes'), ('Aix en Provence', 'Aix-en-Provence'), ('Amiens', 'Amiens'), ('Annecy', 'Annecy'), ('Ariege', 'Ariège'), ('Avignon', 'Avignon'), ('Bayeux', 'Bayeux'), ('Bayonne', 'Bayonne'), ('Besancon', 'Besançon'), ('Biarritz', 'Biarritz'), ('Bormes les Mimosas', 'Bormes-les-Mimosas'), ('Carcassonne', 'Carcassonne'), ('Cassis', 'Cassis'), ('Chateau du Haut Koenigsbourg', 'Château du Haut-Kœnigsbourg'), ('Collioure', 'Collioure'), ('Colmar', 'Colmar'), ('Dijon', 'Dijon'), ('Eguisheim', 'Eguisheim'), ('Gorges du Verdon', 'Gorges du Verdon'), ('Grenoble', 'Grenoble'), ('La Rochelle', 'La Rochelle'), ('Le Havre', 'Le Havre'), ('Lille', 'Lille'), ('Lyon', 'Lyon'), ('Marseille', 'Marseille'), ('Mont Saint Michel', 'Mont Saint-Michel'), ('Montauban', 'Montauban'), ('Nimes', 'Nîmes'), ('Paris', 'Paris'), ('Rouen', 'Rouen'), ('Saint Malo', 'Saint-Malo'), ('Saintes Maries de la mer', 'Saintes-Maries-de-la-Mer'), ('Strasbourg', 'Strasbourg'), ('Toulouse', 'Toulouse'), ('Uz

In [73]:
"""for i in range(0, len(hotel_info_df)):
    if hotel_info_df["city_name"].loc[i] in city_names:
        index = city_names.index(hotel_info_df["city_name"].loc[i])
        hotel_info_df["city_name"].loc[i].replace(hotel_info_df["city_name"].loc[i], city_list[index])
    else:
        hotel_info_df["city_name"].loc[i].replace(hotel_info_df["city_name"].loc[i], "None")"""

'for i in range(0, len(hotel_info_df)):\n    if hotel_info_df["city_name"].loc[i] in city_names:\n        index = city_names.index(hotel_info_df["city_name"].loc[i])\n        hotel_info_df["city_name"].loc[i].replace(hotel_info_df["city_name"].loc[i], city_list[index])\n    else:\n        hotel_info_df["city_name"].loc[i].replace(hotel_info_df["city_name"].loc[i], "None")'

In [74]:
for i in range(0, len(hotel_info_df["city_name"])):
    if hotel_info_df["city_name"][i] in city_names_hotel:
        index = city_names_hotel.index(hotel_info_df["city_name"].loc[i])
        hotel_info_df["city_name"] = hotel_info_df["city_name"].replace(hotel_info_df["city_name"][i], city_names_coord[index])
    else: 
        pass
        

In [75]:
hotel_info_df.head()

Unnamed: 0,city_name,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude
0,Ariège,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
1,Annecy,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546
2,Ariège,Les gîtes de Beille,https://www.booking.com/hotel/fr/les-gites-de-...,8.1,Located in Les Cabannes in the Midi-Pyrénées ...,"25 Quartier la Bexane, 09310 Les Cabannes, France",42.7855571,1.68166541
3,Ariège,Le petit nid,https://www.booking.com/hotel/fr/le-petit-nid-...,9.2,"Offering garden views, Le petit nid is an acc...","RDC 1 Rue de la Place, 09330 Montgaillard, France",42.93227802,1.63516787
4,Ariège,Appartement Fontargente,https://www.booking.com/hotel/fr/appartement-f...,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515


In [76]:
hotel_info_df.tail()

Unnamed: 0,city_name,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude
995,Toulouse,Les Toits du Capitole - Loft de standing 65 m2...,https://www.booking.com/hotel/fr/les-toits-du-...,9.2,Les Toits du Capitole is a spacious and moder...,"6, rue des Lois, 31000 Toulouse, France",43.60510201,1.44236863
996,Toulouse,La Parenthèse cosy - Appartement climatisé en ...,https://www.booking.com/hotel/fr/le-lafayette-...,8.3,"Ideally situated in the centre of Toulouse, L...","20 Rue Lafayette, 31000 Toulouse, France",43.60484808,1.44617052
997,Toulouse,T2 COSY - CAPITOLE - CALME - Wi-Fi - NETFLIX,https://www.booking.com/hotel/fr/studio-cosy-h...,8.7,"Situated in the centre of Toulouse, 3.7 km fro...","1er étage 44 Rue Saint-Rome, 31000 Toulouse, F...",43.6033082,1.4434787
998,Toulouse,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.6033373,1.4433086
999,Toulouse,Hotel Albert 1er,https://www.booking.com/hotel/fr/hotelalbertto...,8.3,"Located in the heart of Toulouse, the Hotel Al...","8 rue Rivals, 31000 Toulouse, France",43.6060322,1.44491941


In [77]:
hotel_info_df["city_name"].nunique()

35

In [78]:
len(city_names_coord)

35

In [79]:
city_coord.columns

Index(['place_id', 'city_name', 'lat', 'lon'], dtype='object')

In [80]:
city_coord = city_coord[['place_id',  'lat', 'lon', 'city_name']]

In [81]:
city_coord.head()

Unnamed: 0,place_id,lat,lon,city_name
0,156094680,48.635954,-1.51146,Mont Saint-Michel
1,297756747,48.649518,-2.026041,Saint-Malo
2,297981358,49.276462,-0.702474,Bayeux
3,298137491,49.493898,0.107973,Le Havre
4,297518815,49.440459,1.093966,Rouen


In [82]:
city_and_hotel_info_df = city_coord.merge(hotel_info_df, how="right")

In [83]:
city_and_hotel_info_df.columns

Index(['place_id', 'lat', 'lon', 'city_name', 'hotel_name', 'hotel_url',
       'score', 'description', 'location', 'hotel_latitude',
       'hotel_longtitude'],
      dtype='object')

In [84]:
city_and_hotel_info_df = city_and_hotel_info_df[['city_name', 'place_id', 'lat', 'lon', 'hotel_name', 'hotel_url',
       'score', 'description', 'location', 'hotel_latitude', 'hotel_longtitude']]

In [85]:
city_and_hotel_info_df.head()

Unnamed: 0,city_name,place_id,lat,lon,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
1,Annecy,298516920.0,45.899235,6.128885,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546
2,Ariège,297389050.0,42.945537,1.406554,Les gîtes de Beille,https://www.booking.com/hotel/fr/les-gites-de-...,8.1,Located in Les Cabannes in the Midi-Pyrénées ...,"25 Quartier la Bexane, 09310 Les Cabannes, France",42.7855571,1.68166541
3,Ariège,297389050.0,42.945537,1.406554,Le petit nid,https://www.booking.com/hotel/fr/le-petit-nid-...,9.2,"Offering garden views, Le petit nid is an acc...","RDC 1 Rue de la Place, 09330 Montgaillard, France",42.93227802,1.63516787
4,Ariège,297389050.0,42.945537,1.406554,Appartement Fontargente,https://www.booking.com/hotel/fr/appartement-f...,9.2,Appartement Fontargente is located in Ax-les-T...,premier étage 19 Avenue Docteur François Gomma...,42.71847547,1.84064515


In [86]:
nb_hotels = len(city_and_hotel_info_df)

In [87]:
nb_hotels

1000

We need to add 7-day weather forecast for each hotel based on the city where the hotel is situated.

In [88]:
city_and_hotel_info_df = city_and_hotel_info_df.loc[city_and_hotel_info_df.index.repeat(7)]

In [89]:
city_and_hotel_info_df.head(10)

Unnamed: 0,city_name,place_id,lat,lon,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
1,Annecy,298516920.0,45.899235,6.128885,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546
1,Annecy,298516920.0,45.899235,6.128885,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546
1,Annecy,298516920.0,45.899235,6.128885,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546


We wil now create a key that will later help us join the dataframe with the hotel information and the dataframe with the weather information.

In [90]:
city_and_hotel_info_df.head(10)


Unnamed: 0,city_name,place_id,lat,lon,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681
1,Annecy,298516920.0,45.899235,6.128885,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546
1,Annecy,298516920.0,45.899235,6.128885,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546
1,Annecy,298516920.0,45.899235,6.128885,GuestReady - Cozy perfection in the city centre,https://www.booking.com/hotel/fr/guestready-to...,8.0,"Located in the centre of Annecy, 37 km from Ro...","8 Rue Camille Dunant, 74000 Annecy, France",45.8999407,6.1270546


In [91]:
days = [1, 2, 3, 4, 5, 6, 7]
day_numbers = days * nb_hotels
city_and_hotel_info_df["day_in_city"] = day_numbers
city_and_hotel_info_df["day_in_city"] = city_and_hotel_info_df['city_name'] + " day "+ city_and_hotel_info_df["day_in_city"].astype(str)

    

In [92]:
city_and_hotel_info_df.head()

Unnamed: 0,city_name,place_id,lat,lon,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude,day_in_city
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 1
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 2
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 3
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 4
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 5


In [93]:
city_and_hotel_info_df

Unnamed: 0,city_name,place_id,lat,lon,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude,day_in_city
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 1
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 2
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 3
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 4
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.71383156,1.83920681,Ariège day 5
...,...,...,...,...,...,...,...,...,...,...,...,...
999,Toulouse,298222566.0,43.604462,1.444247,Hotel Albert 1er,https://www.booking.com/hotel/fr/hotelalbertto...,8.3,"Located in the heart of Toulouse, the Hotel Al...","8 rue Rivals, 31000 Toulouse, France",43.60603220,1.44491941,Toulouse day 3
999,Toulouse,298222566.0,43.604462,1.444247,Hotel Albert 1er,https://www.booking.com/hotel/fr/hotelalbertto...,8.3,"Located in the heart of Toulouse, the Hotel Al...","8 rue Rivals, 31000 Toulouse, France",43.60603220,1.44491941,Toulouse day 4
999,Toulouse,298222566.0,43.604462,1.444247,Hotel Albert 1er,https://www.booking.com/hotel/fr/hotelalbertto...,8.3,"Located in the heart of Toulouse, the Hotel Al...","8 rue Rivals, 31000 Toulouse, France",43.60603220,1.44491941,Toulouse day 5
999,Toulouse,298222566.0,43.604462,1.444247,Hotel Albert 1er,https://www.booking.com/hotel/fr/hotelalbertto...,8.3,"Located in the heart of Toulouse, the Hotel Al...","8 rue Rivals, 31000 Toulouse, France",43.60603220,1.44491941,Toulouse day 6


In [94]:
len(city_and_hotel_info_df)

7000

In [95]:
# Loading .csv file with weather data from S3 bucket
url = 'https://kayak-booking-bucket-12-12-2022.s3.eu-west-3.amazonaws.com/weather_forecast.csv'
weather_df = pd.read_csv(url)
weather_df.head(5)

Unnamed: 0.1,Unnamed: 0,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,...,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description,city_name
0,0,2023-02-20 13:00:00,2023-02-20 08:05:53,2023-02-20 18:34:05,1028,71,5.56,2.06,208,2.25,...,7.46,4.75,9.8,6.81,7.46,3.15,800.0,Clear,clear sky,Mont Saint-Michel
1,1,2023-02-21 13:00:00,2023-02-21 08:04:04,2023-02-21 18:35:42,1018,64,5.72,2.67,139,4.01,...,9.88,5.43,11.48,7.86,9.43,3.54,804.0,Clouds,overcast clouds,Mont Saint-Michel
2,2,2023-02-22 13:00:00,2023-02-22 08:02:13,2023-02-22 18:37:18,1015,88,5.93,6.78,318,10.43,...,6.31,8.7,4.68,2.21,2.76,7.03,500.0,Rain,light rain,Mont Saint-Michel
3,3,2023-02-23 13:00:00,2023-02-23 08:00:22,2023-02-23 18:38:55,1018,76,3.98,7.16,31,12.58,...,6.88,5.1,4.9,2.04,2.94,4.15,500.0,Rain,light rain,Mont Saint-Michel
4,4,2023-02-24 13:00:00,2023-02-24 07:58:29,2023-02-24 18:40:31,1015,67,3.02,6.72,29,12.65,...,6.93,2.54,6.21,1.99,4.91,-0.9,803.0,Clouds,broken clouds,Mont Saint-Michel


In [96]:
len(weather_df)

245

In [97]:
weather_df.head()

Unnamed: 0.1,Unnamed: 0,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,...,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description,city_name
0,0,2023-02-20 13:00:00,2023-02-20 08:05:53,2023-02-20 18:34:05,1028,71,5.56,2.06,208,2.25,...,7.46,4.75,9.8,6.81,7.46,3.15,800.0,Clear,clear sky,Mont Saint-Michel
1,1,2023-02-21 13:00:00,2023-02-21 08:04:04,2023-02-21 18:35:42,1018,64,5.72,2.67,139,4.01,...,9.88,5.43,11.48,7.86,9.43,3.54,804.0,Clouds,overcast clouds,Mont Saint-Michel
2,2,2023-02-22 13:00:00,2023-02-22 08:02:13,2023-02-22 18:37:18,1015,88,5.93,6.78,318,10.43,...,6.31,8.7,4.68,2.21,2.76,7.03,500.0,Rain,light rain,Mont Saint-Michel
3,3,2023-02-23 13:00:00,2023-02-23 08:00:22,2023-02-23 18:38:55,1018,76,3.98,7.16,31,12.58,...,6.88,5.1,4.9,2.04,2.94,4.15,500.0,Rain,light rain,Mont Saint-Michel
4,4,2023-02-24 13:00:00,2023-02-24 07:58:29,2023-02-24 18:40:31,1015,67,3.02,6.72,29,12.65,...,6.93,2.54,6.21,1.99,4.91,-0.9,803.0,Clouds,broken clouds,Mont Saint-Michel


In [98]:
nb_cities = weather_df["city_name"].nunique()
days = [1, 2, 3, 4, 5, 6, 7]
day_numbers = days * nb_cities
weather_df["day_in_city"] = day_numbers
weather_df["day_in_city"] = weather_df['city_name'] + " day "+ weather_df["day_in_city"].astype(str)


In [99]:
weather_df.tail(15)

Unnamed: 0.1,Unnamed: 0,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,...,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description,city_name,day_in_city
230,230,2023-02-26 13:00:00,2023-02-26 07:48:33,2023-02-26 18:50:09,1017,61,-0.52,5.99,45,9.68,...,1.62,4.23,0.91,3.59,-1.82,801.0,Clouds,few clouds,Biarritz,Biarritz day 7
231,231,2023-02-20 13:00:00,2023-02-20 07:57:43,2023-02-20 18:41:57,1023,58,8.92,3.61,84,6.46,...,8.97,16.89,12.42,18.0,7.49,804.0,Clouds,overcast clouds,Bayonne,Bayonne day 1
232,232,2023-02-21 13:00:00,2023-02-21 07:56:11,2023-02-21 18:43:17,1020,64,8.3,3.83,273,7.9,...,11.66,14.48,11.73,11.6,10.71,804.0,Clouds,overcast clouds,Bayonne,Bayonne day 2
233,233,2023-02-22 13:00:00,2023-02-22 07:54:38,2023-02-22 18:44:36,1020,89,10.64,8.89,338,12.12,...,11.39,12.25,5.98,10.56,11.01,502.0,Rain,heavy intensity rain,Bayonne,Bayonne day 3
234,234,2023-02-23 13:00:00,2023-02-23 07:53:04,2023-02-23 18:45:54,1016,83,4.51,7.34,318,12.07,...,7.74,7.43,5.45,6.83,5.49,502.0,Rain,heavy intensity rain,Bayonne,Bayonne day 4
235,235,2023-02-24 13:00:00,2023-02-24 07:51:29,2023-02-24 18:47:13,1012,80,4.42,3.16,220,4.68,...,4.45,5.84,3.06,7.48,4.45,500.0,Rain,light rain,Bayonne,Bayonne day 5
236,236,2023-02-25 13:00:00,2023-02-25 07:49:53,2023-02-25 18:48:31,1010,89,3.79,3.7,39,7.52,...,3.58,5.66,3.83,7.28,0.82,500.0,Rain,light rain,Bayonne,Bayonne day 6
237,237,2023-02-26 13:00:00,2023-02-26 07:48:16,2023-02-26 18:49:49,1016,58,-0.62,5.17,45,10.54,...,1.1,5.35,0.84,3.59,-1.67,801.0,Clouds,few clouds,Bayonne,Bayonne day 7
238,238,2023-02-20 13:00:00,2023-02-20 08:00:24,2023-02-20 18:36:42,1026,67,6.39,4.79,59,6.65,...,6.3,11.54,7.07,12.69,3.52,801.0,Clouds,few clouds,La Rochelle,La Rochelle day 1
239,239,2023-02-21 13:00:00,2023-02-21 07:58:43,2023-02-21 18:38:10,1017,73,7.61,4.75,231,8.03,...,8.7,11.58,7.61,9.97,6.38,804.0,Clouds,overcast clouds,La Rochelle,La Rochelle day 2


In [100]:
weather_df.columns

Index(['Unnamed: 0', 'dt', 'sunrise', 'sunset', 'pressure', 'humidity',
       'dew_point', 'wind_speed', 'wind_deg', 'wind_gust', 'clouds', 'pop',
       'uvi', 'rain', 'snow', 'temp_day', 'temp_min', 'temp_max', 'temp_night',
       'temp_eve', 'temp_morn', 'feels_like_day', 'feels_like_night',
       'feels_like_eve', 'feels_like_morn', 'weather_id', 'weather_main',
       'weather_description', 'city_name', 'day_in_city'],
      dtype='object')

We change the column order to be able to merge dataframes easily.
We will keep 'city_name' column in both dataframes in order to check later if the merge has been done correctly.

In [101]:
weather_df = weather_df[['day_in_city', 'city_name', 'dt', 'sunrise', 'sunset', 'pressure', 'humidity',
       'dew_point', 'wind_speed', 'wind_deg', 'wind_gust', 'clouds', 'pop',
       'uvi', 'rain', 'snow', 'temp_day', 'temp_min', 'temp_max', 'temp_night',
       'temp_eve', 'temp_morn', 'feels_like_day', 'feels_like_night',
       'feels_like_eve', 'feels_like_morn', 'weather_id', 'weather_main',
       'weather_description']]

In [102]:
weather_df.head()

Unnamed: 0,day_in_city,city_name,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,...,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description
0,Mont Saint-Michel day 1,Mont Saint-Michel,2023-02-20 13:00:00,2023-02-20 08:05:53,2023-02-20 18:34:05,1028,71,5.56,2.06,208,...,7.4,7.46,4.75,9.8,6.81,7.46,3.15,800.0,Clear,clear sky
1,Mont Saint-Michel day 2,Mont Saint-Michel,2023-02-21 13:00:00,2023-02-21 08:04:04,2023-02-21 18:35:42,1018,64,5.72,2.67,139,...,8.66,9.88,5.43,11.48,7.86,9.43,3.54,804.0,Clouds,overcast clouds
2,Mont Saint-Michel day 3,Mont Saint-Michel,2023-02-22 13:00:00,2023-02-22 08:02:13,2023-02-22 18:37:18,1015,88,5.93,6.78,318,...,4.48,6.31,8.7,4.68,2.21,2.76,7.03,500.0,Rain,light rain
3,Mont Saint-Michel day 4,Mont Saint-Michel,2023-02-23 13:00:00,2023-02-23 08:00:22,2023-02-23 18:38:55,1018,76,3.98,7.16,31,...,6.18,6.88,5.1,4.9,2.04,2.94,4.15,500.0,Rain,light rain
4,Mont Saint-Michel day 5,Mont Saint-Michel,2023-02-24 13:00:00,2023-02-24 07:58:29,2023-02-24 18:40:31,1015,67,3.02,6.72,29,...,4.61,6.93,2.54,6.21,1.99,4.91,-0.9,803.0,Clouds,broken clouds


In [103]:
kayak_df = city_and_hotel_info_df.merge(weather_df, how="left")

In [104]:
len(kayak_df)

7000

In [105]:
kayak_df.tail(15)

Unnamed: 0,city_name,place_id,lat,lon,hotel_name,hotel_url,score,description,location,hotel_latitude,...,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description
6985,Toulouse,298222566.0,43.604462,1.444247,T2 COSY - CAPITOLE - CALME - Wi-Fi - NETFLIX,https://www.booking.com/hotel/fr/studio-cosy-h...,8.7,"Situated in the centre of Toulouse, 3.7 km fro...","1er étage 44 Rue Saint-Rome, 31000 Toulouse, F...",43.6033082,...,2.11,4.9,-0.17,2.85,-2.1,1.67,-4.49,801.0,Clouds,few clouds
6986,Toulouse,298222566.0,43.604462,1.444247,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.6033373,...,9.74,17.64,6.98,14.7,7.29,16.69,6.98,802.0,Clouds,scattered clouds
6987,Toulouse,298222566.0,43.604462,1.444247,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.6033373,...,11.44,13.53,8.01,14.42,10.99,12.77,5.3,500.0,Rain,light rain
6988,Toulouse,298222566.0,43.604462,1.444247,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.6033373,...,10.21,11.06,9.59,11.86,9.69,10.55,9.01,500.0,Rain,light rain
6989,Toulouse,298222566.0,43.604462,1.444247,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.6033373,...,5.7,6.19,5.52,5.37,4.73,4.79,1.82,501.0,Rain,moderate rain
6990,Toulouse,298222566.0,43.604462,1.444247,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.6033373,...,5.62,5.78,4.41,3.73,4.65,5.78,2.7,500.0,Rain,light rain
6991,Toulouse,298222566.0,43.604462,1.444247,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.6033373,...,5.56,6.21,5.12,3.8,3.43,3.2,3.13,500.0,Rain,light rain
6992,Toulouse,298222566.0,43.604462,1.444247,Modern flat 50m from the Capitole - Toulouse -...,https://www.booking.com/hotel/fr/modern-flat-5...,8.3,"Set in the centre of Toulouse, 3.3 km from Zén...","43 rue Saint-Rome, 31000 Toulouse, France",43.6033373,...,2.11,4.9,-0.17,2.85,-2.1,1.67,-4.49,801.0,Clouds,few clouds
6993,Toulouse,298222566.0,43.604462,1.444247,Hotel Albert 1er,https://www.booking.com/hotel/fr/hotelalbertto...,8.3,"Located in the heart of Toulouse, the Hotel Al...","8 rue Rivals, 31000 Toulouse, France",43.6060322,...,9.74,17.64,6.98,14.7,7.29,16.69,6.98,802.0,Clouds,scattered clouds
6994,Toulouse,298222566.0,43.604462,1.444247,Hotel Albert 1er,https://www.booking.com/hotel/fr/hotelalbertto...,8.3,"Located in the heart of Toulouse, the Hotel Al...","8 rue Rivals, 31000 Toulouse, France",43.6060322,...,11.44,13.53,8.01,14.42,10.99,12.77,5.3,500.0,Rain,light rain


In [106]:
kayak_df.columns

Index(['city_name', 'place_id', 'lat', 'lon', 'hotel_name', 'hotel_url',
       'score', 'description', 'location', 'hotel_latitude',
       'hotel_longtitude', 'day_in_city', 'dt', 'sunrise', 'sunset',
       'pressure', 'humidity', 'dew_point', 'wind_speed', 'wind_deg',
       'wind_gust', 'clouds', 'pop', 'uvi', 'rain', 'snow', 'temp_day',
       'temp_min', 'temp_max', 'temp_night', 'temp_eve', 'temp_morn',
       'feels_like_day', 'feels_like_night', 'feels_like_eve',
       'feels_like_morn', 'weather_id', 'weather_main', 'weather_description'],
      dtype='object')

Now we have the dataframe with all the necessary information, but some adjustments have to be made.

We'll rename some of the columns for clarity:

In [107]:
kayak_df.rename(columns = {'place_id':'city_id'}, inplace = True)
kayak_df.rename(columns = {'lat':'city_latitude'}, inplace = True)
kayak_df.rename(columns = {'lon':'city_longtitude'}, inplace = True)
kayak_df.rename(columns = {'dt':'date'}, inplace = True)

# create a function for renaming columns


Values in some columns are still in string format while they should be in float or datetime format:

In [108]:
kayak_df.dtypes

city_name               object
city_id                float64
city_latitude          float64
city_longtitude        float64
hotel_name              object
hotel_url               object
score                   object
description             object
location                object
hotel_latitude          object
hotel_longtitude        object
day_in_city             object
date                    object
sunrise                 object
sunset                  object
pressure               float64
humidity               float64
dew_point              float64
wind_speed             float64
wind_deg               float64
wind_gust              float64
clouds                 float64
pop                    float64
uvi                    float64
rain                   float64
snow                   float64
temp_day               float64
temp_min               float64
temp_max               float64
temp_night             float64
temp_eve               float64
temp_morn              float64
feels_li

We'll convert the values columns "date", "sunrise", "sunset" to datetime format.

In [109]:
columns_to_convert = ["date", "sunrise", "sunset"]
for column in columns_to_convert: 
    kayak_df[column] = pd.to_datetime(kayak_df[column], format='%Y-%m-%d %H:%M:%S')

In [110]:

for column in columns_to_convert: 
    kayak_df[column] = pd.to_datetime(kayak_df[column], format='%Y-%m-%d %H:%M:%S')

Checking which are the columns that still contain values in string ("object") format

In [111]:
(kayak_df.select_dtypes(include=['object'])).columns

Index(['city_name', 'hotel_name', 'hotel_url', 'score', 'description',
       'location', 'hotel_latitude', 'hotel_longtitude', 'day_in_city',
       'weather_main', 'weather_description'],
      dtype='object')

In [112]:
columns_to_convert = ['score', 'hotel_latitude', 'hotel_longtitude']
for column in columns_to_convert: 
    kayak_df[column] = kayak_df[column].astype(float)

# Checking the value type for dataframe columns:
print(kayak_df.dtypes)

city_name                      object
city_id                       float64
city_latitude                 float64
city_longtitude               float64
hotel_name                     object
hotel_url                      object
score                         float64
description                    object
location                       object
hotel_latitude                float64
hotel_longtitude              float64
day_in_city                    object
date                   datetime64[ns]
sunrise                datetime64[ns]
sunset                 datetime64[ns]
pressure                      float64
humidity                      float64
dew_point                     float64
wind_speed                    float64
wind_deg                      float64
wind_gust                     float64
clouds                        float64
pop                           float64
uvi                           float64
rain                          float64
snow                          float64
temp_day    

In [113]:
kayak_df["date"][29]

Timestamp('2023-02-21 13:00:00')

In [114]:
(kayak_df["date"][29]).day_name()

'Tuesday'

In [115]:
kayak_df["day_of_week"] = kayak_df["date"].apply(lambda x: x.day_name())

In [116]:
kayak_df.head()

Unnamed: 0,city_name,city_id,city_latitude,city_longtitude,hotel_name,hotel_url,score,description,location,hotel_latitude,...,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description,day_of_week
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.713832,...,10.5,2.94,12.75,4.25,8.23,0.82,803.0,Clouds,broken clouds,Monday
1,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.713832,...,7.25,5.91,12.67,4.59,7.25,4.05,500.0,Rain,light rain,Tuesday
2,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.713832,...,4.95,3.66,5.55,4.19,4.95,3.66,500.0,Rain,light rain,Wednesday
3,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.713832,...,-0.56,-0.87,-0.11,-0.94,-0.56,-2.66,616.0,Snow,rain and snow,Thursday
4,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.713832,...,-0.29,-1.38,0.61,-0.75,-0.29,-1.38,601.0,Snow,snow,Friday


In [117]:
kayak_df.columns

Index(['city_name', 'city_id', 'city_latitude', 'city_longtitude',
       'hotel_name', 'hotel_url', 'score', 'description', 'location',
       'hotel_latitude', 'hotel_longtitude', 'day_in_city', 'date', 'sunrise',
       'sunset', 'pressure', 'humidity', 'dew_point', 'wind_speed', 'wind_deg',
       'wind_gust', 'clouds', 'pop', 'uvi', 'rain', 'snow', 'temp_day',
       'temp_min', 'temp_max', 'temp_night', 'temp_eve', 'temp_morn',
       'feels_like_day', 'feels_like_night', 'feels_like_eve',
       'feels_like_morn', 'weather_id', 'weather_main', 'weather_description',
       'day_of_week'],
      dtype='object')

In [118]:
# rearranging the order of columns for clarity
kayak_df = kayak_df[['city_name', 'city_id', 'city_latitude', 'city_longtitude',
       'hotel_name', 'hotel_url', 'score', 'description', 'location',
       'hotel_latitude', 'hotel_longtitude', 'date', 'day_of_week','day_in_city','sunrise',
       'sunset', 'weather_main', 'weather_description', 'weather_id','pressure', 
       'humidity', 'dew_point', 'wind_speed', 'wind_deg',
       'wind_gust', 'clouds', 'pop', 'uvi', 'rain', 'snow', 'temp_day',
       'temp_min', 'temp_max', 'temp_night', 'temp_eve', 'temp_morn',
       'feels_like_day', 'feels_like_night', 'feels_like_eve','feels_like_morn']]

In [119]:
pd.set_option('display.max_columns', None)
kayak_df.head(3)

Unnamed: 0,city_name,city_id,city_latitude,city_longtitude,hotel_name,hotel_url,score,description,location,hotel_latitude,hotel_longtitude,date,day_of_week,day_in_city,sunrise,sunset,weather_main,weather_description,weather_id,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,clouds,pop,uvi,rain,snow,temp_day,temp_min,temp_max,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn
0,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.713832,1.839207,2023-02-20 13:00:00,Monday,Ariège day 1,2023-02-20 07:45:26,2023-02-20 18:31:12,Clouds,broken clouds,803.0,1024.0,35.0,-0.82,2.49,181.0,2.35,68.0,0.0,3.08,,,14.35,2.93,14.35,6.13,10.5,2.94,12.75,4.25,8.23,0.82
1,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.713832,1.839207,2023-02-21 13:00:00,Tuesday,Ariège day 2,2023-02-21 07:43:55,2023-02-21 18:32:29,Rain,light rain,500.0,1017.0,42.0,2.41,3.07,324.0,4.2,96.0,0.35,2.42,0.44,,14.11,4.59,14.11,4.59,7.25,5.91,12.67,4.59,7.25,4.05
2,Ariège,297389050.0,42.945537,1.406554,Lagrange Vacances Les Chalets d’Ax,https://www.booking.com/hotel/fr/residence-les...,7.6,Lagrange Vacances Les Chalets d’Ax is located...,"Quartier De Castel Maou - Chemin d'Aouredou -,...",42.713832,1.839207,2023-02-22 13:00:00,Wednesday,Ariège day 3,2023-02-22 07:42:24,2023-02-22 18:33:47,Rain,light rain,500.0,1019.0,90.0,5.64,1.64,302.0,2.67,94.0,1.0,1.04,2.6,,5.55,2.76,7.24,4.19,4.95,3.66,5.55,4.19,4.95,3.66


In [120]:
kayak_df.to_csv("kayak.csv", index=False)

In [121]:
"""# Access key for user with access to write in S3 bucket
S3_ACCESS_KEY_ID =  os.getenv("S3_ACCESS_KEY_ID")
# Secret key for user with access to write in S3 bucket 
S3_SECRET_ACCESS_KEY =  os.getenv("S3_SECRET_ACCESS_KEY")

# Writing the .csv file to bucket S3
session = boto3.Session(aws_access_key_id=S3_ACCESS_KEY_ID, 
                      aws_secret_access_key=S3_SECRET_ACCESS_KEY)
s3 = session.resource("s3")
bucket = s3.Bucket("kayak-booking-bucket-12-12-2022") 
bucket.upload_file("kayak.csv", Key="kayak.csv")"""