## S3 bucket: store hotel and weather info for top 35 cities to visit in France  

In this notebook I :  
-  Created an **AWS s3 bucket** containing a single dataframe with weather and hotel information for the [top 35 best cities in France](https://one-week-in.com/35-cities-to-visit-in-france/)
-  City gps and weather and information was scrapped from the [nominatim](https://nominatim.org/) and [openweathermap](https://openweathermap.org/api/one-call-api) APIs
-  Hotel information was scraped from [booking.com](https://www.booking.com)

## Import libraries

In [16]:
import boto3
import pandas as pd
import json
import ast

## AWS configuration

In [17]:
from src.credentials import keys_aws

#Create session using AWS access keys and IDs
session = boto3.Session(aws_access_key_id=keys_aws.loc[0,'Access key ID'], 
aws_secret_access_key=keys_aws.loc[0,'Secret access key'])

## Ressource session

In [18]:
s3_resource = session.resource('s3')
bucket_name = "kayak-amorenov"

## Create bucket

In [19]:
# Indicate public-read for read access to AllUsers group
kayak_bucket = s3_resource.create_bucket(Bucket=bucket_name,
ACL = 'public-read',
CreateBucketConfiguration={
        'LocationConstraint': 'eu-west-3'})

kayak_bucket

s3.Bucket(name='kayak-amorenov')

In [20]:
# Upload bucket policy to make S3 bucket publicly available
file = open("config/AWS_bucket_policy.txt", "r")

policy_file = file.read()
policy_dict = ast.literal_eval(policy_file)

policy_dict

{'Version': '2012-10-17',
 'Statement': [{'Sid': 'PublicRead',
   'Effect': 'Allow',
   'Principal': '*',
   'Action': ['s3:GetObject', 's3:GetObjectVersion'],
   'Resource': ['arn:aws:s3:::DOC-EXAMPLE-BUCKET/*']}]}

In [21]:
# Indicate bucket name inside policy
policy_dict['Statement'][0]['Resource'][0] = 'arn:aws:s3:::{}/*'.format(bucket_name)

policy_dict

{'Version': '2012-10-17',
 'Statement': [{'Sid': 'PublicRead',
   'Effect': 'Allow',
   'Principal': '*',
   'Action': ['s3:GetObject', 's3:GetObjectVersion'],
   'Resource': ['arn:aws:s3:::kayak-amorenov/*']}]}

In [22]:
# Update bucket policy 
bucket_policy = s3_resource.BucketPolicy(bucket_name)

bucket_policy_new = json.dumps(policy_dict)
bucket_policy.put(Policy=bucket_policy_new)

{'ResponseMetadata': {'RequestId': '2AED6S1PV21GP7FH',
  'HostId': '6rBZ9bxL1mUYd6bbe9NDpvdaHuKCRGxTzVsIUjVo75TQ8qVD7Hw5ZjdRIp2H+QGdLbBEhZr+nPc=',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': '6rBZ9bxL1mUYd6bbe9NDpvdaHuKCRGxTzVsIUjVo75TQ8qVD7Hw5ZjdRIp2H+QGdLbBEhZr+nPc=',
   'x-amz-request-id': '2AED6S1PV21GP7FH',
   'date': 'Mon, 18 Apr 2022 17:50:17 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

## Import data for bucket

In [23]:
# Import output from weather scraping
weather_df_s3 = pd.read_csv('results/top_35_cities_france_weather_gps.csv')
print(weather_df_s3.shape)
weather_df_s3.head()

(272, 34)


Unnamed: 0,lat,lon,timezone,timezone_offset,dt,sunrise,sunset,moonrise,moonset,moon_phase,...,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,rain,Day,snow,Search_Name
0,45.3314,1.0301,Europe/Paris,7200,1650279600,1650258335,1650307469,1650316380,1650261660,0.56,...,17.67,7.33,17.85,14.35,17.03,7.33,,Today,,Aigues Mortes
1,45.3314,1.0301,Europe/Paris,7200,1650366000,1650344632,1650393947,0,1650349920,0.6,...,11.63,9.06,13.78,7.92,11.09,8.51,9.62,Day_1,,Aigues Mortes
2,45.3314,1.0301,Europe/Paris,7200,1650452400,1650430930,1650480424,1650407640,1650438840,0.64,...,9.95,8.97,9.9,7.07,9.77,8.32,16.99,Day_2,,Aigues Mortes
3,45.3314,1.0301,Europe/Paris,7200,1650538800,1650517229,1650566901,1650498660,1650528360,0.67,...,11.14,7.8,10.62,9.0,10.53,7.8,3.86,Day_3,,Aigues Mortes
4,45.3314,1.0301,Europe/Paris,7200,1650625200,1650603529,1650653378,1650588960,1650618600,0.71,...,14.03,6.59,14.66,10.48,13.42,5.83,0.14,Day_4,,Aigues Mortes


In [24]:
# Import output from hotels  scraping
booking_df_s3 = pd.read_csv('results/scrap_booking_s3.csv')
print(booking_df_s3.shape)
booking_df_s3.head()

(3466, 15)


Unnamed: 0,city,suburbs,hotel_name,link,rating,room_type,price,stay,guests,room,description,location,map_link,hotel_lat,hotel_lon
0,Saint Malo,"Sillon, Saint Malo",Antinéa,https://www.booking.com/hotel/fr/antinea.en-gb...,8.3,Family Room (2 Adults + 2 Children),"€ 1,667",6 nights,2 adults,98 reviews,Free cancellation,1.6 km from centre,https://www.booking.com/hotel/fr/antinea.en-gb...,48.655543,-2.005139
1,Saint Malo,"Parame, Saint Malo",Le RUELLAN charmant duplex proche plage,https://www.booking.com/hotel/fr/le-ruellan-ch...,8.3,Apartment,€ 539,6 nights,2 adults,Managed by a private host,Free cancellation,3.3 km from centre,https://www.booking.com/hotel/fr/le-ruellan-ch...,48.656487,-1.982275
2,Saint Malo,"Parame, Saint Malo",Le RUELLAN charmant duplex proche plage,https://www.booking.com/hotel/fr/le-ruellan-ch...,8.3,Apartment,€ 539,6 nights,2 adults,Managed by a private host,Free cancellation,3.3 km from centre,https://www.booking.com/hotel/fr/le-ruellan-ch...,48.656487,-1.982275
3,Saint Malo,"Parame, Saint Malo",Le RUELLAN charmant duplex proche plage,https://www.booking.com/hotel/fr/le-ruellan-ch...,8.3,Apartment,€ 555,6 nights,2 adults,Managed by a private host,Free cancellation,3.3 km from centre,https://www.booking.com/hotel/fr/le-ruellan-ch...,48.656487,-1.982275
4,Saint Malo,"Parame, Saint Malo",Le RUELLAN charmant duplex proche plage,https://www.booking.com/hotel/fr/le-ruellan-ch...,8.3,Apartment,€ 555,6 nights,2 adults,Managed by a private host,Free cancellation,3.3 km from centre,https://www.booking.com/hotel/fr/le-ruellan-ch...,48.656487,-1.982275


In [25]:
# Homogenize city names to be able to merge dataframes if necessary
from src.cities_weather import cities_meta_df
print(cities_meta_df.shape)
print(cities_meta_df.head())

weather_df_s3.rename(columns={"Search_Name" : "city"}, inplace=True)
booking_df_s3.rename(columns={"city":"city_clean"}, inplace=True)

(35, 2)
                city         city_clean
0  Mont Saint Michel  Mont Saint Michel
1         Saint-Malo            St Malo
2             Bayeux             Bayeux
3           Le Havre           Le Havre
4              Rouen              Rouen


In [26]:
weather_df_s3_clean = weather_df_s3.merge(cities_meta_df, how='left', on='city')
booking_df_s3_clean = booking_df_s3.merge(cities_meta_df, how='left', on='city_clean')

print(weather_df_s3_clean.columns)
print(booking_df_s3_clean.columns)

Index(['lat', 'lon', 'timezone', 'timezone_offset', 'dt', 'sunrise', 'sunset',
       'moonrise', 'moonset', 'moon_phase', 'pressure', 'humidity',
       'dew_point', 'wind_speed', 'wind_deg', 'wind_gust', 'weather', 'clouds',
       'pop', 'uvi', 'temp_day', 'temp_min', 'temp_max', 'temp_night',
       'temp_eve', 'temp_morn', 'feels_like_day', 'feels_like_night',
       'feels_like_eve', 'feels_like_morn', 'rain', 'Day', 'snow', 'city',
       'city_clean'],
      dtype='object')
Index(['city_clean', 'suburbs', 'hotel_name', 'link', 'rating', 'room_type',
       'price', 'stay', 'guests', 'room', 'description', 'location',
       'map_link', 'hotel_lat', 'hotel_lon', 'city'],
      dtype='object')


## Upload data to bucket

In [27]:
# Upload file to bucket
weathercsv_s3 = weather_df_s3_clean.to_csv(index=False)
booking_csv_s3 = booking_df_s3_clean.to_csv(index=False)

put_object = kayak_bucket.put_object(Key="top35cities_france_scrap_weather7d.csv", Body=weathercsv_s3)
put_object = kayak_bucket.put_object(Key="top35cities_france_scrap_hotelsbooking.csv", Body=booking_csv_s3)

## Verify public access to bucket

In [28]:
# Verify public access to created bucket: download file
# Additionally checked on own aws account (see screenshot of aws in reports/figures/01-Kayak_aws_bucket_creation.png)

test_bucket_weather = pd.read_csv('https://kayak-amorenov.s3.eu-west-3.amazonaws.com/top35cities_france_scrap_weather7d.csv')
test_bucket_booking = pd.read_csv('https://kayak-amorenov.s3.eu-west-3.amazonaws.com/top35cities_france_scrap_hotelsbooking.csv')

print(weather_df_s3_clean.shape)
print(test_bucket_weather.shape)


print(booking_df_s3_clean.shape)
print(test_bucket_booking.shape)


(272, 35)
(272, 35)
(3466, 16)
(3466, 16)


In [29]:
test_bucket_weather.head()

Unnamed: 0,lat,lon,timezone,timezone_offset,dt,sunrise,sunset,moonrise,moonset,moon_phase,...,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,rain,Day,snow,city,city_clean
0,45.3314,1.0301,Europe/Paris,7200,1650279600,1650258335,1650307469,1650316380,1650261660,0.56,...,7.33,17.85,14.35,17.03,7.33,,Today,,Aigues Mortes,Aigues Mortes
1,45.3314,1.0301,Europe/Paris,7200,1650366000,1650344632,1650393947,0,1650349920,0.6,...,9.06,13.78,7.92,11.09,8.51,9.62,Day_1,,Aigues Mortes,Aigues Mortes
2,45.3314,1.0301,Europe/Paris,7200,1650452400,1650430930,1650480424,1650407640,1650438840,0.64,...,8.97,9.9,7.07,9.77,8.32,16.99,Day_2,,Aigues Mortes,Aigues Mortes
3,45.3314,1.0301,Europe/Paris,7200,1650538800,1650517229,1650566901,1650498660,1650528360,0.67,...,7.8,10.62,9.0,10.53,7.8,3.86,Day_3,,Aigues Mortes,Aigues Mortes
4,45.3314,1.0301,Europe/Paris,7200,1650625200,1650603529,1650653378,1650588960,1650618600,0.71,...,6.59,14.66,10.48,13.42,5.83,0.14,Day_4,,Aigues Mortes,Aigues Mortes


In [30]:
test_bucket_booking.head()

Unnamed: 0,city_clean,suburbs,hotel_name,link,rating,room_type,price,stay,guests,room,description,location,map_link,hotel_lat,hotel_lon,city
0,Saint Malo,"Sillon, Saint Malo",Antinéa,https://www.booking.com/hotel/fr/antinea.en-gb...,8.3,Family Room (2 Adults + 2 Children),"€ 1,667",6 nights,2 adults,98 reviews,Free cancellation,1.6 km from centre,https://www.booking.com/hotel/fr/antinea.en-gb...,48.655543,-2.005139,
1,Saint Malo,"Parame, Saint Malo",Le RUELLAN charmant duplex proche plage,https://www.booking.com/hotel/fr/le-ruellan-ch...,8.3,Apartment,€ 539,6 nights,2 adults,Managed by a private host,Free cancellation,3.3 km from centre,https://www.booking.com/hotel/fr/le-ruellan-ch...,48.656487,-1.982275,
2,Saint Malo,"Parame, Saint Malo",Le RUELLAN charmant duplex proche plage,https://www.booking.com/hotel/fr/le-ruellan-ch...,8.3,Apartment,€ 539,6 nights,2 adults,Managed by a private host,Free cancellation,3.3 km from centre,https://www.booking.com/hotel/fr/le-ruellan-ch...,48.656487,-1.982275,
3,Saint Malo,"Parame, Saint Malo",Le RUELLAN charmant duplex proche plage,https://www.booking.com/hotel/fr/le-ruellan-ch...,8.3,Apartment,€ 555,6 nights,2 adults,Managed by a private host,Free cancellation,3.3 km from centre,https://www.booking.com/hotel/fr/le-ruellan-ch...,48.656487,-1.982275,
4,Saint Malo,"Parame, Saint Malo",Le RUELLAN charmant duplex proche plage,https://www.booking.com/hotel/fr/le-ruellan-ch...,8.3,Apartment,€ 555,6 nights,2 adults,Managed by a private host,Free cancellation,3.3 km from centre,https://www.booking.com/hotel/fr/le-ruellan-ch...,48.656487,-1.982275,
