In [1]:
# Install the python-dotenv library, used to load confidential keys stored as environment variables
# (uncomment on first run; %pip installs into the environment of the running kernel)
#%pip install python-dotenv

In [2]:
import json
import os
import time

import boto3
import pandas as pd
import requests
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
def get_city_name(name):
    """Return the city part of a detailed destination name.

    The detailed name is treated as a ', '-separated string: everything
    before the first ', ' is returned, or the whole string when it contains
    no ', ' at all.
    """
    city, _separator, _details = name.partition(', ')
    return city

In [4]:
# The 35 top tourist destinations in France, in the order they are queried
best_cities_list = [
    "Mont Saint Michel",
    "St Malo",
    "Bayeux",
    "Le Havre",
    "Rouen",
    "Paris",
    "Amiens",
    "Lille",
    "Strasbourg",
    "Chateau du Haut Koenigsbourg",
    "Colmar",
    "Eguisheim",
    "Besancon",
    "Dijon",
    "Annecy",
    "Grenoble",
    "Lyon",
    "Gorges du Verdon",
    "Bormes les Mimosas",
    "Cassis",
    "Marseille",
    "Aix en Provence",
    "Avignon",
    "Uzes",
    "Nimes",
    "Aigues Mortes",
    "Saintes Maries de la mer",
    "Collioure",
    "Carcassonne",
    "Ariege",
    "Toulouse",
    "Montauban",
    "Biarritz",
    "Bayonne",
    "La Rochelle",
]


### Getting the city coordinates data

In [5]:
# Iterating over the list of destinations to get GPS coordinates in .json format for each destination
# with the help of the search API of nominatim.openstreetmap.org.
#
# Why the request is built this way:
# - `params=` lets requests URL-encode the query, so names containing spaces or
#   special characters are transmitted correctly.
# - Nominatim does not allow the free-form `q` parameter to be combined with structured
#   parameters such as `country`; `countrycodes=fr` is the documented way to restrict
#   a free-form search to France.
# - The usage policy of the public server asks for an identifying User-Agent and
#   for no more than one request per second.
NOMINATIM_SEARCH_URL = "https://nominatim.openstreetmap.org/search"
# Identifies this notebook to the Nominatim operators (replace with your own app name / contact)
NOMINATIM_HEADERS = {"User-Agent": "kayak-destinations-notebook/1.0"}

results = []

for city in best_cities_list:
    response = requests.get(
        NOMINATIM_SEARCH_URL,
        params={"q": city, "countrycodes": "fr", "format": "json"},
        headers=NOMINATIM_HEADERS,
        timeout=30,  # seconds; avoids hanging forever on a stalled connection
    )
    # Fail loudly on HTTP errors (e.g. 403/429) instead of parsing an error page as JSON
    response.raise_for_status()
    results.append(response.json())
    # Stay within the one-request-per-second limit of the public server
    time.sleep(1)
    

In [12]:
# Checking what an element of the 'results' list looks like:
# results[0] is the full list of matches returned for the first destination
# (left commented out to avoid a long output)
#print("First element of 'results' list: ", results[0])
print()

# Checking what information about one city looks like:
# results[0][0] is the first match (a dictionary) for the first destination
print("Information about one city: ", results[0][0])


Information about one city:  {'place_id': 156094680, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright', 'osm_type': 'way', 'osm_id': 211285890, 'boundingbox': ['48.6349172', '48.637031', '-1.5133292', '-1.5094796'], 'lat': '48.6359541', 'lon': '-1.511459954959514', 'display_name': "Mont Saint-Michel, Terrasse de l'Abside, Le Mont-Saint-Michel, Avranches, Manche, Normandie, France métropolitaine, 50170, France", 'class': 'tourism', 'type': 'attraction', 'importance': 0.755436556781574, 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons/poi_point_of_interest.p.20.png'}


In [8]:
# Creating a list of dictionaries (one dictionary per city): the first match
# returned by Nominatim is kept for each destination.
# A destination with no match has an empty result list; it is skipped and reported
# by name instead of stopping the cell with an anonymous IndexError.
city_list = []
cities_not_found = []

for city, matches in zip(best_cities_list, results):
    if matches:
        city_list.append(matches[0])
    else:
        cities_not_found.append(city)

if cities_not_found:
    print("No Nominatim result for: ", ", ".join(cities_not_found))

In [9]:
# Turning the per-city dictionaries into a dataframe (one row per city)
cities_df = pd.DataFrame.from_records(data=city_list)

# Previewing the first five rows of the result
display(cities_df.iloc[:5])


Unnamed: 0,place_id,licence,osm_type,osm_id,boundingbox,lat,lon,display_name,class,type,importance,icon
0,156094680,"Data © OpenStreetMap contributors, ODbL 1.0. h...",way,211285890,"[48.6349172, 48.637031, -1.5133292, -1.5094796]",48.6359541,-1.511459954959514,"Mont Saint-Michel, Terrasse de l'Abside, Le Mo...",tourism,attraction,0.755437,https://nominatim.openstreetmap.org/ui/mapicon...
1,297756747,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,905534,"[48.5979853, 48.6949736, -2.0765246, -1.9367259]",48.649518,-2.0260409,"Saint-Malo, Ille-et-Vilaine, Bretagne, France ...",boundary,administrative,0.676467,https://nominatim.openstreetmap.org/ui/mapicon...
2,297981358,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,145776,"[49.2608124, 49.2934736, -0.7275671, -0.6757378]",49.2764624,-0.7024738,"Bayeux, Calvados, Normandie, France métropolit...",boundary,administrative,0.6827,https://nominatim.openstreetmap.org/ui/mapicon...
3,298137491,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,104492,"[49.4516697, 49.5401463, 0.0667992, 0.1955556]",49.4938975,0.1079732,"Le Havre, Seine-Maritime, Normandie, France mé...",boundary,administrative,0.822333,https://nominatim.openstreetmap.org/ui/mapicon...
4,297518815,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,75628,"[49.4172001, 49.4652601, 1.0300648, 1.1521157]",49.4404591,1.0939658,"Rouen, Seine-Maritime, Normandie, France métro...",boundary,administrative,0.750073,https://nominatim.openstreetmap.org/ui/mapicon...


In [10]:
# Keeping only the columns needed downstream, as an independent copy of cities_df
city_coord = cities_df.loc[:, ['place_id', 'display_name', 'lat', 'lon']].copy()

# Previewing the first five rows of the result
display(city_coord.iloc[:5])


Unnamed: 0,place_id,display_name,lat,lon
0,156094680,"Mont Saint-Michel, Terrasse de l'Abside, Le Mo...",48.6359541,-1.511459954959514
1,297756747,"Saint-Malo, Ille-et-Vilaine, Bretagne, France ...",48.649518,-2.0260409
2,297981358,"Bayeux, Calvados, Normandie, France métropolit...",49.2764624,-0.7024738
3,298137491,"Le Havre, Seine-Maritime, Normandie, France mé...",49.4938975,0.1079732
4,297518815,"Rouen, Seine-Maritime, Normandie, France métro...",49.4404591,1.0939658


In [12]:
# Creating a column "city_name" that will contain only the name of the city without other details.
# We use a previously defined function "get_city_name" to do this.
#
# The frame is rebuilt from cities_df in a single chain so that this cell can be re-run safely:
# deriving it from city_coord and then dropping 'display_name' made a second run fail with KeyError.
city_coord = (
    cities_df[['place_id', 'display_name', 'lat', 'lon']]
    .assign(city_name=lambda df: df['display_name'].apply(get_city_name))
    # Leaving only the necessary columns
    .loc[:, ['place_id', 'city_name', 'lat', 'lon']]
)

# Checking the resulting dataframe
display(city_coord.head())

Unnamed: 0,place_id,city_name,lat,lon
0,156094680,Mont Saint-Michel,48.6359541,-1.511459954959514
1,297756747,Saint-Malo,48.649518,-2.0260409
2,297981358,Bayeux,49.2764624,-0.7024738
3,298137491,Le Havre,49.4938975,0.1079732
4,297518815,Rouen,49.4404591,1.0939658


### Saving the dataframe locally and to an S3 bucket

In [14]:
# Writing the coordinates table to a local .csv file, without the row index
city_coord.to_csv(path_or_buf="city_coordinates.csv", index=False)

In [16]:
# Uploading the .csv file to the S3 bucket.
# The upload is disabled by default to avoid unnecessary rewriting in S3;
# set UPLOAD_TO_S3 to True to run it. An explicit flag is used instead of wrapping
# the code in a string so that the code stays syntax-checked and the cell prints nothing.
UPLOAD_TO_S3 = False

if UPLOAD_TO_S3:
    # Access key for user with access to write in S3 bucket
    S3_ACCESS_KEY_ID = os.getenv("S3_ACCESS_KEY_ID")
    # Secret key for user with access to write in S3 bucket
    S3_SECRET_ACCESS_KEY = os.getenv("S3_SECRET_ACCESS_KEY")

    # Writing the .csv file to bucket S3
    session = boto3.Session(aws_access_key_id=S3_ACCESS_KEY_ID,
                            aws_secret_access_key=S3_SECRET_ACCESS_KEY)
    s3 = session.resource("s3")
    bucket = s3.Bucket("kayak-booking-bucket-12-12-2022")
    bucket.upload_file("city_coordinates.csv", Key="city_coordinates.csv")
