In [1]:
import json
import logging
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Destinations to look up: 35 place names, in the order they are processed below
city_names = [
    "Mont Saint Michel", "St Malo", "Bayeux", "Le Havre", "Rouen",
    "Paris", "Amiens", "Lille", "Strasbourg", "Chateau du Haut Koenigsbourg",
    "Colmar", "Eguisheim", "Besancon", "Dijon", "Annecy",
    "Grenoble", "Lyon", "Bormes les Mimosas", "Cassis", "Marseille",
    "Aix en Provence", "Avignon", "Uzes", "Nimes", "Aigues Mortes",
    "Saintes Maries de la mer", "Collioure", "Carcassonne", "Foix", "Toulouse",
    "Montauban", "Biarritz", "Bayonne", "La Rochelle", "Gorges Du Verdon",
]



In [3]:
# Build a one-column DataFrame ("city_names") from the list of destinations
dataset = pd.DataFrame({"city_names": city_names})
dataset

Unnamed: 0,city_names
0,Mont Saint Michel
1,St Malo
2,Bayeux
3,Le Havre
4,Rouen
5,Paris
6,Amiens
7,Lille
8,Strasbourg
9,Chateau du Haut Koenigsbourg


In [4]:
# The search URL built later expects '+' where the name has a space,
# so store the names in that form.
dataset['city_names'] = [
    name.replace(' ', '+') for name in dataset['city_names']
]

In [5]:
# Display the first 35 rows (the list above has 35 names) to check that every
# space was replaced by '+'.
dataset.head(35)

Unnamed: 0,city_names
0,Mont+Saint+Michel
1,St+Malo
2,Bayeux
3,Le+Havre
4,Rouen
5,Paris
6,Amiens
7,Lille
8,Strasbourg
9,Chateau+du+Haut+Koenigsbourg


In [6]:
# Making a first request on the API.
# Nominatim's usage policy asks every client to send a User-Agent that
# identifies the application; stock HTTP-library agents may be rejected.
# Replace the value below with one that identifies you (e.g. add a contact).
NOMINATIM_HEADERS = {'User-Agent': 'kayak-destinations-notebook/1.0'}

url = 'https://nominatim.openstreetmap.org/search?q=paris&format=json'
# timeout: never let the notebook hang forever on a stalled connection.
response = requests.get(url, headers=NOMINATIM_HEADERS, timeout=10)
response

<Response [200]>

In [7]:
# Inspect the first match returned by the search to see which fields it carries
matches = response.json()
matches[0]

{'place_id': 111607,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'node',
 'osm_id': 17807753,
 'boundingbox': ['48.6966969', '49.0166969', '2.1914616', '2.5114616'],
 'lat': '48.8566969',
 'lon': '2.3514616',
 'display_name': 'Paris, France métropolitaine, 75044, France',
 'class': 'place',
 'type': 'city',
 'importance': 0.9317101715588673,
 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons//poi_place_city.p.20.png'}

In [8]:
# Latitude and longitude of the first match (decode the JSON body only once)
top_match = response.json()[0]
(top_match['lat'], top_match['lon'])

('48.8566969', '2.3514616')

In [9]:
# Fetch the coordinates of every city from Nominatim.
NOMINATIM_SEARCH_URL = 'https://nominatim.openstreetmap.org/search'
# Nominatim's usage policy asks for an identifying User-Agent and at most one
# request per second. Replace the agent with one that identifies you.
NOMINATIM_HEADERS = {'User-Agent': 'kayak-destinations-notebook/1.0'}
NOMINATIM_DELAY_SECONDS = 1

for i in dataset.index:
    city = dataset.loc[i, 'city_names']
    geo_response = requests.get(
        NOMINATIM_SEARCH_URL,
        # Names are stored with '+' instead of spaces; give requests the plain
        # name and let it do the URL encoding (a literal '+' passed through
        # `params` would be escaped as %2B).
        params={'q': city.replace('+', ' '), 'format': 'json'},
        headers=NOMINATIM_HEADERS,
        timeout=10,
    )
    # Surface HTTP errors (403, 429, 5xx) instead of failing later while
    # decoding an error page.
    geo_response.raise_for_status()
    rr = geo_response.json()
    if not rr:
        # An empty result list would otherwise raise an opaque IndexError.
        raise ValueError('Nominatim returned no result for {!r}'.format(city))
    # Keep the best (first) match; values are stored exactly as returned by
    # the API.
    dataset.loc[i,'Latitude'] = rr[0]['lat']
    dataset.loc[i,'Longitude'] = rr[0]['lon']
    # Stay within the one-request-per-second limit.
    time.sleep(NOMINATIM_DELAY_SECONDS)

In [10]:
# Display up to 36 rows to check the Latitude / Longitude columns that the
# loop above filled in.
dataset.head(36)

Unnamed: 0,city_names,Latitude,Longitude
0,Mont+Saint+Michel,48.6355232,-1.5102571
1,St+Malo,48.649518,-2.0260409
2,Bayeux,49.2764624,-0.7024738
3,Le+Havre,49.4938975,0.1079732
4,Rouen,49.4404591,1.0939658
5,Paris,48.8566969,2.3514616
6,Amiens,49.8941708,2.2956951
7,Lille,50.6365654,3.0635282
8,Strasbourg,48.584614,7.7507127
9,Chateau+du+Haut+Koenigsbourg,48.249489800000006,7.34429620253195


In [11]:
# What does a weather information request look like?
# The API key is read from the environment so that it is never stored in the
# notebook: set OPENWEATHERMAP_API_KEY before starting the kernel.
OWM_API_KEY = os.environ['OPENWEATHERMAP_API_KEY']

url2 = 'https://api.openweathermap.org/data/2.5/onecall'
resp = requests.get(
    url2,
    params={
        'lat': 43.7497,
        'lon': 6.32859,
        'units': 'metric',
        'exclude': 'current,minutely,hourly',
        'appid': OWM_API_KEY,
    },
    # Never let the notebook hang forever on a stalled connection.
    timeout=10,
)
resp.json()

{'lat': 43.7497,
 'lon': 6.3286,
 'timezone': 'Europe/Paris',
 'timezone_offset': 7200,
 'daily': [{'dt': 1633950000,
   'sunrise': 1633931036,
   'sunset': 1633971513,
   'moonrise': 1633952940,
   'moonset': 1633984080,
   'moon_phase': 0.19,
   'temp': {'day': 17.36,
    'min': 8.3,
    'max': 18.02,
    'night': 9.27,
    'eve': 15.45,
    'morn': 8.39},
   'feels_like': {'day': 16.38, 'night': 9.27, 'eve': 14.49, 'morn': 8.39},
   'pressure': 1019,
   'humidity': 47,
   'dew_point': 5.95,
   'wind_speed': 2.29,
   'wind_deg': 232,
   'wind_gust': 3.28,
   'weather': [{'id': 800,
     'main': 'Clear',
     'description': 'clear sky',
     'icon': '01d'}],
   'clouds': 3,
   'pop': 0.05,
   'uvi': 3.91},
  {'dt': 1634036400,
   'sunrise': 1634017508,
   'sunset': 1634057810,
   'moonrise': 1634043240,
   'moonset': 1634074200,
   'moon_phase': 0.22,
   'temp': {'day': 15.67,
    'min': 7.92,
    'max': 15.67,
    'night': 9.09,
    'eve': 10.63,
    'morn': 7.93},
   'feels_like': {

In [12]:
# For one pair of coordinates the response holds a list of daily forecasts.
# Daytime temperature of the first entry:
first_day = resp.json()['daily'][0]
first_day['temp']['day']

17.36

In [13]:
# Now that we know how to get the information we need, fetch the daily
# temperature and weather description of every city for today and the seven
# following days.
OWM_ONECALL_URL = 'https://api.openweathermap.org/data/2.5/onecall'
# The API key is read from the environment so that it is never stored in the
# notebook: set OPENWEATHERMAP_API_KEY before starting the kernel.
OWM_API_KEY = os.environ['OPENWEATHERMAP_API_KEY']
# Column suffixes: 'j0' for the first forecast day, then 'j+1' .. 'j+7'.
day_labels = ['j0'] + ['j+{}'.format(offset) for offset in range(1, 8)]

for i in dataset.index:
    lati = dataset.loc[i,'Latitude']
    longi = dataset.loc[i,'Longitude']
    data = requests.get(
        OWM_ONECALL_URL,
        params={
            'lat': lati,
            'lon': longi,
            # Only the 'daily' section is read below, so skip the rest.
            'exclude': 'current,minutely,hourly',
            'units': 'metric',
            'appid': OWM_API_KEY,
        },
        timeout=10,
    )
    # Fail with the HTTP error (bad key, quota, ...) rather than a KeyError
    # on 'daily' further down.
    data.raise_for_status()
    # Decode the JSON body once instead of once per column.
    daily = data.json()['daily']

    # Temperature columns first, then description columns, so the column
    # order of the frame is Temp j0..j+7 followed by Alerte j0..j+7.
    for offset, label in enumerate(day_labels):
        dataset.loc[i, 'Temp ' + label] = daily[offset]['temp']['day']
    for offset, label in enumerate(day_labels):
        dataset.loc[i, 'Alerte ' + label] = daily[offset]['weather'][0]['description']


dataset.head()

Unnamed: 0,city_names,Latitude,Longitude,Temp j0,Temp j+1,Temp j+2,Temp j+3,Temp j+4,Temp j+5,Temp j+6,Temp j+7,Alerte j0,Alerte j+1,Alerte j+2,Alerte j+3,Alerte j+4,Alerte j+5,Alerte j+6,Alerte j+7
0,Mont+Saint+Michel,48.6355232,-1.5102571,16.68,15.56,15.18,15.77,15.99,16.69,16.43,16.21,few clouds,scattered clouds,overcast clouds,clear sky,overcast clouds,scattered clouds,overcast clouds,scattered clouds
1,St+Malo,48.649518,-2.0260409,15.78,15.64,15.53,14.84,14.58,15.35,15.65,15.74,few clouds,scattered clouds,overcast clouds,clear sky,overcast clouds,broken clouds,overcast clouds,scattered clouds
2,Bayeux,49.2764624,-0.7024738,14.96,15.68,14.89,15.79,15.19,16.4,15.93,16.06,broken clouds,broken clouds,light rain,clear sky,overcast clouds,scattered clouds,broken clouds,clear sky
3,Le+Havre,49.4938975,0.1079732,14.5,14.69,14.5,14.41,14.44,14.49,14.85,15.18,light rain,broken clouds,overcast clouds,clear sky,overcast clouds,overcast clouds,broken clouds,clear sky
4,Rouen,49.4404591,1.0939658,14.67,14.94,14.05,15.45,15.68,13.97,14.85,15.4,broken clouds,overcast clouds,light rain,scattered clouds,overcast clouds,overcast clouds,overcast clouds,clear sky


In [14]:
# Average the eight daily temperatures (j0 .. j+7) of each city into one column
temp_columns = ['Temp j0'] + ['Temp j+{}'.format(day) for day in range(1, 8)]
dataset['Avg Temp'] = dataset[temp_columns].mean(axis=1)
dataset.head()

Unnamed: 0,city_names,Latitude,Longitude,Temp j0,Temp j+1,Temp j+2,Temp j+3,Temp j+4,Temp j+5,Temp j+6,Temp j+7,Alerte j0,Alerte j+1,Alerte j+2,Alerte j+3,Alerte j+4,Alerte j+5,Alerte j+6,Alerte j+7,Avg Temp
0,Mont+Saint+Michel,48.6355232,-1.5102571,16.68,15.56,15.18,15.77,15.99,16.69,16.43,16.21,few clouds,scattered clouds,overcast clouds,clear sky,overcast clouds,scattered clouds,overcast clouds,scattered clouds,16.06375
1,St+Malo,48.649518,-2.0260409,15.78,15.64,15.53,14.84,14.58,15.35,15.65,15.74,few clouds,scattered clouds,overcast clouds,clear sky,overcast clouds,broken clouds,overcast clouds,scattered clouds,15.38875
2,Bayeux,49.2764624,-0.7024738,14.96,15.68,14.89,15.79,15.19,16.4,15.93,16.06,broken clouds,broken clouds,light rain,clear sky,overcast clouds,scattered clouds,broken clouds,clear sky,15.6125
3,Le+Havre,49.4938975,0.1079732,14.5,14.69,14.5,14.41,14.44,14.49,14.85,15.18,light rain,broken clouds,overcast clouds,clear sky,overcast clouds,overcast clouds,broken clouds,clear sky,14.6325
4,Rouen,49.4404591,1.0939658,14.67,14.94,14.05,15.45,15.68,13.97,14.85,15.4,broken clouds,overcast clouds,light rain,scattered clouds,overcast clouds,overcast clouds,overcast clouds,clear sky,14.87625


In [15]:
# Put a sequential ID (0 .. number of rows - 1) in the first column
dataset.insert(loc=0, column='ID', value=range(len(dataset)))
dataset.head()

Unnamed: 0,ID,city_names,Latitude,Longitude,Temp j0,Temp j+1,Temp j+2,Temp j+3,Temp j+4,Temp j+5,...,Temp j+7,Alerte j0,Alerte j+1,Alerte j+2,Alerte j+3,Alerte j+4,Alerte j+5,Alerte j+6,Alerte j+7,Avg Temp
0,0,Mont+Saint+Michel,48.6355232,-1.5102571,16.68,15.56,15.18,15.77,15.99,16.69,...,16.21,few clouds,scattered clouds,overcast clouds,clear sky,overcast clouds,scattered clouds,overcast clouds,scattered clouds,16.06375
1,1,St+Malo,48.649518,-2.0260409,15.78,15.64,15.53,14.84,14.58,15.35,...,15.74,few clouds,scattered clouds,overcast clouds,clear sky,overcast clouds,broken clouds,overcast clouds,scattered clouds,15.38875
2,2,Bayeux,49.2764624,-0.7024738,14.96,15.68,14.89,15.79,15.19,16.4,...,16.06,broken clouds,broken clouds,light rain,clear sky,overcast clouds,scattered clouds,broken clouds,clear sky,15.6125
3,3,Le+Havre,49.4938975,0.1079732,14.5,14.69,14.5,14.41,14.44,14.49,...,15.18,light rain,broken clouds,overcast clouds,clear sky,overcast clouds,overcast clouds,broken clouds,clear sky,14.6325
4,4,Rouen,49.4404591,1.0939658,14.67,14.94,14.05,15.45,15.68,13.97,...,15.4,broken clouds,overcast clouds,light rain,scattered clouds,overcast clouds,overcast clouds,overcast clouds,clear sky,14.87625


In [16]:
# Convert both coordinate columns to float
for coord_column in ('Latitude', 'Longitude'):
    dataset[coord_column] = dataset[coord_column].astype(float)

In [18]:
# Which 5 cities have the highest average temperature over the forecast period?
ranking_columns = ['city_names', 'Avg Temp', 'Latitude', 'Longitude']
ranking_avg_temp = dataset[ranking_columns].sort_values(by='Avg Temp', ascending=False)
ranking_cities = ranking_avg_temp.head(5)
ranking_cities

Unnamed: 0,city_names,Avg Temp,Latitude,Longitude
26,Collioure,19.81125,42.52505,3.083155
27,Carcassonne,19.3275,43.213036,2.349107
28,Foix,19.19875,42.9639,1.605381
18,Cassis,18.81375,43.214036,5.539632
20,Aix+en+Provence,18.785,43.529842,5.447474


In [19]:
# Replacing the + by spaces in city names.
# '+' is a regex quantifier and the default of `regex` differs between pandas
# versions, so ask explicitly for a literal match.
dataset['city_names'] = dataset['city_names'].str.replace('+', ' ', regex=False)

# ranking_cities was sliced out of the data before this cell ran, so it still
# holds the '+' form of the names (e.g. 'Aix+en+Provence'); fix it as well so
# the exported ranking is readable. .assign() returns a new frame, which
# avoids writing into a slice of ranking_avg_temp.
ranking_cities = ranking_cities.assign(
    city_names=ranking_cities['city_names'].str.replace('+', ' ', regex=False)
)

In [20]:
# Write both result tables to CSV, leaving the DataFrame index out of the files
csv_exports = (
    ('kayak_cities_weather_noindex.csv', dataset),
    ('ranking_cities.csv', ranking_cities),
)
for csv_path, frame in csv_exports:
    frame.to_csv(csv_path, index=False)