In [1]:
# Installing dotenv library to manage confidential keys saved as environment variables
#!pip install python-dotenv

In [2]:
import requests
import json
import pandas as pd
import datetime
import boto3

import os
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
# functions
# iterate over coordinates of all cities
# for each line, add name and id of the city
# define conditions to select best 5 cities

In [4]:
def timestamp_to_datetime(df, column):
# converts 10-digit timestamp to datetime
    for row in range(0, len(df)):
        df.loc[row, column] = datetime.datetime.fromtimestamp(int(df.loc[row, column]))

In [5]:
# Loading .csv file with city coordinates from S3 bucket
url = 'https://kayak-booking-bucket-12-12-2022.s3.eu-west-3.amazonaws.com/city_coordinates.csv'
city_coord = pd.read_csv(url)
city_coord.head(5)

Unnamed: 0,place_id,city_name,lat,lon
0,156094680,Mont Saint-Michel,48.635954,-1.51146
1,297756747,Saint-Malo,48.649518,-2.026041
2,297981358,Bayeux,49.276462,-0.702474
3,298137491,Le Havre,49.493898,0.107973
4,297518815,Rouen,49.440459,1.093966
5,297417241,Paris,48.85889,2.320041
6,297534793,Amiens,49.894171,2.295695
7,297472400,Lille,50.636565,3.063528
8,297508568,Strasbourg,48.584614,7.750713
9,120791766,Château du Haut-Kœnigsbourg,48.24949,7.344296


In [6]:
city_coord.tail()

Unnamed: 0,place_id,city_name,lat,lon
30,298222566,Toulouse,43.604462,1.444247
31,297932490,Montauban,44.017584,1.354999
32,298516909,Biarritz,43.471144,-1.552727
33,298271847,Bayonne,43.494514,-1.473666
34,297653650,La Rochelle,46.159113,-1.152043


In [7]:
# for multiple cities
results = []

API_KEY = os.getenv("API_KEY")

for i in range(0, len(city_coord)):
    lat = city_coord.loc[i, "lat"]
    lon = city_coord.loc[i, "lon"]
    response = requests.get(f"https://api.openweathermap.org/data/3.0/onecall?lat={lat}&lon={lon}&exclude=current,minutely,hourly&units=metric&appid={API_KEY}")
    results.append(response.json())

In [8]:
# Checking if we obtained information for all 35 cities:
# print(len(results))

# Checking what information we obtained for a city:
# print(results[0])

In [9]:
type(results[0])
# each element of the list is a dictionary containing 7-day forecast for a given city

dict

In [10]:
results[0]['daily']

[{'dt': 1676894400,
  'sunrise': 1676876753,
  'sunset': 1676914445,
  'moonrise': 1676878800,
  'moonset': 1676915460,
  'moon_phase': 0,
  'temp': {'day': 10.81,
   'min': 4.52,
   'max': 10.81,
   'night': 7.4,
   'eve': 7.46,
   'morn': 4.75},
  'feels_like': {'day': 9.8, 'night': 6.81, 'eve': 7.46, 'morn': 3.15},
  'pressure': 1028,
  'humidity': 71,
  'dew_point': 5.56,
  'wind_speed': 2.06,
  'wind_deg': 208,
  'wind_gust': 2.25,
  'weather': [{'id': 800,
    'main': 'Clear',
    'description': 'clear sky',
    'icon': '01d'}],
  'clouds': 3,
  'pop': 0,
  'uvi': 1.85},
 {'dt': 1676980800,
  'sunrise': 1676963044,
  'sunset': 1677000942,
  'moonrise': 1676966460,
  'moonset': 1677007020,
  'moon_phase': 0.04,
  'temp': {'day': 12.5,
   'min': 5.32,
   'max': 13.44,
   'night': 8.66,
   'eve': 9.88,
   'morn': 5.43},
  'feels_like': {'day': 11.48, 'night': 7.86, 'eve': 9.43, 'morn': 3.54},
  'pressure': 1018,
  'humidity': 64,
  'dew_point': 5.72,
  'wind_speed': 2.67,
  'wind_de

In [11]:
weather_by_day = []

for city in range(0, len(results)):
    for day in range(0, 7):
        day_weather = results[city]['daily'][day]
        weather_by_day.append(day_weather)

In [12]:
weekly_forecast_df = pd.DataFrame.from_records(weather_by_day)

In [13]:
type(weekly_forecast_df)

pandas.core.frame.DataFrame

In [14]:
weekly_forecast_df.head()

Unnamed: 0,dt,sunrise,sunset,moonrise,moonset,moon_phase,temp,feels_like,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,weather,clouds,pop,uvi,rain,snow
0,1676894400,1676876753,1676914445,1676878800,1676915460,0.0,"{'day': 10.81, 'min': 4.52, 'max': 10.81, 'nig...","{'day': 9.8, 'night': 6.81, 'eve': 7.46, 'morn...",1028,71,5.56,2.06,208,2.25,"[{'id': 800, 'main': 'Clear', 'description': '...",3,0.0,1.85,,
1,1676980800,1676963044,1677000942,1676966460,1677007020,0.04,"{'day': 12.5, 'min': 5.32, 'max': 13.44, 'nigh...","{'day': 11.48, 'night': 7.86, 'eve': 9.43, 'mo...",1018,64,5.72,2.67,139,4.01,"[{'id': 804, 'main': 'Clouds', 'description': ...",93,0.08,1.71,,
2,1677067200,1677049333,1677087438,1677053940,1677098400,0.08,"{'day': 8, 'min': 4.48, 'max': 9.18, 'night': ...","{'day': 4.68, 'night': 2.21, 'eve': 2.76, 'mor...",1015,88,5.93,6.78,318,10.43,"[{'id': 500, 'main': 'Rain', 'description': 'l...",100,0.77,1.24,3.6,
3,1677153600,1677135622,1677173935,1677141360,1677189600,0.12,"{'day': 8.1, 'min': 3.83, 'max': 9.03, 'night'...","{'day': 4.9, 'night': 2.04, 'eve': 2.94, 'morn...",1018,76,3.98,7.16,31,12.58,"[{'id': 500, 'main': 'Rain', 'description': 'l...",100,0.72,1.72,0.5,
4,1677240000,1677221909,1677260431,1677228780,0,0.15,"{'day': 8.98, 'min': 2.31, 'max': 9.96, 'night...","{'day': 6.21, 'night': 1.99, 'eve': 4.91, 'mor...",1015,67,3.02,6.72,29,12.65,"[{'id': 803, 'main': 'Clouds', 'description': ...",76,0.0,0.83,,


In [15]:
len(weekly_forecast_df)

245

In [16]:
weekly_forecast_df.select_dtypes(include=['object'])

Unnamed: 0,temp,feels_like,weather
0,"{'day': 10.81, 'min': 4.52, 'max': 10.81, 'nig...","{'day': 9.8, 'night': 6.81, 'eve': 7.46, 'morn...","[{'id': 800, 'main': 'Clear', 'description': '..."
1,"{'day': 12.5, 'min': 5.32, 'max': 13.44, 'nigh...","{'day': 11.48, 'night': 7.86, 'eve': 9.43, 'mo...","[{'id': 804, 'main': 'Clouds', 'description': ..."
2,"{'day': 8, 'min': 4.48, 'max': 9.18, 'night': ...","{'day': 4.68, 'night': 2.21, 'eve': 2.76, 'mor...","[{'id': 500, 'main': 'Rain', 'description': 'l..."
3,"{'day': 8.1, 'min': 3.83, 'max': 9.03, 'night'...","{'day': 4.9, 'night': 2.04, 'eve': 2.94, 'morn...","[{'id': 500, 'main': 'Rain', 'description': 'l..."
4,"{'day': 8.98, 'min': 2.31, 'max': 9.96, 'night...","{'day': 6.21, 'night': 1.99, 'eve': 4.91, 'mor...","[{'id': 803, 'main': 'Clouds', 'description': ..."
...,...,...,...
240,"{'day': 10.66, 'min': 6.92, 'max': 10.66, 'nig...","{'day': 9.97, 'night': 3.18, 'eve': 5.2, 'morn...","[{'id': 500, 'main': 'Rain', 'description': 'l..."
241,"{'day': 7.48, 'min': 4.99, 'max': 8.31, 'night...","{'day': 6.17, 'night': 3.98, 'eve': 5.71, 'mor...","[{'id': 500, 'main': 'Rain', 'description': 'l..."
242,"{'day': 7.34, 'min': 6.2, 'max': 7.53, 'night'...","{'day': 5.19, 'night': 5.73, 'eve': 5.84, 'mor...","[{'id': 500, 'main': 'Rain', 'description': 'l..."
243,"{'day': 8.03, 'min': 5.83, 'max': 9.18, 'night...","{'day': 3.48, 'night': 1.16, 'eve': 3.22, 'mor...","[{'id': 803, 'main': 'Clouds', 'description': ..."


In [17]:
weekly_forecast_df.loc[0, "temp"]

{'day': 10.81,
 'min': 4.52,
 'max': 10.81,
 'night': 7.4,
 'eve': 7.46,
 'morn': 4.75}

In [18]:
len(weekly_forecast_df.loc[0, "temp"])

6

In [19]:
weekly_forecast_df.loc[0, "feels_like"]

{'day': 9.8, 'night': 6.81, 'eve': 7.46, 'morn': 3.15}

In [20]:
len(weekly_forecast_df.loc[0, "feels_like"])

4

In [21]:
weekly_forecast_df["temp"][0]

{'day': 10.81,
 'min': 4.52,
 'max': 10.81,
 'night': 7.4,
 'eve': 7.46,
 'morn': 4.75}

In [22]:
# keys for creating columns from splitting temperature column
temperature_keys = weekly_forecast_df["temp"][0].keys()

In [23]:
temperature_keys

dict_keys(['day', 'min', 'max', 'night', 'eve', 'morn'])

In [24]:
# splitting dictionary in column temp into several columns:
for row in range(0, len(weekly_forecast_df)):
    for key in temperature_keys:
        weekly_forecast_df.loc[row, "temp_"+key] = weekly_forecast_df["temp"][row][key]

In [25]:
feels_like_keys = weekly_forecast_df["feels_like"][0].keys()

In [26]:
type(feels_like_keys)

dict_keys

In [27]:
for row in range(0, len(weekly_forecast_df)): 
    for key in feels_like_keys:
        weekly_forecast_df.loc[row, "feels_like_"+key] = weekly_forecast_df["feels_like"][row][key]

In [28]:
weekly_forecast_df["weather"][0]

[{'id': 800, 'main': 'Clear', 'description': 'clear sky', 'icon': '01d'}]

In [29]:
type(weekly_forecast_df["weather"][0])

list

In [30]:
weather_keys = weekly_forecast_df["weather"][0][0].keys()

In [31]:
weather_keys

dict_keys(['id', 'main', 'description', 'icon'])

In [32]:
for row in range(0, len(weekly_forecast_df)): 
    for key in weather_keys:
        weekly_forecast_df.loc[row, "weather_"+key] = weekly_forecast_df["weather"][row][0][key]

In [33]:
weekly_forecast_df.head(5)

Unnamed: 0,dt,sunrise,sunset,moonrise,moonset,moon_phase,temp,feels_like,pressure,humidity,...,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description,weather_icon
0,1676894400,1676876753,1676914445,1676878800,1676915460,0.0,"{'day': 10.81, 'min': 4.52, 'max': 10.81, 'nig...","{'day': 9.8, 'night': 6.81, 'eve': 7.46, 'morn...",1028,71,...,7.46,4.75,9.8,6.81,7.46,3.15,800.0,Clear,clear sky,01d
1,1676980800,1676963044,1677000942,1676966460,1677007020,0.04,"{'day': 12.5, 'min': 5.32, 'max': 13.44, 'nigh...","{'day': 11.48, 'night': 7.86, 'eve': 9.43, 'mo...",1018,64,...,9.88,5.43,11.48,7.86,9.43,3.54,804.0,Clouds,overcast clouds,04d
2,1677067200,1677049333,1677087438,1677053940,1677098400,0.08,"{'day': 8, 'min': 4.48, 'max': 9.18, 'night': ...","{'day': 4.68, 'night': 2.21, 'eve': 2.76, 'mor...",1015,88,...,6.31,8.7,4.68,2.21,2.76,7.03,500.0,Rain,light rain,10d
3,1677153600,1677135622,1677173935,1677141360,1677189600,0.12,"{'day': 8.1, 'min': 3.83, 'max': 9.03, 'night'...","{'day': 4.9, 'night': 2.04, 'eve': 2.94, 'morn...",1018,76,...,6.88,5.1,4.9,2.04,2.94,4.15,500.0,Rain,light rain,10d
4,1677240000,1677221909,1677260431,1677228780,0,0.15,"{'day': 8.98, 'min': 2.31, 'max': 9.96, 'night...","{'day': 6.21, 'night': 1.99, 'eve': 4.91, 'mor...",1015,67,...,6.93,2.54,6.21,1.99,4.91,-0.9,803.0,Clouds,broken clouds,04d


In [34]:
weekly_forecast_df.columns

Index(['dt', 'sunrise', 'sunset', 'moonrise', 'moonset', 'moon_phase', 'temp',
       'feels_like', 'pressure', 'humidity', 'dew_point', 'wind_speed',
       'wind_deg', 'wind_gust', 'weather', 'clouds', 'pop', 'uvi', 'rain',
       'snow', 'temp_day', 'temp_min', 'temp_max', 'temp_night', 'temp_eve',
       'temp_morn', 'feels_like_day', 'feels_like_night', 'feels_like_eve',
       'feels_like_morn', 'weather_id', 'weather_main', 'weather_description',
       'weather_icon'],
      dtype='object')

In [35]:
columns_to_drop = ['moonrise', 'moonset', 'moon_phase', 'temp',
       'feels_like', 'weather', 'weather_icon']
weather_df = weekly_forecast_df.drop(columns_to_drop, axis=1)

In [36]:
weather_df.head()

Unnamed: 0,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,clouds,...,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description
0,1676894400,1676876753,1676914445,1028,71,5.56,2.06,208,2.25,3,...,7.4,7.46,4.75,9.8,6.81,7.46,3.15,800.0,Clear,clear sky
1,1676980800,1676963044,1677000942,1018,64,5.72,2.67,139,4.01,93,...,8.66,9.88,5.43,11.48,7.86,9.43,3.54,804.0,Clouds,overcast clouds
2,1677067200,1677049333,1677087438,1015,88,5.93,6.78,318,10.43,100,...,4.48,6.31,8.7,4.68,2.21,2.76,7.03,500.0,Rain,light rain
3,1677153600,1677135622,1677173935,1018,76,3.98,7.16,31,12.58,100,...,6.18,6.88,5.1,4.9,2.04,2.94,4.15,500.0,Rain,light rain
4,1677240000,1677221909,1677260431,1015,67,3.02,6.72,29,12.65,76,...,4.61,6.93,2.54,6.21,1.99,4.91,-0.9,803.0,Clouds,broken clouds


In [37]:
timestamp_date_columns = ['dt', 'sunrise', 'sunset']
for column in timestamp_date_columns:
    timestamp_to_datetime(weather_df, column)

In [38]:
weather_df.head()

Unnamed: 0,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,clouds,...,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description
0,2023-02-20 13:00:00,2023-02-20 08:05:53,2023-02-20 18:34:05,1028,71,5.56,2.06,208,2.25,3,...,7.4,7.46,4.75,9.8,6.81,7.46,3.15,800.0,Clear,clear sky
1,2023-02-21 13:00:00,2023-02-21 08:04:04,2023-02-21 18:35:42,1018,64,5.72,2.67,139,4.01,93,...,8.66,9.88,5.43,11.48,7.86,9.43,3.54,804.0,Clouds,overcast clouds
2,2023-02-22 13:00:00,2023-02-22 08:02:13,2023-02-22 18:37:18,1015,88,5.93,6.78,318,10.43,100,...,4.48,6.31,8.7,4.68,2.21,2.76,7.03,500.0,Rain,light rain
3,2023-02-23 13:00:00,2023-02-23 08:00:22,2023-02-23 18:38:55,1018,76,3.98,7.16,31,12.58,100,...,6.18,6.88,5.1,4.9,2.04,2.94,4.15,500.0,Rain,light rain
4,2023-02-24 13:00:00,2023-02-24 07:58:29,2023-02-24 18:40:31,1015,67,3.02,6.72,29,12.65,76,...,4.61,6.93,2.54,6.21,1.99,4.91,-0.9,803.0,Clouds,broken clouds


In [39]:
"""def propagate_value(source_df, source_column, target_df, target_column, nb_insertions):
# takes a value from one dataframe and inserts it in several rows in another dataframe
    for source_row in range(0, len(source_df)):
        start_index = 0
        for target_row in range(start_index, nb_insertions):
            target_df.loc[target_row, target_column] = source_df.loc[source_row, source_column]
            start_index = start_index+7"""

    


'def propagate_value(source_df, source_column, target_df, target_column, nb_insertions):\n# takes a value from one dataframe and inserts it in several rows in another dataframe\n    for source_row in range(0, len(source_df)):\n        start_index = 0\n        for target_row in range(start_index, nb_insertions):\n            target_df.loc[target_row, target_column] = source_df.loc[source_row, source_column]\n            start_index = start_index+7'

In [40]:
#propagate_value(city_coord, 'city_name', weather_df, 'city_name', 7)

In [41]:
city_list = city_coord['city_name']

In [42]:
weather_df.head(10)

Unnamed: 0,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,clouds,...,temp_night,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description
0,2023-02-20 13:00:00,2023-02-20 08:05:53,2023-02-20 18:34:05,1028,71,5.56,2.06,208,2.25,3,...,7.4,7.46,4.75,9.8,6.81,7.46,3.15,800.0,Clear,clear sky
1,2023-02-21 13:00:00,2023-02-21 08:04:04,2023-02-21 18:35:42,1018,64,5.72,2.67,139,4.01,93,...,8.66,9.88,5.43,11.48,7.86,9.43,3.54,804.0,Clouds,overcast clouds
2,2023-02-22 13:00:00,2023-02-22 08:02:13,2023-02-22 18:37:18,1015,88,5.93,6.78,318,10.43,100,...,4.48,6.31,8.7,4.68,2.21,2.76,7.03,500.0,Rain,light rain
3,2023-02-23 13:00:00,2023-02-23 08:00:22,2023-02-23 18:38:55,1018,76,3.98,7.16,31,12.58,100,...,6.18,6.88,5.1,4.9,2.04,2.94,4.15,500.0,Rain,light rain
4,2023-02-24 13:00:00,2023-02-24 07:58:29,2023-02-24 18:40:31,1015,67,3.02,6.72,29,12.65,76,...,4.61,6.93,2.54,6.21,1.99,4.91,-0.9,803.0,Clouds,broken clouds
5,2023-02-25 13:00:00,2023-02-25 07:56:35,2023-02-25 18:42:07,1019,59,0.22,8.15,41,12.38,80,...,3.41,4.78,2.02,4.1,-0.54,0.41,-1.9,803.0,Clouds,broken clouds
6,2023-02-26 13:00:00,2023-02-26 07:54:41,2023-02-26 18:43:42,1024,48,-2.95,8.0,56,12.53,54,...,2.92,4.51,3.09,3.34,-1.39,0.64,-1.23,803.0,Clouds,broken clouds
7,2023-02-20 13:00:00,2023-02-20 08:07:58,2023-02-20 18:36:07,1028,75,5.68,3.16,220,3.49,0,...,7.87,7.4,6.05,9.96,7.16,6.05,4.0,800.0,Clear,clear sky
8,2023-02-21 13:00:00,2023-02-21 08:06:08,2023-02-21 18:37:44,1018,71,5.48,3.24,142,4.36,92,...,9.46,10.95,6.61,9.66,7.74,10.14,4.73,500.0,Rain,light rain
9,2023-02-22 13:00:00,2023-02-22 08:04:18,2023-02-22 18:39:21,1015,81,5.43,8.44,315,11.02,100,...,5.72,6.99,8.68,5.03,2.3,3.09,6.37,500.0,Rain,light rain


In [43]:
# adding city id and city name
"""for i in range(0, 7):
    city_index = 0
    weather_df.loc[i, "city_name"] = city_coord.loc[city_index, "city_name"]"""

'for i in range(0, 7):\n    city_index = 0\n    weather_df.loc[i, "city_name"] = city_coord.loc[city_index, "city_name"]'

In [44]:
"""for i in range(7, 14):
    city_index = 1
    weather_df.loc[i, "city_name"] = city_coord.loc[city_index, "city_name"]"""

'for i in range(7, 14):\n    city_index = 1\n    weather_df.loc[i, "city_name"] = city_coord.loc[city_index, "city_name"]'

In [45]:
"""start_index = 0
end_index = 7
for i in range(start_index, end_index):
    city_index = 0
    weather_df.loc[i, "city_name"] = city_coord.loc[city_index, "city_name"]"""


'start_index = 0\nend_index = 7\nfor i in range(start_index, end_index):\n    city_index = 0\n    weather_df.loc[i, "city_name"] = city_coord.loc[city_index, "city_name"]'

In [46]:
"""start_index = 0
end_index = 7
for city in range(0, len(city_coord)):
    for i in range(start_index, end_index):
        weather_df.loc[i, "city_name"] = city_coord.loc[city, "city_name"]
        print(start_index)
        print(end_index)
    start_index = start_index+7
    end_index = end_index+7"""

'start_index = 0\nend_index = 7\nfor city in range(0, len(city_coord)):\n    for i in range(start_index, end_index):\n        weather_df.loc[i, "city_name"] = city_coord.loc[city, "city_name"]\n        print(start_index)\n        print(end_index)\n    start_index = start_index+7\n    end_index = end_index+7'

In [47]:
target_df_start_index = 0
target_df_end_index = 7
for source_df_row in range(0, len(city_coord)):
    for target_df_row in range(target_df_start_index, target_df_end_index):
        weather_df.loc[target_df_row, "city_name"] = city_coord.loc[source_df_row, "city_name"]
    target_df_start_index = target_df_start_index+7
    target_df_end_index= target_df_end_index+7

In [48]:
weather_df.head(15)

Unnamed: 0,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,clouds,...,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description,city_name
0,2023-02-20 13:00:00,2023-02-20 08:05:53,2023-02-20 18:34:05,1028,71,5.56,2.06,208,2.25,3,...,7.46,4.75,9.8,6.81,7.46,3.15,800.0,Clear,clear sky,Mont Saint-Michel
1,2023-02-21 13:00:00,2023-02-21 08:04:04,2023-02-21 18:35:42,1018,64,5.72,2.67,139,4.01,93,...,9.88,5.43,11.48,7.86,9.43,3.54,804.0,Clouds,overcast clouds,Mont Saint-Michel
2,2023-02-22 13:00:00,2023-02-22 08:02:13,2023-02-22 18:37:18,1015,88,5.93,6.78,318,10.43,100,...,6.31,8.7,4.68,2.21,2.76,7.03,500.0,Rain,light rain,Mont Saint-Michel
3,2023-02-23 13:00:00,2023-02-23 08:00:22,2023-02-23 18:38:55,1018,76,3.98,7.16,31,12.58,100,...,6.88,5.1,4.9,2.04,2.94,4.15,500.0,Rain,light rain,Mont Saint-Michel
4,2023-02-24 13:00:00,2023-02-24 07:58:29,2023-02-24 18:40:31,1015,67,3.02,6.72,29,12.65,76,...,6.93,2.54,6.21,1.99,4.91,-0.9,803.0,Clouds,broken clouds,Mont Saint-Michel
5,2023-02-25 13:00:00,2023-02-25 07:56:35,2023-02-25 18:42:07,1019,59,0.22,8.15,41,12.38,80,...,4.78,2.02,4.1,-0.54,0.41,-1.9,803.0,Clouds,broken clouds,Mont Saint-Michel
6,2023-02-26 13:00:00,2023-02-26 07:54:41,2023-02-26 18:43:42,1024,48,-2.95,8.0,56,12.53,54,...,4.51,3.09,3.34,-1.39,0.64,-1.23,803.0,Clouds,broken clouds,Mont Saint-Michel
7,2023-02-20 13:00:00,2023-02-20 08:07:58,2023-02-20 18:36:07,1028,75,5.68,3.16,220,3.49,0,...,7.4,6.05,9.96,7.16,6.05,4.0,800.0,Clear,clear sky,Saint-Malo
8,2023-02-21 13:00:00,2023-02-21 08:06:08,2023-02-21 18:37:44,1018,71,5.48,3.24,142,4.36,92,...,10.95,6.61,9.66,7.74,10.14,4.73,500.0,Rain,light rain,Saint-Malo
9,2023-02-22 13:00:00,2023-02-22 08:04:18,2023-02-22 18:39:21,1015,81,5.43,8.44,315,11.02,100,...,6.99,8.68,5.03,2.3,3.09,6.37,500.0,Rain,light rain,Saint-Malo


In [49]:
weather_df.tail(15)

Unnamed: 0,dt,sunrise,sunset,pressure,humidity,dew_point,wind_speed,wind_deg,wind_gust,clouds,...,temp_eve,temp_morn,feels_like_day,feels_like_night,feels_like_eve,feels_like_morn,weather_id,weather_main,weather_description,city_name
230,2023-02-26 13:00:00,2023-02-26 07:48:33,2023-02-26 18:50:09,1017,61,-0.52,5.99,45,9.68,18,...,6.44,1.62,4.23,0.91,3.59,-1.82,801.0,Clouds,few clouds,Biarritz
231,2023-02-20 13:00:00,2023-02-20 07:57:43,2023-02-20 18:41:57,1023,58,8.92,3.61,84,6.46,91,...,18.57,8.97,16.89,12.42,18.0,7.49,804.0,Clouds,overcast clouds,Bayonne
232,2023-02-21 13:00:00,2023-02-21 07:56:11,2023-02-21 18:43:17,1020,64,8.3,3.83,273,7.9,99,...,11.95,11.66,14.48,11.73,11.6,10.71,804.0,Clouds,overcast clouds,Bayonne
233,2023-02-22 13:00:00,2023-02-22 07:54:38,2023-02-22 18:44:36,1020,89,10.64,8.89,338,12.12,100,...,10.98,11.39,12.25,5.98,10.56,11.01,502.0,Rain,heavy intensity rain,Bayonne
234,2023-02-23 13:00:00,2023-02-23 07:53:04,2023-02-23 18:45:54,1016,83,4.51,7.34,318,12.07,100,...,6.83,7.74,7.43,5.45,6.83,5.49,502.0,Rain,heavy intensity rain,Bayonne
235,2023-02-24 13:00:00,2023-02-24 07:51:29,2023-02-24 18:47:13,1012,80,4.42,3.16,220,4.68,98,...,7.48,4.45,5.84,3.06,7.48,4.45,500.0,Rain,light rain,Bayonne
236,2023-02-25 13:00:00,2023-02-25 07:49:53,2023-02-25 18:48:31,1010,89,3.79,3.7,39,7.52,96,...,7.28,3.58,5.66,3.83,7.28,0.82,500.0,Rain,light rain,Bayonne
237,2023-02-26 13:00:00,2023-02-26 07:48:16,2023-02-26 18:49:49,1016,58,-0.62,5.17,45,10.54,12,...,6.08,1.1,5.35,0.84,3.59,-1.67,801.0,Clouds,few clouds,Bayonne
238,2023-02-20 13:00:00,2023-02-20 08:00:24,2023-02-20 18:36:42,1026,67,6.39,4.79,59,6.65,14,...,13.56,6.3,11.54,7.07,12.69,3.52,801.0,Clouds,few clouds,La Rochelle
239,2023-02-21 13:00:00,2023-02-21 07:58:43,2023-02-21 18:38:10,1017,73,7.61,4.75,231,8.03,100,...,10.35,8.7,11.58,7.61,9.97,6.38,804.0,Clouds,overcast clouds,La Rochelle


In [50]:
len(weather_df["city_name"].value_counts())

35

### Saving the dataframe locally and in bucket S3

(The cell related to the S3 bucket is commented in order not to rewrite accidentally the file every time the notebook is launched).

In [51]:
weather_df.to_csv("weather_forecast.csv")

In [52]:
"""# Access key for user with access to write in S3 bucket
S3_ACCESS_KEY_ID =  os.getenv("S3_ACCESS_KEY_ID")
# Secret key for user with access to write in S3 bucket 
S3_SECRET_ACCESS_KEY =  os.getenv("S3_SECRET_ACCESS_KEY")

# Writing the .csv file to bucket S3
session = boto3.Session(aws_access_key_id=S3_ACCESS_KEY_ID, 
                      aws_secret_access_key=S3_SECRET_ACCESS_KEY)
s3 = session.resource("s3")
bucket = s3.Bucket("kayak-booking-bucket-12-12-2022") 
bucket.upload_file("weather_forecast.csv", Key="weather_forecast.csv")"""