# CREATE FULL WEATHER AND EVENTS DF

In [1]:
import requests
import pandas as pd
import dateutil.parser

## 1. Getting daily sunrise and sunset times from the Open Meteo API

### 1.1 Getting the data from the API

In [2]:
# Date window requested from the API, as ISO "YYYY-MM-DD" strings.
start_date, end_date = "2014-01-01", "2023-08-23"
# Open-Meteo archive endpoint; both the daily and the hourly request use it.
url = "https://archive-api.open-meteo.com/v1/archive"

In [3]:
# Query for the daily sunrise/sunset times at the London coordinates.
params_daily_dict = {
    "latitude": "51.5085",  # London latitude - should remain hardcoded
    "longitude": "-0.1780971",  # London longitude - should remain hardcoded
    "start_date": start_date,  # could be defined in .env and used in the other files
    "end_date": end_date,  # could be defined in .env and used in the other files
    "timezone": "Europe/London",  # Europe/London - specific to this api
    "daily": "sunrise,sunset",  # specific to this api
}

# requests applies no timeout by default, so an unresponsive server would hang
# the notebook. raise_for_status() stops here on an HTTP error instead of
# letting an error body be parsed as data and fail later on the "daily" lookup.
daily_http_response = requests.get(url, params=params_daily_dict, timeout=60)
daily_http_response.raise_for_status()
daily_weather_response = daily_http_response.json()

In [4]:
# One row per day, holding the raw sunrise/sunset strings returned by the API.
sun_df = pd.DataFrame(
    {key: daily_weather_response["daily"][key] for key in ("sunrise", "sunset")}
)
sun_df.head()

Unnamed: 0,sunrise,sunset
0,2014-01-01T09:06,2014-01-01T17:02
1,2014-01-02T09:06,2014-01-02T17:03
2,2014-01-03T09:06,2014-01-03T17:04
3,2014-01-04T09:05,2014-01-04T17:05
4,2014-01-05T09:05,2014-01-05T17:06


### 1.2 Daytime/nighttime encoding

In [5]:
def date_parser(time):
    """Parse an ISO-8601 string with dateutil's ``isoparse`` and return the result."""
    return dateutil.parser.isoparse(time)

In [6]:
# Map the parser over the single column that is needed; DataFrame.apply(axis=1)
# would build a Series for every row just to read one field from it.
sun_df["sunrise_datetime"] = sun_df["sunrise"].map(date_parser)

In [7]:
# Map the parser over the single column that is needed; DataFrame.apply(axis=1)
# would build a Series for every row just to read one field from it.
sun_df["sunset_datetime"] = sun_df["sunset"].map(date_parser)

In [8]:
# Calendar date of each sunrise. weather_data later gets a `date` column too,
# which gives the two frames a shared column to merge on.
sun_df['date'] = sun_df['sunrise_datetime'].dt.date

In [9]:
# Preview the parsed datetime columns and the derived date.
sun_df.head()

Unnamed: 0,sunrise,sunset,sunrise_datetime,sunset_datetime,date
0,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,2014-01-01
1,2014-01-02T09:06,2014-01-02T17:03,2014-01-02 09:06:00,2014-01-02 17:03:00,2014-01-02
2,2014-01-03T09:06,2014-01-03T17:04,2014-01-03 09:06:00,2014-01-03 17:04:00,2014-01-03
3,2014-01-04T09:05,2014-01-04T17:05,2014-01-04 09:05:00,2014-01-04 17:05:00,2014-01-04
4,2014-01-05T09:05,2014-01-05T17:06,2014-01-05 09:05:00,2014-01-05 17:06:00,2014-01-05


In [10]:
def daytime_encoding(timestamp, sunrise_datetime, sunset_datetime):
    """Label a timestamp as "daytime" or "nighttime".

    The timestamp counts as daytime when it falls in the half-open interval
    [sunrise_datetime, sunset_datetime): sunrise itself is daytime, sunset
    itself is nighttime. Everything outside that interval is nighttime.

    Returns:
        The string "daytime" or "nighttime".
    """
    is_daylight = sunrise_datetime <= timestamp < sunset_datetime
    return "daytime" if is_daylight else "nighttime"

## 2. Getting hourly weather data from the Open Meteo API

### 2.1 API data

In [11]:
# Query for the hourly weather variables at the London coordinates.
params_hourly_dict = {
    "latitude": "51.5085",  # London latitude - should remain hardcoded
    "longitude": "-0.1780971",  # London longitude - should remain hardcoded
    "start_date": start_date,  # could be defined in .env and used in the other files
    "end_date": end_date,  # could be defined in .env and used in the other files
    "timezone": "Europe/London",  # Europe/London - specific to this api
    "hourly": "temperature_2m,precipitation,rain,snowfall,cloudcover,windspeed_10m,winddirection_10m",  # specific to this api
}

# requests applies no timeout by default, so an unresponsive server would hang
# the notebook. raise_for_status() stops here on an HTTP error instead of
# letting an error body be parsed as data and fail later on the "hourly" lookup.
hourly_http_response = requests.get(url, params=params_hourly_dict, timeout=60)
hourly_http_response.raise_for_status()
hourly_weather_response = hourly_http_response.json()

In [12]:
# Every series sits under the "hourly" key of the response; look that block up
# once and pull out each requested variable by its API name.
hourly_payload = hourly_weather_response["hourly"]
timestamp_api = hourly_payload["time"]
temperature_api = hourly_payload["temperature_2m"]
precipitation_api = hourly_payload["precipitation"]
rain_api = hourly_payload["rain"]
snow_api = hourly_payload["snowfall"]
cloudcover_api = hourly_payload["cloudcover"]
windspeed_api = hourly_payload["windspeed_10m"]
winddirection_api = hourly_payload["winddirection_10m"]

### 2.2 Put the data into a df

In [13]:
# Collect the hourly series into one frame, renaming the API variables to the
# column names used in the rest of the notebook.
weather_data = pd.DataFrame(
    {
        "timestamp": timestamp_api,
        "temperature": temperature_api,
        "precipitation": precipitation_api,
        "rainfall": rain_api,
        "snowfall": snow_api,
        "cloudcover": cloudcover_api,
        "wind_speed": windspeed_api,
        "wind_direction": winddirection_api,
    }
)

In [14]:
# Show the frame. In the recorded output the last rows (2023-08-23) have a
# timestamp but no weather values.
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction
0,2014-01-01T00:00,6.2,0.0,0.0,0.0,87.0,22.3,194.0
1,2014-01-01T01:00,6.4,0.0,0.0,0.0,100.0,24.6,201.0
2,2014-01-01T02:00,6.5,0.0,0.0,0.0,63.0,25.0,205.0
3,2014-01-01T03:00,6.5,0.0,0.0,0.0,40.0,24.5,208.0
4,2014-01-01T04:00,6.4,0.0,0.0,0.0,84.0,23.0,207.0
...,...,...,...,...,...,...,...,...
84523,2023-08-23T19:00,,,,,,,
84524,2023-08-23T20:00,,,,,,,
84525,2023-08-23T21:00,,,,,,,
84526,2023-08-23T22:00,,,,,,,


### 2.3 Timestamp recoding

In [15]:
# Parse the ISO-8601 strings in one vectorised call instead of row by row.
# pd.to_datetime also accepts values that are already datetimes, so this cell
# can be re-run even though it overwrites its own input column.
weather_data["timestamp"] = pd.to_datetime(weather_data["timestamp"])

In [16]:
# Check that the timestamp column now holds parsed datetimes.
weather_data.head()

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87.0,22.3,194.0
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100.0,24.6,201.0
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63.0,25.0,205.0
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40.0,24.5,208.0
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84.0,23.0,207.0


### 2.4 Add the daytime/nighttime encoding to the full df

In [17]:
# Calendar date of each hourly row; sun_df has a `date` column as well, so
# this is the column the two frames have in common for the merge below.
weather_data['date'] = weather_data['timestamp'].dt.date

In [18]:
# Show the frame with the new `date` column.
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87.0,22.3,194.0,2014-01-01
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100.0,24.6,201.0,2014-01-01
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63.0,25.0,205.0,2014-01-01
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40.0,24.5,208.0,2014-01-01
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84.0,23.0,207.0,2014-01-01
...,...,...,...,...,...,...,...,...,...
84523,2023-08-23 19:00:00,,,,,,,,2023-08-23
84524,2023-08-23 20:00:00,,,,,,,,2023-08-23
84525,2023-08-23 21:00:00,,,,,,,,2023-08-23
84526,2023-08-23 22:00:00,,,,,,,,2023-08-23


In [19]:
# Attach each day's sunrise/sunset to its hourly rows. No `on=` is given, so
# pandas joins on the columns the two frames share (`date` at this point).
# how="inner" spells out the default. validate="many_to_one" raises if sun_df
# ever holds a repeated key, which would otherwise silently duplicate hourly rows.
weather_data = weather_data.merge(sun_df, how="inner", validate="many_to_one")

In [20]:
# Show the merged frame: every hourly row now carries its day's sunrise/sunset.
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise,sunset,sunrise_datetime,sunset_datetime
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87.0,22.3,194.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100.0,24.6,201.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63.0,25.0,205.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40.0,24.5,208.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84.0,23.0,207.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84523,2023-08-23 19:00:00,,,,,,,,2023-08-23,2023-08-23T05:58,2023-08-23T20:07,2023-08-23 05:58:00,2023-08-23 20:07:00
84524,2023-08-23 20:00:00,,,,,,,,2023-08-23,2023-08-23T05:58,2023-08-23T20:07,2023-08-23 05:58:00,2023-08-23 20:07:00
84525,2023-08-23 21:00:00,,,,,,,,2023-08-23,2023-08-23T05:58,2023-08-23T20:07,2023-08-23 05:58:00,2023-08-23 20:07:00
84526,2023-08-23 22:00:00,,,,,,,,2023-08-23,2023-08-23T05:58,2023-08-23T20:07,2023-08-23 05:58:00,2023-08-23 20:07:00


In [21]:
# Label every hourly row by comparing its timestamp with that day's sunrise
# and sunset.
weather_data["encoding"] = [
    daytime_encoding(moment, sunrise, sunset)
    for moment, sunrise, sunset in zip(
        weather_data["timestamp"],
        weather_data["sunrise_datetime"],
        weather_data["sunset_datetime"],
    )
]

In [22]:
# Look at the first 20 hours to check the labels around the first sunrise.
weather_data.head(20)

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise,sunset,sunrise_datetime,sunset_datetime,encoding
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87.0,22.3,194.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100.0,24.6,201.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63.0,25.0,205.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40.0,24.5,208.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84.0,23.0,207.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
5,2014-01-01 05:00:00,6.2,0.0,0.0,0.0,73.0,22.6,205.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
6,2014-01-01 06:00:00,6.1,0.0,0.0,0.0,79.0,21.8,202.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
7,2014-01-01 07:00:00,6.4,0.0,0.0,0.0,71.0,21.9,197.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
8,2014-01-01 08:00:00,7.2,0.0,0.0,0.0,100.0,23.2,192.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
9,2014-01-01 09:00:00,7.3,0.0,0.0,0.0,100.0,26.0,185.0,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime


In [23]:
# Drop the raw ISO strings; the parsed *_datetime columns stay.
# `columns=` already names the axis, so no `axis` argument is needed, and a
# list is the conventional label container.
weather_data_final = weather_data.drop(columns=["sunrise", "sunset"])

In [24]:
# Show the final weather frame (without the raw sunrise/sunset strings).
weather_data_final

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,sunset_datetime,encoding
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87.0,22.3,194.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100.0,24.6,201.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63.0,25.0,205.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40.0,24.5,208.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84.0,23.0,207.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
...,...,...,...,...,...,...,...,...,...,...,...,...
84523,2023-08-23 19:00:00,,,,,,,,2023-08-23,2023-08-23 05:58:00,2023-08-23 20:07:00,daytime
84524,2023-08-23 20:00:00,,,,,,,,2023-08-23,2023-08-23 05:58:00,2023-08-23 20:07:00,daytime
84525,2023-08-23 21:00:00,,,,,,,,2023-08-23,2023-08-23 05:58:00,2023-08-23 20:07:00,nighttime
84526,2023-08-23 22:00:00,,,,,,,,2023-08-23,2023-08-23 05:58:00,2023-08-23 20:07:00,nighttime


## 3. Add events to main dataframe

### 3.1 Load the events_df

In [25]:
# Load the events table from a relative path.
# NOTE(review): the preview of this frame shows an "Unnamed: 0" column, which
# looks like an index that was written to the CSV. Confirm whether
# index_col=0 should be passed here.
all_events_df = pd.read_csv("../raw_data/all_events_df.csv")

In [26]:
# Preview the events table and its columns.
all_events_df.head()

Unnamed: 0.1,Unnamed: 0,title,start_date,end_date,Location,Latitude,Longitude
0,0,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide
1,1,Good Friday,2014-04-18,2014-04-18,London-wide,London-wide,London-wide
2,2,Easter Monday,2014-04-21,2014-04-21,London-wide,London-wide,London-wide
3,3,Early May bank holiday,2014-05-05,2014-05-05,London-wide,London-wide,London-wide
4,4,Spring bank holiday,2014-05-26,2014-05-26,London-wide,London-wide,London-wide


### 3.2 Merge with main df

In [27]:
# Work on a copy so that weather_data_final itself is left untouched.
new_weat_tmp= weather_data_final.copy()

In [29]:
# Preview the working copy.
new_weat_tmp.head()

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,sunset_datetime,encoding
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87.0,22.3,194.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100.0,24.6,201.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63.0,25.0,205.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40.0,24.5,208.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84.0,23.0,207.0,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime


### 3.3 Replace "London-wide" latitude and longitude with London lat and long

In [None]:
# NOTE(review): same statement as the copy made under section 3.2. Running it
# rebinds new_weat_tmp to a fresh copy of weather_data_final.
new_weat_tmp= weather_data_final.copy()

In [None]:
# TODO: merge the events into new_weat_tmp. The draft below is disabled.
# `events_df` is not defined in this notebook (the loaded frame is
# `all_events_df`), and the previews show no column name shared between the
# events table and new_weat_tmp, so a bare merge() would have no key to join on.
#new_weat_tmp = new_weat_tmp.merge(events_df)