# CREATE FULL WEATHER AND EVENTS DF

In [1]:
import requests
import pandas as pd
import dateutil.parser

## 1. Getting daily sunrise and sunset times from the Open Meteo API

### 1.1 Getting the data from the API

In [2]:
start_date = "2014-01-01"
end_date = "2023-08-20"
url = 'https://archive-api.open-meteo.com/v1/archive'

In [3]:
params_daily_dict ={
        "latitude":"51.5085", #London latitude - should remain hardcoded
        "longitude":"-0.1780971", #London longitude -should remain hardcoded
        "start_date":start_date, #could be defined in .env and used in the other files
        "end_date":end_date, #could be defined in .env and used in the other files
        "timezone":"Europe/London", #Europe/London - specific to this api
        "daily":"sunrise,sunset" # specific to this api
}

daily_weather_response = requests.get(
        url,
        params=params_daily_dict).json()

In [4]:
sun_df =pd.DataFrame()
sun_df["sunrise"] = daily_weather_response["daily"]["sunrise"]
sun_df["sunset"] = daily_weather_response["daily"]["sunset"]
sun_df.head()

Unnamed: 0,sunrise,sunset
0,2014-01-01T09:06,2014-01-01T17:02
1,2014-01-02T09:06,2014-01-02T17:03
2,2014-01-03T09:06,2014-01-03T17:04
3,2014-01-04T09:05,2014-01-04T17:05
4,2014-01-05T09:05,2014-01-05T17:06


### 1.2 Daytime/nightime encoding

In [5]:
def date_parser(time):
    date_parser = dateutil.parser.isoparse(time)
    return date_parser

In [6]:
sun_df["sunrise_datetime"] = sun_df.apply(lambda x: date_parser(x["sunrise"]), axis = 1)

In [7]:
sun_df["sunset_datetime"] = sun_df.apply(lambda x: date_parser(x["sunset"]), axis = 1)

In [8]:
sun_df['date'] = sun_df['sunrise_datetime'].dt.date

In [9]:
sun_df.head()

Unnamed: 0,sunrise,sunset,sunrise_datetime,sunset_datetime,date
0,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,2014-01-01
1,2014-01-02T09:06,2014-01-02T17:03,2014-01-02 09:06:00,2014-01-02 17:03:00,2014-01-02
2,2014-01-03T09:06,2014-01-03T17:04,2014-01-03 09:06:00,2014-01-03 17:04:00,2014-01-03
3,2014-01-04T09:05,2014-01-04T17:05,2014-01-04 09:05:00,2014-01-04 17:05:00,2014-01-04
4,2014-01-05T09:05,2014-01-05T17:06,2014-01-05 09:05:00,2014-01-05 17:06:00,2014-01-05


In [10]:
def daytime_encoding(timestamp, sunrise_datetime, sunset_datetime):
    if timestamp < sunrise_datetime:
        daytime_encoding = "nighttime"
    elif timestamp >= sunrise_datetime and timestamp < sunset_datetime:
        daytime_encoding = "daytime"
    else:
        daytime_encoding = "nighttime"
    return daytime_encoding

## 2. Getting hourly weather data from the Open Meteo API

### 2.1 API data

In [11]:
params_hourly_dict ={
        "latitude":"51.5085", #London latitude - should remain hardcoded
        "longitude":"-0.1780971", #London longitude -should remain hardcoded
        "start_date":start_date, #could be defined in .env and used in the other files
        "end_date":end_date, #could be defined in .env and used in the other files
        "timezone":"Europe/London", #Europe/London - specific to this api
        "hourly":"temperature_2m,precipitation,rain,snowfall,cloudcover,windspeed_10m,winddirection_10m" # specific to this api
}

hourly_weather_response = requests.get(
    url,
    params=params_hourly_dict).json()

In [12]:
timestamp_api = hourly_weather_response["hourly"]["time"]
temperature_api = hourly_weather_response["hourly"]["temperature_2m"]
precipitation_api = hourly_weather_response["hourly"]["precipitation"]
rain_api = hourly_weather_response["hourly"]["rain"]
snow_api = hourly_weather_response["hourly"]["snowfall"]
cloudcover_api = hourly_weather_response["hourly"]["cloudcover"]
windspeed_api = hourly_weather_response["hourly"]["windspeed_10m"]
winddirection_api = hourly_weather_response["hourly"]["winddirection_10m"]

### 2.2 Put the data into a df

In [13]:
weather_data = pd.DataFrame()
weather_data["timestamp"] = timestamp_api
weather_data["temperature"] = temperature_api
weather_data["precipitation"] = precipitation_api
weather_data["rainfall"] = rain_api
weather_data["snowfall"] = snow_api
weather_data["cloudcover"] = cloudcover_api
weather_data["wind_speed"] = windspeed_api
weather_data["wind_direction"]= winddirection_api

In [14]:
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction
0,2014-01-01T00:00,6.2,0.0,0.0,0.0,87,22.3,194
1,2014-01-01T01:00,6.4,0.0,0.0,0.0,100,24.6,201
2,2014-01-01T02:00,6.5,0.0,0.0,0.0,63,25.0,205
3,2014-01-01T03:00,6.5,0.0,0.0,0.0,40,24.5,208
4,2014-01-01T04:00,6.4,0.0,0.0,0.0,84,23.0,207
...,...,...,...,...,...,...,...,...
84451,2023-08-20T19:00,21.4,0.0,0.0,0.0,33,12.0,226
84452,2023-08-20T20:00,20.1,0.0,0.0,0.0,33,10.9,224
84453,2023-08-20T21:00,18.6,0.0,0.0,0.0,27,10.7,222
84454,2023-08-20T22:00,17.8,0.0,0.0,0.0,19,10.4,226


### 2.3 Timestamp recoding

In [15]:
weather_data["timestamp"] = weather_data.apply(lambda x: date_parser(x["timestamp"]), axis = 1)

In [16]:
weather_data.head()

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87,22.3,194
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100,24.6,201
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63,25.0,205
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40,24.5,208
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84,23.0,207


### 2.4 Add the daytime & night time encoding to the full df

In [17]:
weather_data['date'] = weather_data['timestamp'].dt.date

In [18]:
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87,22.3,194,2014-01-01
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100,24.6,201,2014-01-01
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63,25.0,205,2014-01-01
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40,24.5,208,2014-01-01
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84,23.0,207,2014-01-01
...,...,...,...,...,...,...,...,...,...
84451,2023-08-20 19:00:00,21.4,0.0,0.0,0.0,33,12.0,226,2023-08-20
84452,2023-08-20 20:00:00,20.1,0.0,0.0,0.0,33,10.9,224,2023-08-20
84453,2023-08-20 21:00:00,18.6,0.0,0.0,0.0,27,10.7,222,2023-08-20
84454,2023-08-20 22:00:00,17.8,0.0,0.0,0.0,19,10.4,226,2023-08-20


In [19]:
weather_data = weather_data.merge(sun_df)

In [20]:
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise,sunset,sunrise_datetime,sunset_datetime
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87,22.3,194,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100,24.6,201,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63,25.0,205,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40,24.5,208,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84,23.0,207,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84451,2023-08-20 19:00:00,21.4,0.0,0.0,0.0,33,12.0,226,2023-08-20,2023-08-20T05:54,2023-08-20T20:14,2023-08-20 05:54:00,2023-08-20 20:14:00
84452,2023-08-20 20:00:00,20.1,0.0,0.0,0.0,33,10.9,224,2023-08-20,2023-08-20T05:54,2023-08-20T20:14,2023-08-20 05:54:00,2023-08-20 20:14:00
84453,2023-08-20 21:00:00,18.6,0.0,0.0,0.0,27,10.7,222,2023-08-20,2023-08-20T05:54,2023-08-20T20:14,2023-08-20 05:54:00,2023-08-20 20:14:00
84454,2023-08-20 22:00:00,17.8,0.0,0.0,0.0,19,10.4,226,2023-08-20,2023-08-20T05:54,2023-08-20T20:14,2023-08-20 05:54:00,2023-08-20 20:14:00


In [21]:
weather_data["encoding"] = weather_data.apply(lambda x: daytime_encoding(x["timestamp"], x["sunrise_datetime"], x["sunset_datetime"]), axis = 1)

In [22]:
weather_data.head(20)

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise,sunset,sunrise_datetime,sunset_datetime,encoding
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87,22.3,194,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100,24.6,201,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63,25.0,205,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40,24.5,208,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84,23.0,207,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
5,2014-01-01 05:00:00,6.2,0.0,0.0,0.0,73,22.6,205,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
6,2014-01-01 06:00:00,6.1,0.0,0.0,0.0,79,21.8,202,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
7,2014-01-01 07:00:00,6.4,0.0,0.0,0.0,71,21.9,197,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
8,2014-01-01 08:00:00,7.2,0.0,0.0,0.0,100,23.2,192,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
9,2014-01-01 09:00:00,7.3,0.0,0.0,0.0,100,26.0,185,2014-01-01,2014-01-01T09:06,2014-01-01T17:02,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime


In [23]:
weather_data_final = weather_data.drop(columns={"sunrise", "sunset"}, axis = 1)

In [24]:
weather_data_final

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,sunset_datetime,encoding
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87,22.3,194,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100,24.6,201,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63,25.0,205,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40,24.5,208,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84,23.0,207,2014-01-01,2014-01-01 09:06:00,2014-01-01 17:02:00,nighttime
...,...,...,...,...,...,...,...,...,...,...,...,...
84451,2023-08-20 19:00:00,21.4,0.0,0.0,0.0,33,12.0,226,2023-08-20,2023-08-20 05:54:00,2023-08-20 20:14:00,daytime
84452,2023-08-20 20:00:00,20.1,0.0,0.0,0.0,33,10.9,224,2023-08-20,2023-08-20 05:54:00,2023-08-20 20:14:00,daytime
84453,2023-08-20 21:00:00,18.6,0.0,0.0,0.0,27,10.7,222,2023-08-20,2023-08-20 05:54:00,2023-08-20 20:14:00,nighttime
84454,2023-08-20 22:00:00,17.8,0.0,0.0,0.0,19,10.4,226,2023-08-20,2023-08-20 05:54:00,2023-08-20 20:14:00,nighttime


In [25]:
weather_data_final.to_csv('../../../raw_data/weather_data_final.csv')  

## 3. Add events to main dataframe

### 3.1 Load the events_df

In [26]:
pwd

'/home/catherine/code/elsebasmar/london-bss/londonbss/notebooks/data_collection'

In [27]:
all_events_df = pd.read_csv("../../../raw_data/all_events_df.csv")

In [28]:
all_events_df

Unnamed: 0.1,Unnamed: 0,title,start_date,end_date,Location,Latitude,Longitude
0,0,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide
1,1,Good Friday,2014-04-18,2014-04-18,London-wide,London-wide,London-wide
2,2,Easter Monday,2014-04-21,2014-04-21,London-wide,London-wide,London-wide
3,3,Early May bank holiday,2014-05-05,2014-05-05,London-wide,London-wide,London-wide
4,4,Spring bank holiday,2014-05-26,2014-05-26,London-wide,London-wide,London-wide
...,...,...,...,...,...,...,...
943,943,FA cup final,2018-05-19,2018-05-19,Wembley Stadium,51.55616476,-0.279596246
944,944,FA cup final,2019-05-18,2019-05-18,Wembley Stadium,51.55616476,-0.279596246
945,945,FA cup final,2021-05-15,2021-05-15,Wembley Stadium,51.55616476,-0.279596246
946,946,FA cup final,2021-05-14,2021-05-14,Wembley Stadium,51.55616476,-0.279596246


In [29]:
all_events_df.dtypes

Unnamed: 0     int64
title         object
start_date    object
end_date      object
Location      object
Latitude      object
Longitude     object
dtype: object

In [30]:
all_events_df.drop("Unnamed: 0",axis =1, inplace=True)

In [31]:
all_events_df.columns

Index(['title', 'start_date', 'end_date', 'Location', 'Latitude', 'Longitude'], dtype='object')

In [32]:
new_column_list = ("event_title", "event_start_date", "event_end_date", "event_location", "event_latitude", "event_longitude")

In [33]:
all_events_df.columns = new_column_list

In [34]:
all_events_df["event_start_date"]= pd.to_datetime(all_events_df["event_start_date"])

In [35]:
all_events_df["event_end_date"]= pd.to_datetime(all_events_df["event_end_date"])

In [36]:
all_events_df['date'] = all_events_df['event_start_date'].dt.date

In [37]:
all_events_df.head()

Unnamed: 0,event_title,event_start_date,event_end_date,event_location,event_latitude,event_longitude,date
0,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,2014-01-01
1,Good Friday,2014-04-18,2014-04-18,London-wide,London-wide,London-wide,2014-04-18
2,Easter Monday,2014-04-21,2014-04-21,London-wide,London-wide,London-wide,2014-04-21
3,Early May bank holiday,2014-05-05,2014-05-05,London-wide,London-wide,London-wide,2014-05-05
4,Spring bank holiday,2014-05-26,2014-05-26,London-wide,London-wide,London-wide,2014-05-26


### 3.2 Merge with main df

In [38]:
weather_events_data = weather_data_final.copy()

In [39]:
weather_events_data.shape

(84456, 12)

In [40]:
weather_events_data = weather_events_data.merge(all_events_df, on="date", how="left")

In [41]:
weather_events_data.shape

(86784, 18)

In [42]:
weather_events_data.to_csv('../../../raw_data/weather_events_data.csv')

## 4. Add Elisabeth line column

In [43]:
we_ev_el = weather_events_data.copy()

In [44]:
#Elisabeth line first day = 24/05/2022

In [45]:
we_ev_el["date"] = pd.to_datetime(we_ev_el["date"])

In [46]:
we_ev_el["elisabeth_line"] = "True"

In [47]:
we_ev_el.loc[we_ev_el["date"] < "2022-05-24", "elisabeth_line"] = False

In [48]:
we_ev_el.shape

(86784, 19)

In [49]:
we_ev_el.to_csv('../../../raw_data/we_ev_el.csv')

## 5. Add lockdown column

In [50]:
# lockdown 1 = 24/03/2020 to 28/05/2020
# lockdown 2 = 05/11/2020 to 02/12/2020
# lockdown 3 = 04/01/2021 to 12/04/2021

In [51]:
weevel_lo = we_ev_el.copy()

In [52]:
from datetime import datetime

In [53]:
lockdown1_start = datetime.strptime("2020-03-24", '%Y-%m-%d')
lockdown1_end = datetime.strptime("2020-05-28", '%Y-%m-%d')
lockdown2_start = datetime.strptime("2020-11-05", '%Y-%m-%d')
lockdown2_end = datetime.strptime("2020-12-02", '%Y-%m-%d')
lockdown3_start = datetime.strptime("2021-01-04", '%Y-%m-%d')
lockdown3_end = datetime.strptime("2021-04-12", '%Y-%m-%d')

In [54]:
def lockdown_date(date):
    if date > lockdown1_start and date <= lockdown1_end:
        return True
    elif date > lockdown2_start and date <= lockdown2_end:
        return True
    elif date > lockdown3_start and date <= lockdown3_end:
        return True
    else:
        return False

In [55]:
# test_date = datetime.strptime("2020-12-13", '%Y-%m-%d')
# print(lockdown_date(test_date, True))
#weather_data["encoding"] = weather_data.apply(lambda x: daytime_encoding(x["timestamp"], x["sunrise_datetime"], x["sunset_datetime"]), axis = 1)

In [56]:
weevel_lo["lockdown"] = weevel_lo["date"].apply(lockdown_date)
weevel_lo.lockdown.value_counts()

lockdown
False    82224
True      4560
Name: count, dtype: int64

In [57]:
weevel_lo.to_csv('../../../raw_data/weevel_lo.csv')

## 6. School holidays

In [58]:
scho_hol = weevel_lo.copy()

In [59]:
school_holidays = pd.read_csv("../../../raw_data/school_holidays.csv")

In [60]:
school_holidays.columns

Index(['school_holidays', 'date'], dtype='object')

In [61]:
school_holidays["date"] = pd.to_datetime(school_holidays["date"])

In [62]:
scho_hol = scho_hol.merge(school_holidays, on="date", how="left")

In [63]:
scho_hol.head(100)

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,...,encoding,event_title,event_start_date,event_end_date,event_location,event_latitude,event_longitude,elisabeth_line,lockdown,school_holidays
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87,22.3,194,2014-01-01,2014-01-01 09:06:00,...,nighttime,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100,24.6,201,2014-01-01,2014-01-01 09:06:00,...,nighttime,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63,25.0,205,2014-01-01,2014-01-01 09:06:00,...,nighttime,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40,24.5,208,2014-01-01,2014-01-01 09:06:00,...,nighttime,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84,23.0,207,2014-01-01,2014-01-01 09:06:00,...,nighttime,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2014-01-04 23:00:00,3.5,0.0,0.0,0.0,4,15.0,240,2014-01-04,2014-01-04 09:05:00,...,nighttime,,NaT,NaT,,,,False,False,christmas
96,2014-01-05 00:00:00,2.9,0.0,0.0,0.0,0,13.9,239,2014-01-05,2014-01-05 09:05:00,...,nighttime,,NaT,NaT,,,,False,False,christmas
97,2014-01-05 01:00:00,2.5,0.0,0.0,0.0,26,13.4,239,2014-01-05,2014-01-05 09:05:00,...,nighttime,,NaT,NaT,,,,False,False,christmas
98,2014-01-05 02:00:00,2.4,0.0,0.0,0.0,68,12.7,235,2014-01-05,2014-01-05 09:05:00,...,nighttime,,NaT,NaT,,,,False,False,christmas


In [64]:
scho_hol.to_csv('../../../raw_data/scho_hol.csv')

## 7. Strikes

In [65]:
strikes_df = scho_hol.copy()

In [66]:
strikes = pd.read_csv("../../../raw_data/strikes.csv")

In [67]:
strikes.columns

Index(['strike', 'date'], dtype='object')

In [68]:
strikes["date"] = pd.to_datetime(strikes["date"])

In [69]:
strikes_df = strikes_df.merge(strikes, on="date", how="left")

In [70]:
strikes_df.head()

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,...,event_title,event_start_date,event_end_date,event_location,event_latitude,event_longitude,elisabeth_line,lockdown,school_holidays,strike
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87,22.3,194,2014-01-01,2014-01-01 09:06:00,...,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100,24.6,201,2014-01-01,2014-01-01 09:06:00,...,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63,25.0,205,2014-01-01,2014-01-01 09:06:00,...,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40,24.5,208,2014-01-01,2014-01-01 09:06:00,...,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84,23.0,207,2014-01-01,2014-01-01 09:06:00,...,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,


In [71]:
strikes_df.to_csv('../../../raw_data/strikes_df.csv')

## 8. Add weekday

In [72]:
weekday_df = strikes_df.copy()

In [73]:
def is_weekday(date):
    day_of_the_week = date.weekday()
    return day_of_the_week

In [76]:
weekday_df["weekday"] = weekday_df.apply(lambda x: is_weekday(x["timestamp"]), axis = 1)

In [77]:
weekday_df.head(50)

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,...,event_start_date,event_end_date,event_location,event_latitude,event_longitude,elisabeth_line,lockdown,school_holidays,strike,weekday
0,2014-01-01 00:00:00,6.2,0.0,0.0,0.0,87,22.3,194,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
1,2014-01-01 01:00:00,6.4,0.0,0.0,0.0,100,24.6,201,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
2,2014-01-01 02:00:00,6.5,0.0,0.0,0.0,63,25.0,205,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
3,2014-01-01 03:00:00,6.5,0.0,0.0,0.0,40,24.5,208,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
4,2014-01-01 04:00:00,6.4,0.0,0.0,0.0,84,23.0,207,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
5,2014-01-01 05:00:00,6.2,0.0,0.0,0.0,73,22.6,205,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
6,2014-01-01 06:00:00,6.1,0.0,0.0,0.0,79,21.8,202,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
7,2014-01-01 07:00:00,6.4,0.0,0.0,0.0,71,21.9,197,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
8,2014-01-01 08:00:00,7.2,0.0,0.0,0.0,100,23.2,192,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2
9,2014-01-01 09:00:00,7.3,0.0,0.0,0.0,100,26.0,185,2014-01-01,2014-01-01 09:06:00,...,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,False,False,christmas,,2


## Final df export

In [78]:
final_features_df = weekday_df.copy()

In [79]:
final_features_df.shape

(86904, 23)

In [80]:
final_features_df.to_csv('../../../raw_data/final_features_df.csv')