# CREATE FULL WEATHER AND EVENTS DF

In [1]:
import requests
import pandas as pd
import dateutil.parser

## 1. Getting daily sunrise and sunset times from the Open Meteo API

### 1.1 Getting the data from the API

In [2]:
start_date = "2022-06-21"
end_date = "2022-09-23"
url = 'https://archive-api.open-meteo.com/v1/archive'

In [3]:
params_daily_dict ={
        "latitude":"51.5085", #London latitude - should remain hardcoded
        "longitude":"-0.1780971", #London longitude -should remain hardcoded
        "start_date":start_date, #could be defined in .env and used in the other files
        "end_date":end_date, #could be defined in .env and used in the other files
        "timezone":"Europe/London", #Europe/London - specific to this api
        "daily":"sunrise,sunset" # specific to this api
}

daily_weather_response = requests.get(
        url,
        params=params_daily_dict).json()

In [4]:
sun_df =pd.DataFrame()
sun_df["sunrise"] = daily_weather_response["daily"]["sunrise"]
sun_df["sunset"] = daily_weather_response["daily"]["sunset"]
sun_df.head()

Unnamed: 0,sunrise,sunset
0,2022-06-21T04:43,2022-06-21T21:21
1,2022-06-22T04:43,2022-06-22T21:21
2,2022-06-23T04:43,2022-06-23T21:21
3,2022-06-24T04:44,2022-06-24T21:22
4,2022-06-25T04:44,2022-06-25T21:22


### 1.2 Daytime/nightime encoding

In [5]:
def date_parser(time):
    date_parser = dateutil.parser.isoparse(time)
    return date_parser

In [6]:
sun_df["sunrise_datetime"] = sun_df.apply(lambda x: date_parser(x["sunrise"]), axis = 1)

In [7]:
sun_df["sunset_datetime"] = sun_df.apply(lambda x: date_parser(x["sunset"]), axis = 1)

In [8]:
sun_df['date'] = sun_df['sunrise_datetime'].dt.date

In [9]:
sun_df.head()

Unnamed: 0,sunrise,sunset,sunrise_datetime,sunset_datetime,date
0,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,2022-06-21
1,2022-06-22T04:43,2022-06-22T21:21,2022-06-22 04:43:00,2022-06-22 21:21:00,2022-06-22
2,2022-06-23T04:43,2022-06-23T21:21,2022-06-23 04:43:00,2022-06-23 21:21:00,2022-06-23
3,2022-06-24T04:44,2022-06-24T21:22,2022-06-24 04:44:00,2022-06-24 21:22:00,2022-06-24
4,2022-06-25T04:44,2022-06-25T21:22,2022-06-25 04:44:00,2022-06-25 21:22:00,2022-06-25


In [10]:
def daytime_encoding(timestamp, sunrise_datetime, sunset_datetime):
    if timestamp < sunrise_datetime:
        daytime_encoding = "nighttime"
    elif timestamp >= sunrise_datetime and timestamp < sunset_datetime:
        daytime_encoding = "daytime"
    else:
        daytime_encoding = "nighttime"
    return daytime_encoding

## 2. Getting hourly weather data from the Open Meteo API

### 2.1 API data

In [11]:
params_hourly_dict ={
        "latitude":"51.5085", #London latitude - should remain hardcoded
        "longitude":"-0.1780971", #London longitude -should remain hardcoded
        "start_date":start_date, #could be defined in .env and used in the other files
        "end_date":end_date, #could be defined in .env and used in the other files
        "timezone":"Europe/London", #Europe/London - specific to this api
        "hourly":"temperature_2m,precipitation,rain,snowfall,cloudcover,windspeed_10m,winddirection_10m" # specific to this api
}

hourly_weather_response = requests.get(
    url,
    params=params_hourly_dict).json()

In [12]:
timestamp_api = hourly_weather_response["hourly"]["time"]
temperature_api = hourly_weather_response["hourly"]["temperature_2m"]
precipitation_api = hourly_weather_response["hourly"]["precipitation"]
rain_api = hourly_weather_response["hourly"]["rain"]
snow_api = hourly_weather_response["hourly"]["snowfall"]
cloudcover_api = hourly_weather_response["hourly"]["cloudcover"]
windspeed_api = hourly_weather_response["hourly"]["windspeed_10m"]
winddirection_api = hourly_weather_response["hourly"]["winddirection_10m"]

### 2.2 Put the data into a df

In [13]:
weather_data = pd.DataFrame()
weather_data["timestamp"] = timestamp_api
weather_data["temperature"] = temperature_api
weather_data["precipitation"] = precipitation_api
weather_data["rainfall"] = rain_api
weather_data["snowfall"] = snow_api
weather_data["cloudcover"] = cloudcover_api
weather_data["wind_speed"] = windspeed_api
weather_data["wind_direction"]= winddirection_api

In [14]:
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction
0,2022-06-21T00:00,12.5,0.0,0.0,0.0,11,8.8,348
1,2022-06-21T01:00,11.5,0.0,0.0,0.0,0,7.6,355
2,2022-06-21T02:00,10.6,0.0,0.0,0.0,1,8.0,333
3,2022-06-21T03:00,9.9,0.0,0.0,0.0,1,8.1,283
4,2022-06-21T04:00,9.4,0.0,0.0,0.0,8,3.2,360
...,...,...,...,...,...,...,...,...
2275,2022-09-23T19:00,15.9,0.0,0.0,0.0,60,10.5,59
2276,2022-09-23T20:00,14.3,0.6,0.6,0.0,81,9.0,61
2277,2022-09-23T21:00,14.2,0.0,0.0,0.0,76,9.8,62
2278,2022-09-23T22:00,13.8,0.0,0.0,0.0,73,10.8,69


### 2.3 Timestamp recoding

In [15]:
weather_data["timestamp"] = weather_data.apply(lambda x: date_parser(x["timestamp"]), axis = 1)

In [16]:
weather_data.head()

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction
0,2022-06-21 00:00:00,12.5,0.0,0.0,0.0,11,8.8,348
1,2022-06-21 01:00:00,11.5,0.0,0.0,0.0,0,7.6,355
2,2022-06-21 02:00:00,10.6,0.0,0.0,0.0,1,8.0,333
3,2022-06-21 03:00:00,9.9,0.0,0.0,0.0,1,8.1,283
4,2022-06-21 04:00:00,9.4,0.0,0.0,0.0,8,3.2,360


### 2.4 Add the daytime & night time encoding to the full df

In [17]:
weather_data['date'] = weather_data['timestamp'].dt.date

In [18]:
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date
0,2022-06-21 00:00:00,12.5,0.0,0.0,0.0,11,8.8,348,2022-06-21
1,2022-06-21 01:00:00,11.5,0.0,0.0,0.0,0,7.6,355,2022-06-21
2,2022-06-21 02:00:00,10.6,0.0,0.0,0.0,1,8.0,333,2022-06-21
3,2022-06-21 03:00:00,9.9,0.0,0.0,0.0,1,8.1,283,2022-06-21
4,2022-06-21 04:00:00,9.4,0.0,0.0,0.0,8,3.2,360,2022-06-21
...,...,...,...,...,...,...,...,...,...
2275,2022-09-23 19:00:00,15.9,0.0,0.0,0.0,60,10.5,59,2022-09-23
2276,2022-09-23 20:00:00,14.3,0.6,0.6,0.0,81,9.0,61,2022-09-23
2277,2022-09-23 21:00:00,14.2,0.0,0.0,0.0,76,9.8,62,2022-09-23
2278,2022-09-23 22:00:00,13.8,0.0,0.0,0.0,73,10.8,69,2022-09-23


In [19]:
weather_data = weather_data.merge(sun_df)

In [20]:
weather_data

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise,sunset,sunrise_datetime,sunset_datetime
0,2022-06-21 00:00:00,12.5,0.0,0.0,0.0,11,8.8,348,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00
1,2022-06-21 01:00:00,11.5,0.0,0.0,0.0,0,7.6,355,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00
2,2022-06-21 02:00:00,10.6,0.0,0.0,0.0,1,8.0,333,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00
3,2022-06-21 03:00:00,9.9,0.0,0.0,0.0,1,8.1,283,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00
4,2022-06-21 04:00:00,9.4,0.0,0.0,0.0,8,3.2,360,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2275,2022-09-23 19:00:00,15.9,0.0,0.0,0.0,60,10.5,59,2022-09-23,2022-09-23T06:48,2022-09-23T18:57,2022-09-23 06:48:00,2022-09-23 18:57:00
2276,2022-09-23 20:00:00,14.3,0.6,0.6,0.0,81,9.0,61,2022-09-23,2022-09-23T06:48,2022-09-23T18:57,2022-09-23 06:48:00,2022-09-23 18:57:00
2277,2022-09-23 21:00:00,14.2,0.0,0.0,0.0,76,9.8,62,2022-09-23,2022-09-23T06:48,2022-09-23T18:57,2022-09-23 06:48:00,2022-09-23 18:57:00
2278,2022-09-23 22:00:00,13.8,0.0,0.0,0.0,73,10.8,69,2022-09-23,2022-09-23T06:48,2022-09-23T18:57,2022-09-23 06:48:00,2022-09-23 18:57:00


In [21]:
weather_data["encoding"] = weather_data.apply(lambda x: daytime_encoding(x["timestamp"], x["sunrise_datetime"], x["sunset_datetime"]), axis = 1)

In [22]:
weather_data.head(20)

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise,sunset,sunrise_datetime,sunset_datetime,encoding
0,2022-06-21 00:00:00,12.5,0.0,0.0,0.0,11,8.8,348,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
1,2022-06-21 01:00:00,11.5,0.0,0.0,0.0,0,7.6,355,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
2,2022-06-21 02:00:00,10.6,0.0,0.0,0.0,1,8.0,333,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
3,2022-06-21 03:00:00,9.9,0.0,0.0,0.0,1,8.1,283,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
4,2022-06-21 04:00:00,9.4,0.0,0.0,0.0,8,3.2,360,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
5,2022-06-21 05:00:00,9.3,0.0,0.0,0.0,17,5.4,312,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,daytime
6,2022-06-21 06:00:00,10.8,0.0,0.0,0.0,26,8.0,275,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,daytime
7,2022-06-21 07:00:00,12.0,0.0,0.0,0.0,25,1.8,270,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,daytime
8,2022-06-21 08:00:00,14.1,0.0,0.0,0.0,19,3.7,61,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,daytime
9,2022-06-21 09:00:00,15.9,0.0,0.0,0.0,21,5.8,60,2022-06-21,2022-06-21T04:43,2022-06-21T21:21,2022-06-21 04:43:00,2022-06-21 21:21:00,daytime


In [23]:
weather_data_final = weather_data.drop(columns={"sunrise", "sunset"}, axis = 1)

In [24]:
weather_data_final

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,sunset_datetime,encoding
0,2022-06-21 00:00:00,12.5,0.0,0.0,0.0,11,8.8,348,2022-06-21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
1,2022-06-21 01:00:00,11.5,0.0,0.0,0.0,0,7.6,355,2022-06-21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
2,2022-06-21 02:00:00,10.6,0.0,0.0,0.0,1,8.0,333,2022-06-21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
3,2022-06-21 03:00:00,9.9,0.0,0.0,0.0,1,8.1,283,2022-06-21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
4,2022-06-21 04:00:00,9.4,0.0,0.0,0.0,8,3.2,360,2022-06-21,2022-06-21 04:43:00,2022-06-21 21:21:00,nighttime
...,...,...,...,...,...,...,...,...,...,...,...,...
2275,2022-09-23 19:00:00,15.9,0.0,0.0,0.0,60,10.5,59,2022-09-23,2022-09-23 06:48:00,2022-09-23 18:57:00,nighttime
2276,2022-09-23 20:00:00,14.3,0.6,0.6,0.0,81,9.0,61,2022-09-23,2022-09-23 06:48:00,2022-09-23 18:57:00,nighttime
2277,2022-09-23 21:00:00,14.2,0.0,0.0,0.0,76,9.8,62,2022-09-23,2022-09-23 06:48:00,2022-09-23 18:57:00,nighttime
2278,2022-09-23 22:00:00,13.8,0.0,0.0,0.0,73,10.8,69,2022-09-23,2022-09-23 06:48:00,2022-09-23 18:57:00,nighttime


In [25]:
weather_data_final.to_csv('../../../raw_data/weather_data_final.csv')  

## 3. Add events to main dataframe

### 3.1 Load the events_df

In [26]:
pwd

'/home/catherine/code/elsebasmar/london-bss/londonbss/notebooks/data_collection'

In [27]:
all_events_df = pd.read_csv("../../../raw_data/all_events_df.csv")

In [28]:
all_events_df

Unnamed: 0,title,start_date,end_date,Location,Latitude,Longitude,London_zone_Central,London_zone_London_all,London_zone_North,London_zone_South_West,London_zone_West,London_zone_East,London_zone_South_East,date
0,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-01-01
1,Good Friday,2014-04-18,2014-04-18,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-04-18
2,Easter Monday,2014-04-21,2014-04-21,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-04-21
3,Early May bank holiday,2014-05-05,2014-05-05,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-05-05
4,Spring bank holiday,2014-05-26,2014-05-26,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-05-26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
943,FA cup final,2018-05-19,2018-05-19,Wembley Stadium,51.55616476,-0.279596246,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2018-05-19
944,FA cup final,2019-05-18,2019-05-18,Wembley Stadium,51.55616476,-0.279596246,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2019-05-18
945,FA cup final,2021-05-15,2021-05-15,Wembley Stadium,51.55616476,-0.279596246,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2021-05-15
946,FA cup final,2021-05-14,2021-05-14,Wembley Stadium,51.55616476,-0.279596246,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2021-05-14


In [29]:
all_events_df.dtypes

title                      object
start_date                 object
end_date                   object
Location                   object
Latitude                   object
Longitude                  object
London_zone_Central       float64
London_zone_London_all    float64
London_zone_North         float64
London_zone_South_West    float64
London_zone_West          float64
London_zone_East          float64
London_zone_South_East    float64
date                       object
dtype: object

In [30]:
#all_events_df.drop("Unnamed: 0",axis =1, inplace=True)

In [31]:
all_events_df.columns

Index(['title', 'start_date', 'end_date', 'Location', 'Latitude', 'Longitude',
       'London_zone_Central', 'London_zone_London_all', 'London_zone_North',
       'London_zone_South_West', 'London_zone_West', 'London_zone_East',
       'London_zone_South_East', 'date'],
      dtype='object')

In [32]:
new_column_list = ('event_title', 'event_start_date', 'event_end_date', 'event_location', 'event_latitude', 'event_longitude', 'London_zone_Central', 'London_zone_South_East','London_zone_East', 'London_zone_London_all', 'London_zone_North',
       'London_zone_South_West', 'London_zone_West', 'date')

In [33]:
all_events_df.columns = new_column_list

In [34]:
all_events_df["event_start_date"]= pd.to_datetime(all_events_df["event_start_date"])

In [35]:
all_events_df["event_end_date"]= pd.to_datetime(all_events_df["event_end_date"])

In [36]:
all_events_df['date'] = all_events_df['event_start_date'].dt.date

In [37]:
all_events_df.head()

Unnamed: 0,event_title,event_start_date,event_end_date,event_location,event_latitude,event_longitude,London_zone_Central,London_zone_South_East,London_zone_East,London_zone_London_all,London_zone_North,London_zone_South_West,London_zone_West,date
0,New Year’s Day,2014-01-01,2014-01-01,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-01-01
1,Good Friday,2014-04-18,2014-04-18,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-04-18
2,Easter Monday,2014-04-21,2014-04-21,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-04-21
3,Early May bank holiday,2014-05-05,2014-05-05,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-05-05
4,Spring bank holiday,2014-05-26,2014-05-26,London-wide,London-wide,London-wide,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2014-05-26


### 3.2 Merge with main df

In [38]:
weather_events_data = weather_data_final.copy()

In [39]:
weather_events_data.shape

(2280, 12)

In [40]:
weather_events_data = weather_events_data.merge(all_events_df, on="date", how="left")

In [41]:
weather_events_data.shape

(2328, 25)

In [42]:
weather_events_data.to_csv('../../../raw_data/weather_events_data.csv')

## 4. Add Elisabeth line column

In [43]:
we_ev_el = weather_events_data.copy()

In [44]:
#Elisabeth line first day = 24/05/2022

In [45]:
we_ev_el["date"] = pd.to_datetime(we_ev_el["date"])

In [46]:
we_ev_el["elisabeth_line"] = "True"

In [47]:
we_ev_el.loc[we_ev_el["date"] < "2022-05-24", "elisabeth_line"] = False

In [48]:
we_ev_el.shape

(2328, 26)

In [49]:
we_ev_el.to_csv('../../../raw_data/we_ev_el.csv')

## 5. Add lockdown column

In [50]:
# lockdown 1 = 24/03/2020 to 28/05/2020
# lockdown 2 = 05/11/2020 to 02/12/2020
# lockdown 3 = 04/01/2021 to 12/04/2021

In [51]:
weevel_lo = we_ev_el.copy()

In [52]:
from datetime import datetime

In [53]:
lockdown1_start = datetime.strptime("2020-03-24", '%Y-%m-%d')
lockdown1_end = datetime.strptime("2020-05-28", '%Y-%m-%d')
lockdown2_start = datetime.strptime("2020-11-05", '%Y-%m-%d')
lockdown2_end = datetime.strptime("2020-12-02", '%Y-%m-%d')
lockdown3_start = datetime.strptime("2021-01-04", '%Y-%m-%d')
lockdown3_end = datetime.strptime("2021-04-12", '%Y-%m-%d')

In [54]:
def lockdown_date(date):
    if date > lockdown1_start and date <= lockdown1_end:
        return True
    elif date > lockdown2_start and date <= lockdown2_end:
        return True
    elif date > lockdown3_start and date <= lockdown3_end:
        return True
    else:
        return False

In [55]:
# test_date = datetime.strptime("2020-12-13", '%Y-%m-%d')
# print(lockdown_date(test_date, True))
#weather_data["encoding"] = weather_data.apply(lambda x: daytime_encoding(x["timestamp"], x["sunrise_datetime"], x["sunset_datetime"]), axis = 1)

In [56]:
weevel_lo["lockdown"] = weevel_lo["date"].apply(lockdown_date)
weevel_lo.lockdown.value_counts()

lockdown
False    2328
Name: count, dtype: int64

In [57]:
weevel_lo.to_csv('../../../raw_data/weevel_lo.csv')

## 6. School holidays

In [58]:
scho_hol = weevel_lo.copy()

In [59]:
school_holidays = pd.read_csv("../../../raw_data/school_holidays.csv")

In [60]:
school_holidays.columns

Index(['school_holidays', 'date'], dtype='object')

In [61]:
school_holidays["date"] = pd.to_datetime(school_holidays["date"])

In [62]:
scho_hol = scho_hol.merge(school_holidays, on="date", how="left")

In [63]:
scho_hol.head(100)

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,...,London_zone_Central,London_zone_South_East,London_zone_East,London_zone_London_all,London_zone_North,London_zone_South_West,London_zone_West,elisabeth_line,lockdown,school_holidays
0,2022-06-21 00:00:00,12.5,0.0,0.0,0.0,11,8.8,348,2022-06-21,2022-06-21 04:43:00,...,,,,,,,,True,False,
1,2022-06-21 01:00:00,11.5,0.0,0.0,0.0,0,7.6,355,2022-06-21,2022-06-21 04:43:00,...,,,,,,,,True,False,
2,2022-06-21 02:00:00,10.6,0.0,0.0,0.0,1,8.0,333,2022-06-21,2022-06-21 04:43:00,...,,,,,,,,True,False,
3,2022-06-21 03:00:00,9.9,0.0,0.0,0.0,1,8.1,283,2022-06-21,2022-06-21 04:43:00,...,,,,,,,,True,False,
4,2022-06-21 04:00:00,9.4,0.0,0.0,0.0,8,3.2,360,2022-06-21,2022-06-21 04:43:00,...,,,,,,,,True,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2022-06-24 23:00:00,15.5,0.4,0.4,0.0,100,19.5,275,2022-06-24,2022-06-24 04:44:00,...,,,,,,,,True,False,
96,2022-06-25 00:00:00,15.3,0.2,0.2,0.0,100,18.8,241,2022-06-25,2022-06-25 04:44:00,...,,,,,,,,True,False,
97,2022-06-25 01:00:00,14.7,0.0,0.0,0.0,45,16.1,228,2022-06-25,2022-06-25 04:44:00,...,,,,,,,,True,False,
98,2022-06-25 02:00:00,13.8,0.0,0.0,0.0,34,15.9,232,2022-06-25,2022-06-25 04:44:00,...,,,,,,,,True,False,


In [64]:
scho_hol.to_csv('../../../raw_data/scho_hol.csv')

## 7. Strikes

In [65]:
strikes_df = scho_hol.copy()

In [66]:
strikes = pd.read_csv("../../../raw_data/strikes.csv")

In [67]:
strikes.columns

Index(['strike', 'date'], dtype='object')

In [68]:
strikes["date"] = pd.to_datetime(strikes["date"])

In [69]:
strikes_df = strikes_df.merge(strikes, on="date", how="left")

In [70]:
strikes_df.head()

Unnamed: 0,timestamp,temperature,precipitation,rainfall,snowfall,cloudcover,wind_speed,wind_direction,date,sunrise_datetime,...,London_zone_South_East,London_zone_East,London_zone_London_all,London_zone_North,London_zone_South_West,London_zone_West,elisabeth_line,lockdown,school_holidays,strike
0,2022-06-21 00:00:00,12.5,0.0,0.0,0.0,11,8.8,348,2022-06-21,2022-06-21 04:43:00,...,,,,,,,True,False,,tube
1,2022-06-21 00:00:00,12.5,0.0,0.0,0.0,11,8.8,348,2022-06-21,2022-06-21 04:43:00,...,,,,,,,True,False,,train
2,2022-06-21 01:00:00,11.5,0.0,0.0,0.0,0,7.6,355,2022-06-21,2022-06-21 04:43:00,...,,,,,,,True,False,,tube
3,2022-06-21 01:00:00,11.5,0.0,0.0,0.0,0,7.6,355,2022-06-21,2022-06-21 04:43:00,...,,,,,,,True,False,,train
4,2022-06-21 02:00:00,10.6,0.0,0.0,0.0,1,8.0,333,2022-06-21,2022-06-21 04:43:00,...,,,,,,,True,False,,tube


In [71]:
strikes_df.to_csv('../../../raw_data/strikes_df.csv')

## Final df export

In [72]:
final_features_df = strikes_df.copy()

In [73]:
final_features_df.shape

(2376, 29)

In [74]:
final_features_df.to_csv('../../../raw_data/final_features_df.csv')