In [1]:
import datetime as dt
import os
import pickle

import numpy as np
import pandas as pd

from utils import csv_to_vec

In [128]:
# Lift pandas' display limits so wide/long frames are rendered without truncation.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

In [129]:
# Folder that receives every dataset written by this notebook.
OUTPUT_DATA_FOLDER = "data/4_final_dataset"
# File names (inside OUTPUT_DATA_FOLDER) of the exported datasets.
OUTPUT_WEATHER_EVENTS_DATASET = "hourly_weather_events_data.csv"
OUTPUT_MERGED_DATASET = "merged_dataset.csv"

## ISW Data Preparation

- adding a column with the respective TF-IDF vectors
- adding a column with the date following the report_date

In [130]:
# Preprocessed ISW reports; the path is stored without its ".csv" extension.
ISW_DATA_PATH = "data/2_preprocessed_isw_data/preprocessed_isw_report"

# Folder holding the pickled vectorizer artifacts, and their base file names.
VECTORS_FOLDER = "data/3_isw_vectorized_data"
tfidf_transformer = "tfidf_transformer"
count_vectorizer = "cv"

# File name of the exported ISW dataset.
OUTPUT_ISW_DATA = "isw_final.csv"

In [131]:
# Read the preprocessed ISW reports (semicolon-delimited CSV) and preview the first rows.
df_isw = pd.read_csv(ISW_DATA_PATH + '.csv', delimiter=';')
df_isw.head()

Unnamed: 0,date,title,text_title,main_text,text_stemm,text_lemm
0,2022-02-24,Russia-Ukraine Warning Update: Initial Russian...,Russia-Ukraine Warning Update: Initial Russian...,\n\nRussian President Vladimir Putin began a...,russian presid vladimir putin began larg scal...,russian president vladimir putin began large ...
1,2022-02-25,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"Mason Clark, George Barros, and Kateryna Step...",mason clark georg barro kateryna stepanenko r...,mason clark george barros kateryna stepanenko...
2,2022-02-26,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"Mason Clark, George Barros, and Katya Stepane...",mason clark georg barro katya stepanenko russ...,mason clark george barros katya stepanenko ru...
3,2022-02-27,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"\n\nFebruary 27, 4pm EST\n\nThe Russian milit...",februari twenti seven 4pm est russian militar...,february twenty seven 4pm est russian militar...
4,2022-02-28,"Russian Offensive Campaign Assessment, Februar...","Russian Offensive Campaign Assessment, Februar...",\n\nThe Russian military is reorganizing its...,russian militari reorgan militari effort atte...,russian military reorganizing military effort...


In [132]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed even if
    unpickling fails.
    """
    # NOTE: unpickling can execute arbitrary code; only load artifacts produced
    # by this project's own vectorization step.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


tfidf = _load_pickle(f'{VECTORS_FOLDER}/{tfidf_transformer}.pkl')
cv = _load_pickle(f'{VECTORS_FOLDER}/{count_vectorizer}.pkl')

# One keyword structure per report, computed from the lemmatized text.
# NOTE(review): judging by the displayed output this is a dict of term -> weight;
# confirm against utils.csv_to_vec.
df_isw['keywords'] = df_isw['text_lemm'].apply(lambda x: csv_to_vec.csv_to_vec(x, cv, tfidf))

In [133]:
df_isw.head(5)

Unnamed: 0,date,title,text_title,main_text,text_stemm,text_lemm,keywords
0,2022-02-24,Russia-Ukraine Warning Update: Initial Russian...,Russia-Ukraine Warning Update: Initial Russian...,\n\nRussian President Vladimir Putin began a...,russian presid vladimir putin began larg scal...,russian president vladimir putin began large ...,"{'pm': 0.294, 'airport': 0.27, 'kyiv': 0.247, ..."
1,2022-02-25,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"Mason Clark, George Barros, and Kateryna Step...",mason clark georg barro kateryna stepanenko r...,mason clark george barros kateryna stepanenko...,"{'february': 0.341, 'kyiv': 0.326, 'pm': 0.263..."
2,2022-02-26,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"Mason Clark, George Barros, and Katya Stepane...",mason clark georg barro katya stepanenko russ...,mason clark george barros katya stepanenko ru...,"{'february': 0.41, 'kyiv': 0.37, 'twenty': 0.2..."
3,2022-02-27,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"\n\nFebruary 27, 4pm EST\n\nThe Russian milit...",februari twenti seven 4pm est russian militar...,february twenty seven 4pm est russian militar...,"{'february': 0.51, 'seven': 0.322, 'twenty': 0..."
4,2022-02-28,"Russian Offensive Campaign Assessment, Februar...","Russian Offensive Campaign Assessment, Februar...",\n\nThe Russian military is reorganizing its...,russian militari reorgan militari effort atte...,russian military reorganizing military effort...,"{'february': 0.542, 'twenty': 0.317, 'eight': ..."


In [134]:
# Rename first: ``rename`` silently ignores labels that are absent, so running this
# cell a second time is a no-op instead of failing with KeyError on the
# already-renamed 'date' column.
df_isw = df_isw.rename(columns={'date': 'report_date'})
df_isw['report_date'] = pd.to_datetime(df_isw['report_date'])

# Calendar day following each report, computed with vectorised datetime arithmetic.
df_isw['following_date'] = df_isw['report_date'] + pd.Timedelta(days=1)

In [135]:
df_isw.head(5)

Unnamed: 0,report_date,title,text_title,main_text,text_stemm,text_lemm,keywords,following_date
0,2022-02-24,Russia-Ukraine Warning Update: Initial Russian...,Russia-Ukraine Warning Update: Initial Russian...,\n\nRussian President Vladimir Putin began a...,russian presid vladimir putin began larg scal...,russian president vladimir putin began large ...,"{'pm': 0.294, 'airport': 0.27, 'kyiv': 0.247, ...",2022-02-25
1,2022-02-25,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"Mason Clark, George Barros, and Kateryna Step...",mason clark georg barro kateryna stepanenko r...,mason clark george barros kateryna stepanenko...,"{'february': 0.341, 'kyiv': 0.326, 'pm': 0.263...",2022-02-26
2,2022-02-26,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"Mason Clark, George Barros, and Katya Stepane...",mason clark georg barro katya stepanenko russ...,mason clark george barros katya stepanenko ru...,"{'february': 0.41, 'kyiv': 0.37, 'twenty': 0.2...",2022-02-27
3,2022-02-27,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"\n\nFebruary 27, 4pm EST\n\nThe Russian milit...",februari twenti seven 4pm est russian militar...,february twenty seven 4pm est russian militar...,"{'february': 0.51, 'seven': 0.322, 'twenty': 0...",2022-02-28
4,2022-02-28,"Russian Offensive Campaign Assessment, Februar...","Russian Offensive Campaign Assessment, Februar...",\n\nThe Russian military is reorganizing its...,russian militari reorgan militari effort atte...,russian military reorganizing military effort...,"{'february': 0.542, 'twenty': 0.317, 'eight': ...",2022-03-01


In [136]:
# Create the output folder on demand; ``to_csv`` raises OSError when the target
# directory does not exist (e.g. on a fresh checkout).
os.makedirs(OUTPUT_DATA_FOLDER, exist_ok=True)
df_isw.to_csv(f'{OUTPUT_DATA_FOLDER}/{OUTPUT_ISW_DATA}', index=False)

In [137]:
df_isw.shape

(333, 8)

## Events data preparation
- removing unnecessary columns (id, region_id)
- adding date columns formatted as datetime and timestamp objects

In [77]:
# Source CSV with the air-raid alarm events.
EVENTS_DATA_PATH = "data/0_alarms.csv"

In [79]:
# Read the alarm events (semicolon-separated CSV) and report the frame's dimensions.
df_events = pd.read_csv(EVENTS_DATA_PATH, sep=';')
df_events.shape

(19933, 9)

In [80]:
# Drop the identifier columns. ``errors='ignore'`` keeps the cell re-runnable:
# on a second execution the columns are already gone and a plain drop would
# raise KeyError.
df_events = df_events.drop(columns=['region_id', 'id'], errors='ignore')
df_events.head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,


In [81]:
def isNaN(num):
    """Return whether ``num`` is a missing value.

    Delegates to ``pd.isna`` so that float NaN, ``pd.NaT``, ``None`` and ``pd.NA``
    are all recognised. The former ``num != num`` self-comparison returned False
    for ``None`` and produced ``pd.NA`` (not a boolean) for ``pd.NA``.

    Parameters
    ----------
    num : scalar
        Value to test (e.g. a float, a ``pd.Timestamp`` or ``pd.NaT``).

    Returns
    -------
    bool
        True when ``num`` is missing, False otherwise.
    """
    return pd.isna(num)

In [82]:
# Parse the textual alarm boundaries into datetime columns ('start' -> 'start_time',
# 'end' -> 'end_time'); the original string columns are kept.
for _raw_column, _parsed_column in (('start', 'start_time'), ('end', 'end_time')):
    df_events[_parsed_column] = pd.to_datetime(df_events[_raw_column])

In [83]:
# Hour-level bounds of each alarm: the start is rounded down and the end is rounded
# up to a full hour. The lowercase 'h' alias is used because the uppercase 'H'
# alias is deprecated since pandas 2.2.
df_events['start_hour'] = df_events['start_time'].dt.floor('h')
df_events['end_hour'] = df_events['end_time'].dt.ceil('h')

In [84]:

# The alarm timestamps are naive wall-clock values. Calling
# ``dt.datetime.timestamp`` on a naive value interprets it in the *machine's* local
# timezone, so the epochs (and the later join against the weather
# ``hour_datetimeEpoch``) silently depended on where the notebook was executed.
# The timezone is now explicit. 'Europe/Kiev' matches the weather data's
# ``city_timezone`` and reproduces the epochs previously recorded in this notebook.
# NOTE(review): confirm the alarm source really reports Kyiv local time.
EVENTS_TIMEZONE = 'Europe/Kiev'


def _local_hour_to_epoch(ts):
    """Convert a naive, hour-aligned timestamp in EVENTS_TIMEZONE to epoch seconds.

    Returns None for missing values. During the autumn DST change the first
    (summer-time) occurrence of an ambiguous hour is used, and an hour skipped by
    the spring change is shifted forward to the next existing instant.
    """
    if isNaN(ts):
        return None
    localized = ts.tz_localize(EVENTS_TIMEZONE, ambiguous=True, nonexistent='shift_forward')
    return int(localized.timestamp())


# Calendar date on which the alarm started.
df_events["day_date"] = df_events["start_time"].dt.date
df_events["start_hour_datetimeEpoch"] = df_events['start_hour'].apply(_local_hour_to_epoch)
df_events["end_hour_datetimeEpoch"] = df_events['end_hour'].apply(_local_hour_to_epoch)

In [85]:
df_events.head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645819200,1645826400
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645848000,1645855200
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645851600,1645855200
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645855200,1645862400
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00,2022-02-26,1645855200,1645866000


In [92]:
# Collapse every region title that contains 'Донеччина' to exactly that value, which
# is the spelling used by ``region_alt`` in the regions table.
# ``regex=False`` treats the needle as a literal and ``na=False`` keeps the mask
# strictly boolean, so a missing region_title cannot make ``.loc`` fail.
_is_donetsk = df_events['region_title'].str.contains('Донеччина', regex=False, na=False)
df_events.loc[_is_donetsk, 'region_title'] = 'Донеччина'

# Preview only a few rows: display.max_rows is unlimited in this notebook, so
# showing the whole selection would dump every matching alarm.
df_events[df_events['region_title'] == 'Донеччина'].head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
19,Донеччина,Маріуполь,0,2022-02-27 14:49:52,2022-02-27 20:39:28,2022-02-27 20:39:28,,2022-02-27 14:49:52,2022-02-27 20:39:28,2022-02-27 14:00:00,2022-02-27 21:00:00,2022-02-27,1645963200,1645988400
81,Донеччина,Маріуполь,0,2022-02-28 21:01:54,2022-02-28 21:28:04,2022-02-28 21:28:04,,2022-02-28 21:01:54,2022-02-28 21:28:04,2022-02-28 21:00:00,2022-02-28 22:00:00,2022-02-28,1646074800,1646078400
107,Донеччина,Краматорськ,0,2022-03-01 09:12:50,2022-03-01 09:40:04,2022-03-01 09:40:04,,2022-03-01 09:12:50,2022-03-01 09:40:04,2022-03-01 09:00:00,2022-03-01 10:00:00,2022-03-01,1646118000,1646121600
117,Донеччина,Краматорськ,0,2022-03-01 10:57:02,2022-03-01 11:05:40,2022-03-01 11:05:40,,2022-03-01 10:57:02,2022-03-01 11:05:40,2022-03-01 10:00:00,2022-03-01 12:00:00,2022-03-01,1646121600,1646128800
131,Донеччина,Краматорськ,0,2022-03-01 15:36:27,2022-03-01 16:15:23,2022-03-01 16:15:23,,2022-03-01 15:36:27,2022-03-01 16:15:23,2022-03-01 15:00:00,2022-03-01 17:00:00,2022-03-01,1646139600,1646146800
230,Донеччина,Краматорськ,0,2022-03-03 00:10:37,2022-03-03 00:19:41,2022-03-03 00:19:41,,2022-03-03 00:10:37,2022-03-03 00:19:41,2022-03-03 00:00:00,2022-03-03 01:00:00,2022-03-03,1646258400,1646262000
231,Донеччина,Краматорськ,0,2022-03-03 00:39:23,2022-03-03 00:44:42,2022-03-03 00:44:42,,2022-03-03 00:39:23,2022-03-03 00:44:42,2022-03-03 00:00:00,2022-03-03 01:00:00,2022-03-03,1646258400,1646262000
341,Донеччина,Краматорськ,0,2022-03-04 11:49:26,2022-03-04 12:35:22,2022-03-04 12:35:22,,2022-03-04 11:49:26,2022-03-04 12:35:22,2022-03-04 11:00:00,2022-03-04 13:00:00,2022-03-04,1646384400,1646391600
442,Донеччина,Краматорськ,0,2022-03-05 18:13:05,2022-03-05 19:21:21,2022-03-05 19:21:21,,2022-03-05 18:13:05,2022-03-05 19:21:21,2022-03-05 18:00:00,2022-03-05 20:00:00,2022-03-05,1646496000,1646503200
443,Донеччина,Слов'янськ,0,2022-03-05 18:16:12,2022-03-05 19:17:48,2022-03-05 19:17:48,,2022-03-05 18:16:12,2022-03-05 19:17:48,2022-03-05 18:00:00,2022-03-05 20:00:00,2022-03-05,1646496000,1646503200


## Weather data preparation
- removing unnecessary features
- adding a __city__ column

In [93]:
# Source CSV with the hourly weather observations.
WEATHER_DATA_PATH = "data/0_all_weather_by_hour.csv"

In [94]:
# Read the hourly weather observations (comma-separated CSV) and preview the first rows.
df_weather = pd.read_csv(filepath_or_buffer=WEATHER_DATA_PATH)
df_weather.head()

Unnamed: 0,city_latitude,city_longitude,city_resolvedAddress,city_address,city_timezone,city_tzoffset,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_feelslikemax,day_feelslikemin,day_feelslike,day_dew,day_humidity,day_precip,day_precipprob,day_precipcover,day_snow,day_snowdepth,day_windgust,day_windspeed,day_winddir,day_pressure,day_cloudcover,day_visibility,day_solarradiation,day_solarenergy,day_uvindex,day_severerisk,day_sunrise,day_sunriseEpoch,day_sunset,day_sunsetEpoch,day_moonphase,day_conditions,day_description,day_icon,day_source,day_preciptype,day_stations,hour_datetime,hour_datetimeEpoch,hour_temp,hour_feelslike,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,hour_icon,hour_source,hour_stations
0,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,00:00:00,1645653600,2.4,-1.6,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,snow,obs,remote
1,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,01:00:00,1645657200,2.4,-1.5,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,fog,obs,remote
2,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,02:00:00,1645660800,2.9,-0.8,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,cloudy,obs,33177099999
3,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,03:00:00,1645664400,2.3,-1.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,fog,obs,remote
4,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,04:00:00,1645668000,1.9,-1.8,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,cloudy,obs,remote


In [95]:
df_weather.shape

(182712, 67)

In [96]:
# Parse the textual day column into datetime values (replaces the column in place).
df_weather['day_datetime'] = df_weather['day_datetime'].pipe(pd.to_datetime)

In [97]:
# Weather columns that are dropped from the frame before merging.
weather_exclude = [
    # day-level "feels like" extremes, sun epochs and free-text description
    "day_feelslikemax",
    "day_feelslikemin",
    "day_sunriseEpoch",
    "day_sunsetEpoch",
    "day_description",
    # location metadata
    "city_latitude",
    "city_longitude",
    "city_address",
    "city_timezone",
    "city_tzoffset",
    # remaining day-level aggregates
    "day_feelslike",
    "day_precipprob",
    "day_snow",
    "day_snowdepth",
    "day_windgust",
    "day_windspeed",
    "day_winddir",
    "day_pressure",
    "day_cloudcover",
    "day_visibility",
    "day_severerisk",
    "day_conditions",
    "day_icon",
    "day_source",
    "day_preciptype",
    "day_stations",
    # hour-level fields
    "hour_icon",
    "hour_source",
    "hour_stations",
    "hour_feelslike",
]

In [98]:
# Remove the excluded weather columns. ``errors='ignore'`` keeps the cell
# re-runnable: once the columns are gone a plain drop would raise KeyError.
df_weather = df_weather.drop(columns=weather_exclude, errors='ignore')

In [99]:
df_weather.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions
0,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,00:00:00,1645653600,2.4,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast
1,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,01:00:00,1645657200,2.4,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy
2,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,02:00:00,1645660800,2.9,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast
3,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,03:00:00,1645664400,2.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast
4,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,04:00:00,1645668000,1.9,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast


In [100]:
# City name = the text before the first comma of the resolved address.
df_weather["city"] = df_weather["city_resolvedAddress"].map(lambda address: address.partition(",")[0])
# One address resolves to an oblast name rather than a city; replace it with the city name.
# NOTE(review): presumably needed so the value matches ``center_city_ua`` in the
# regions table used by the later merge - confirm.
df_weather["city"] = df_weather["city"].replace({'Хмельницька область': "Хмельницький"})

In [101]:
df_weather.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city
0,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,00:00:00,1645653600,2.4,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,Луцьк
1,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,01:00:00,1645657200,2.4,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,Луцьк
2,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,02:00:00,1645660800,2.9,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,Луцьк
3,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,03:00:00,1645664400,2.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,Луцьк
4,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,04:00:00,1645668000,1.9,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,Луцьк
5,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,05:00:00,1645671600,1.9,91.66,0.6,0.0,0.0,0.0,0.1,,23.4,10.8,296.0,1022.5,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк
6,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,06:00:00,1645675200,2.0,93.09,1.0,0.0,0.0,0.0,0.1,['snow'],20.9,10.8,300.0,1021.0,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк
7,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,07:00:00,1645678800,2.0,93.09,1.0,0.0,0.0,0.0,0.1,['snow'],19.1,10.8,300.0,1022.0,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк
8,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,08:00:00,1645682400,1.8,91.32,0.6,0.118,100.0,0.0,0.1,['snow'],16.9,7.2,303.0,1024.2,4.4,100.0,,,,10.0,"Snow, Overcast",Луцьк
9,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,09:00:00,1645686000,2.0,93.09,1.0,0.0,0.0,0.0,0.1,,15.5,10.8,300.0,1024.0,2.0,100.0,15.0,0.1,0.0,10.0,Overcast,Луцьк


In [102]:
df_weather.shape

(182712, 38)

# Merging the Datasets


### Merging Weather Data + Region Data

In [103]:
# Source CSV with the region reference table.
REGION_DATA_PATH = "data/0_regions.csv"

In [104]:
# Read the region reference table (comma-separated CSV) and display it.
df_regions = pd.read_csv(filepath_or_buffer=REGION_DATA_PATH)
df_regions

Unnamed: 0,region,center_city_ua,center_city_en,region_alt,region_id
0,АР Крим,Сімферополь,Simferopol,Крим,1
1,Вінницька,Вінниця,Vinnytsia,Вінниччина,2
2,Волинська,Луцьк,Lutsk,Волинь,3
3,Дніпропетровська,Дніпро,Dnipro,Дніпропетровщина,4
4,Донецька,Донецьк,Donetsk,Донеччина,5
5,Житомирська,Житомир,Zhytomyr,Житомирщина,6
6,Закарпатська,Ужгород,Uzhgorod,Закарпаття,7
7,Запорізька,Запоріжжя,Zaporozhye,Запоріжжя,8
8,Івано-Франківська,Івано-Франківськ,Ivano-Frankivsk,Івано-Франківщина,9
9,Київська,Київ,Kyiv,Київщина,10


In [105]:
# Attach the region attributes to every weather row by matching the weather city
# with the region's centre city (inner join: rows without a match are dropped).
df_weather_reg = df_weather.merge(
    df_regions,
    how='inner',
    left_on='city',
    right_on='center_city_ua',
)

In [106]:
df_weather_reg.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,00:00:00,1645653600,2.4,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
1,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,01:00:00,1645657200,2.4,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
2,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,02:00:00,1645660800,2.9,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
3,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,03:00:00,1645664400,2.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
4,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,04:00:00,1645668000,1.9,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3


In [107]:
df_weather_reg.shape

(182712, 43)

### Merging Weather by Region Data + Events Data

In [108]:
df_events.shape

(19933, 14)

In [109]:
# One plain dict per alarm row, plus an empty accumulator for the hour-level records.
events_dict = df_events.to_dict(orient='records')
events_by_hour = list()

In [110]:
events_dict[0]

{'region_title': 'Вінниччина',
 'region_city': 'Вінниця',
 'all_region': 0,
 'start': '2022-02-25 22:55:42',
 'end': '2022-02-25 23:41:53',
 'clean_end': '2022-02-25 23:41:53',
 'intersection_alarm_id': nan,
 'start_time': Timestamp('2022-02-25 22:55:42'),
 'end_time': Timestamp('2022-02-25 23:41:53'),
 'start_hour': Timestamp('2022-02-25 22:00:00'),
 'end_hour': Timestamp('2022-02-26 00:00:00'),
 'day_date': datetime.date(2022, 2, 25),
 'start_hour_datetimeEpoch': 1645819200,
 'end_hour_datetimeEpoch': 1645826400}

In [111]:
# Expand every alarm into one record per hour from its floored start hour to its
# ceiled end hour (both bounds included).
# The list is rebuilt from scratch, so re-running this cell can no longer append a
# second copy of every record to a list created in an earlier cell. The lowercase
# '1h' alias replaces '1H', which is deprecated since pandas 2.2.
# NOTE(review): because the end bound is included, an alarm ending at 23:41 also
# produces a record for the 00:00 hour that starts after it ended - confirm this
# is intended.
events_by_hour = [
    {**event, 'hour_level_event_time': hour}
    for event in events_dict
    for hour in pd.date_range(start=event['start_hour'], end=event['end_hour'], freq='1h')
]

In [112]:
# Rebuild the events frame from the hour-level records (one row per alarm-hour).
df_events = pd.DataFrame(events_by_hour)

In [113]:
# Epoch seconds of each hour-level record; this is the key later joined against
# the weather ``hour_datetimeEpoch``.
# The naive timestamps are localised explicitly: ``dt.datetime.timestamp`` on a
# naive value uses the executing machine's local timezone, which made the join key
# depend on where the notebook was run. 'Europe/Kiev' matches the weather data's
# ``city_timezone``. For DST edge cases the first (summer-time) occurrence of an
# ambiguous hour is used and a skipped hour is shifted forward.
# NOTE(review): confirm the alarm source reports Kyiv local time.
df_events['hour_level_event_datetimeEpoch'] = df_events['hour_level_event_time'].apply(
    lambda ts: None if isNaN(ts) else int(
        ts.tz_localize('Europe/Kiev', ambiguous=True, nonexistent='shift_forward').timestamp()
    )
)

In [114]:
df_weather_reg.shape, df_events.shape

((182712, 43), (58860, 16))

In [115]:
df_events.head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch,hour_level_event_time,hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645819200,1645826400,2022-02-25 22:00:00,1645819200
1,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645819200,1645826400,2022-02-25 23:00:00,1645822800
2,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645819200,1645826400,2022-02-26 00:00:00,1645826400
3,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645848000,1645855200,2022-02-26 06:00:00,1645848000
4,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645848000,1645855200,2022-02-26 07:00:00,1645851600


In [116]:
df_weather_reg.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,00:00:00,1645653600,2.4,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
1,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,01:00:00,1645657200,2.4,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
2,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,02:00:00,1645660800,2.9,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
3,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,03:00:00,1645664400,2.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
4,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,04:00:00,1645668000,1.9,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3


In [117]:
# Prefix every event column with 'event_' so the names cannot collide with weather
# columns after the merge. Columns that already carry the prefix are left alone,
# so re-running the cell does not produce 'event_event_*' names.
df_events = df_events.rename(
    columns=lambda name: name if name.startswith('event_') else f'event_{name}'
)

In [118]:
df_events.head(1)

Unnamed: 0,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_start_time,event_end_time,event_start_hour,event_end_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26,2022-02-25,1645819200,1645826400,2022-02-25 22:00:00,1645819200


In [119]:
# Left-join the hour-level alarms onto the hourly weather rows: a weather row
# matches an alarm record when both the region name and the hour epoch agree.
# Weather rows without an alarm keep empty event columns.
_weather_keys = ['region_alt', 'hour_datetimeEpoch']
_event_keys = ['event_region_title', 'event_hour_level_event_datetimeEpoch']
df_weather_events = pd.merge(
    df_weather_reg,
    df_events,
    how='left',
    left_on=_weather_keys,
    right_on=_event_keys,
)

In [120]:
df_weather_events[10000:10060]

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_start_time,event_end_time,event_start_hour,event_end_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch
10000,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,09:00:00,1652162400,11.9,48.58,1.4,0.0,0.0,0.0,0.0,,24.5,10.8,340.0,1023.6,10.0,0.0,483.0,1.7,5.0,10.0,Clear,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
10001,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,10:00:00,1652166000,12.8,35.23,-2.2,0.0,0.0,0.0,0.0,,29.5,16.2,342.6,1023.0,24.1,0.0,646.0,2.3,6.0,10.0,Clear,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
10002,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,11:00:00,1652169600,13.7,31.54,-2.9,0.0,0.0,0.0,0.0,,34.6,19.1,348.9,1023.0,24.1,22.9,777.0,2.8,8.0,10.0,Partially cloudy,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
10003,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,12:00:00,1652173200,16.7,37.7,2.2,0.0,0.0,0.0,0.0,,37.1,14.4,10.0,1022.5,10.0,51.3,866.0,3.1,9.0,10.0,Partially cloudy,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
10004,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,13:00:00,1652176800,14.7,29.13,-3.1,0.0,0.0,0.0,0.0,,37.8,20.5,338.6,1022.0,24.1,90.6,907.0,3.3,9.0,10.0,Overcast,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
10005,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,14:00:00,1652180400,14.8,27.88,-3.6,0.0,0.0,0.0,0.0,,39.6,19.8,337.2,1022.0,24.1,69.2,894.0,3.2,9.0,10.0,Partially cloudy,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,Кіровоградщина,Кіровоградська обл.,1.0,2022-05-10 14:34:15,2022-05-10 14:52:18,2022-05-10 14:52:18,,2022-05-10 14:34:15,2022-05-10 14:52:18,2022-05-10 14:00:00,2022-05-10 15:00:00,2022-05-10,1652180000.0,1652184000.0,2022-05-10 14:00:00,1652180000.0
10006,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,15:00:00,1652184000,17.7,28.3,-0.9,0.0,0.0,0.0,0.0,,37.8,18.0,340.0,1020.6,10.0,40.0,820.0,3.0,8.0,10.0,Partially cloudy,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,Кіровоградщина,Кіровоградська обл.,1.0,2022-05-10 14:34:15,2022-05-10 14:52:18,2022-05-10 14:52:18,,2022-05-10 14:34:15,2022-05-10 14:52:18,2022-05-10 14:00:00,2022-05-10 15:00:00,2022-05-10,1652180000.0,1652184000.0,2022-05-10 15:00:00,1652184000.0
10007,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,16:00:00,1652187600,14.5,28.43,-3.6,0.0,0.0,0.0,0.0,,37.1,20.9,339.7,1021.0,24.1,81.3,590.0,2.1,6.0,10.0,Partially cloudy,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
10008,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,17:00:00,1652191200,14.1,28.96,-3.7,0.0,0.0,0.0,0.0,,37.1,20.9,342.0,1021.0,24.1,68.6,526.0,1.9,5.0,10.0,Partially cloudy,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
10009,"Кропивницький, Україна",2022-05-10,1652130000,17.7,4.7,10.9,-1.0,46.2,0.0,0.0,316.3,27.3,9.0,05:17:43,20:17:57,0.3,18:00:00,1652194800,16.9,30.42,-0.6,0.0,0.0,0.0,0.0,,36.7,21.6,350.0,1018.7,10.0,60.0,382.0,1.4,4.0,10.0,Partially cloudy,Кропивницький,Кіровоградська,Кропивницький,Kropyvnytskyi,Кіровоградщина,11,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,


In [121]:
# Row/column count of the hourly weather + events frame before it is saved and merged.
df_weather_events.shape

(200903, 59)

In [124]:
# Sanity check: number of rows that are exact duplicates of an earlier row (all columns equal).
df_weather_events.duplicated().sum()

0

In [126]:
# Persist the hourly weather + events frame as a ';'-separated CSV (row index not written).
df_weather_events.to_csv(
    f'{OUTPUT_DATA_FOLDER}/{OUTPUT_WEATHER_EVENTS_DATASET}',
    sep=';',
    index=False,
)

### Merging Hourly Weather & Events Data + ISW Reports 

In [138]:
# Preview the ISW reports frame; `following_date` is the key used for the merge below.
df_isw.head(5)

Unnamed: 0,report_date,title,text_title,main_text,text_stemm,text_lemm,keywords,following_date
0,2022-02-24,Russia-Ukraine Warning Update: Initial Russian...,Russia-Ukraine Warning Update: Initial Russian...,\n\nRussian President Vladimir Putin began a...,russian presid vladimir putin began larg scal...,russian president vladimir putin began large ...,"{'pm': 0.294, 'airport': 0.27, 'kyiv': 0.247, ...",2022-02-25
1,2022-02-25,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"Mason Clark, George Barros, and Kateryna Step...",mason clark georg barro kateryna stepanenko r...,mason clark george barros kateryna stepanenko...,"{'february': 0.341, 'kyiv': 0.326, 'pm': 0.263...",2022-02-26
2,2022-02-26,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"Mason Clark, George Barros, and Katya Stepane...",mason clark georg barro katya stepanenko russ...,mason clark george barros katya stepanenko ru...,"{'february': 0.41, 'kyiv': 0.37, 'twenty': 0.2...",2022-02-27
3,2022-02-27,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,"\n\nFebruary 27, 4pm EST\n\nThe Russian milit...",februari twenti seven 4pm est russian militar...,february twenty seven 4pm est russian militar...,"{'february': 0.51, 'seven': 0.322, 'twenty': 0...",2022-02-28
4,2022-02-28,"Russian Offensive Campaign Assessment, Februar...","Russian Offensive Campaign Assessment, Februar...",\n\nThe Russian military is reorganizing its...,russian militari reorgan militari effort atte...,russian military reorganizing military effort...,"{'february': 0.542, 'twenty': 0.317, 'eight': ...",2022-03-01


In [139]:
# Attach to every hourly weather/events row the ISW report whose `following_date`
# equals the row's `day_datetime`, i.e. each day is paired with the report dated
# the day before.
# Inner join (the pandas default, spelled out here): weather rows for days with no
# matching report are dropped from the result.
df_final_dataset = df_weather_events.merge(
    df_isw,
    how='inner',
    left_on='day_datetime',
    right_on='following_date',
)

In [140]:
# Spot-check a positional window of rows from the middle of the merged frame.
df_final_dataset.iloc[20000:20060]

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_start_time,event_end_time,event_start_hour,event_end_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch,report_date,title,text_title,main_text,text_stemm,text_lemm,keywords,following_date
20000,"Миколаїв, Україна",2022-03-24,1648072800,12.8,0.9,6.5,-5.8,42.6,0.0,0.0,225.3,19.4,7.0,05:47:32,18:10:04,0.72,20:00:00,1648144800,6.6,47.06,-3.9,0.0,0.0,0.0,0.0,,18.7,8.3,218.8,1017.0,24.1,68.6,0.0,,0.0,10.0,Partially cloudy,Миколаїв,Миколаївська,Миколаїв,Mykolaiv,Миколаївщина,14,Миколаївщина,Миколаївська обл.,1.0,2022-03-24 19:02:48,2022-03-24 20:08:36,2022-03-24 20:08:36,,2022-03-24 19:02:48,2022-03-24 20:08:36,2022-03-24 19:00:00,2022-03-24 21:00:00,2022-03-24,1648141000.0,1648148000.0,2022-03-24 20:00:00,1648145000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20001,"Миколаїв, Україна",2022-03-24,1648072800,12.8,0.9,6.5,-5.8,42.6,0.0,0.0,225.3,19.4,7.0,05:47:32,18:10:04,0.72,21:00:00,1648148400,5.9,53.61,-2.8,0.0,0.0,0.0,0.0,,16.9,7.6,224.6,1017.0,24.1,85.2,0.0,,0.0,10.0,Partially cloudy,Миколаїв,Миколаївська,Миколаїв,Mykolaiv,Миколаївщина,14,Миколаївщина,Миколаївська обл.,1.0,2022-03-24 19:02:48,2022-03-24 20:08:36,2022-03-24 20:08:36,,2022-03-24 19:02:48,2022-03-24 20:08:36,2022-03-24 19:00:00,2022-03-24 21:00:00,2022-03-24,1648141000.0,1648148000.0,2022-03-24 21:00:00,1648148000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20002,"Миколаїв, Україна",2022-03-24,1648072800,12.8,0.9,6.5,-5.8,42.6,0.0,0.0,225.3,19.4,7.0,05:47:32,18:10:04,0.72,22:00:00,1648152000,6.1,57.35,-1.7,0.0,0.0,0.0,0.0,,14.4,4.7,307.5,1017.0,24.1,91.0,0.0,,0.0,10.0,Overcast,Миколаїв,Миколаївська,Миколаїв,Mykolaiv,Миколаївщина,14,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20003,"Миколаїв, Україна",2022-03-24,1648072800,12.8,0.9,6.5,-5.8,42.6,0.0,0.0,225.3,19.4,7.0,05:47:32,18:10:04,0.72,23:00:00,1648155600,6.6,55.41,-1.7,0.0,0.0,0.0,0.0,,21.2,10.8,346.1,1017.0,24.1,84.2,0.0,,0.0,10.0,Partially cloudy,Миколаїв,Миколаївська,Миколаїв,Mykolaiv,Миколаївщина,14,Миколаївщина,Вознесенськ,0.0,2022-03-24 23:09:18,2022-03-25 00:08:18,2022-03-25 00:08:18,,2022-03-24 23:09:18,2022-03-25 00:08:18,2022-03-24 23:00:00,2022-03-25 01:00:00,2022-03-24,1648156000.0,1648163000.0,2022-03-24 23:00:00,1648156000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20004,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,00:00:00,1648072800,3.0,80.65,0.0,0.0,0.0,0.0,1.5,,5.0,7.2,170.0,1022.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Харків,Kharkiv,Харківщина,20,Харківщина,Ізюм,0.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-24 09:56:04,4573.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-23 20:00:00,2022-03-24 10:00:00,2022-03-23,1648058000.0,1648109000.0,2022-03-24 00:00:00,1648073000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20005,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,00:00:00,1648072800,3.0,80.65,0.0,0.0,0.0,0.0,1.5,,5.0,7.2,170.0,1022.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Харків,Kharkiv,Харківщина,20,Харківщина,Харківська обл.,1.0,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-24 03:01:45,,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-23 21:00:00,2022-03-24 04:00:00,2022-03-23,1648062000.0,1648087000.0,2022-03-24 00:00:00,1648073000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20006,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,01:00:00,1648076400,2.0,80.51,-1.0,0.0,0.0,0.0,1.5,,5.0,7.2,180.0,1021.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Харків,Kharkiv,Харківщина,20,Харківщина,Ізюм,0.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-24 09:56:04,4573.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-23 20:00:00,2022-03-24 10:00:00,2022-03-23,1648058000.0,1648109000.0,2022-03-24 01:00:00,1648076000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20007,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,01:00:00,1648076400,2.0,80.51,-1.0,0.0,0.0,0.0,1.5,,5.0,7.2,180.0,1021.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Харків,Kharkiv,Харківщина,20,Харківщина,Харківська обл.,1.0,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-24 03:01:45,,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-23 21:00:00,2022-03-24 04:00:00,2022-03-23,1648062000.0,1648087000.0,2022-03-24 01:00:00,1648076000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20008,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,02:00:00,1648080000,4.0,69.88,-1.0,0.0,0.0,0.0,1.5,,5.4,7.2,210.0,1020.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Харків,Kharkiv,Харківщина,20,Харківщина,Ізюм,0.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-24 09:56:04,4573.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-23 20:00:00,2022-03-24 10:00:00,2022-03-23,1648058000.0,1648109000.0,2022-03-24 02:00:00,1648080000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24
20009,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,02:00:00,1648080000,4.0,69.88,-1.0,0.0,0.0,0.0,1.5,,5.4,7.2,210.0,1020.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Харків,Kharkiv,Харківщина,20,Харківщина,Харківська обл.,1.0,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-24 03:01:45,,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-23 21:00:00,2022-03-24 04:00:00,2022-03-23,1648062000.0,1648087000.0,2022-03-24 02:00:00,1648080000.0,2022-03-23,"Russian Offensive Campaign Assessment, March 2...","Russian Offensive Campaign Assessment, March 23",\n\nRussian forces continued to settle in fo...,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20...",2022-03-24


In [141]:
# Row/column count after merging in the ISW report columns.
df_final_dataset.shape

(198685, 67)

In [142]:
# Columns to remove from the merged frame before it is saved.
exclude = [
    # ISW report dates, titles and raw text
    # (the processed text_stemm / text_lemm / keywords columns are kept)
    'report_date', 'following_date', 'title', 'text_title', 'main_text',
    # column from the weather/events side of the merge
    'center_city_ua',
]

In [143]:
# Remove the columns listed in `exclude` from the merged frame.
# This cell rebinds `df_final_dataset` to its own result, so on a second run the
# columns are already gone; errors='ignore' makes that a no-op instead of a KeyError.
df_final_dataset = df_final_dataset.drop(columns=exclude, errors='ignore')

In [144]:
# Re-inspect the same positional window of rows now that the excluded columns are gone.
df_final_dataset.iloc[20000:20060]

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_en,region_alt,region_id,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_start_time,event_end_time,event_start_hour,event_end_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch,text_stemm,text_lemm,keywords
20000,"Миколаїв, Україна",2022-03-24,1648072800,12.8,0.9,6.5,-5.8,42.6,0.0,0.0,225.3,19.4,7.0,05:47:32,18:10:04,0.72,20:00:00,1648144800,6.6,47.06,-3.9,0.0,0.0,0.0,0.0,,18.7,8.3,218.8,1017.0,24.1,68.6,0.0,,0.0,10.0,Partially cloudy,Миколаїв,Миколаївська,Mykolaiv,Миколаївщина,14,Миколаївщина,Миколаївська обл.,1.0,2022-03-24 19:02:48,2022-03-24 20:08:36,2022-03-24 20:08:36,,2022-03-24 19:02:48,2022-03-24 20:08:36,2022-03-24 19:00:00,2022-03-24 21:00:00,2022-03-24,1648141000.0,1648148000.0,2022-03-24 20:00:00,1648145000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20001,"Миколаїв, Україна",2022-03-24,1648072800,12.8,0.9,6.5,-5.8,42.6,0.0,0.0,225.3,19.4,7.0,05:47:32,18:10:04,0.72,21:00:00,1648148400,5.9,53.61,-2.8,0.0,0.0,0.0,0.0,,16.9,7.6,224.6,1017.0,24.1,85.2,0.0,,0.0,10.0,Partially cloudy,Миколаїв,Миколаївська,Mykolaiv,Миколаївщина,14,Миколаївщина,Миколаївська обл.,1.0,2022-03-24 19:02:48,2022-03-24 20:08:36,2022-03-24 20:08:36,,2022-03-24 19:02:48,2022-03-24 20:08:36,2022-03-24 19:00:00,2022-03-24 21:00:00,2022-03-24,1648141000.0,1648148000.0,2022-03-24 21:00:00,1648148000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20002,"Миколаїв, Україна",2022-03-24,1648072800,12.8,0.9,6.5,-5.8,42.6,0.0,0.0,225.3,19.4,7.0,05:47:32,18:10:04,0.72,22:00:00,1648152000,6.1,57.35,-1.7,0.0,0.0,0.0,0.0,,14.4,4.7,307.5,1017.0,24.1,91.0,0.0,,0.0,10.0,Overcast,Миколаїв,Миколаївська,Mykolaiv,Миколаївщина,14,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20003,"Миколаїв, Україна",2022-03-24,1648072800,12.8,0.9,6.5,-5.8,42.6,0.0,0.0,225.3,19.4,7.0,05:47:32,18:10:04,0.72,23:00:00,1648155600,6.6,55.41,-1.7,0.0,0.0,0.0,0.0,,21.2,10.8,346.1,1017.0,24.1,84.2,0.0,,0.0,10.0,Partially cloudy,Миколаїв,Миколаївська,Mykolaiv,Миколаївщина,14,Миколаївщина,Вознесенськ,0.0,2022-03-24 23:09:18,2022-03-25 00:08:18,2022-03-25 00:08:18,,2022-03-24 23:09:18,2022-03-25 00:08:18,2022-03-24 23:00:00,2022-03-25 01:00:00,2022-03-24,1648156000.0,1648163000.0,2022-03-24 23:00:00,1648156000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20004,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,00:00:00,1648072800,3.0,80.65,0.0,0.0,0.0,0.0,1.5,,5.0,7.2,170.0,1022.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Kharkiv,Харківщина,20,Харківщина,Ізюм,0.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-24 09:56:04,4573.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-23 20:00:00,2022-03-24 10:00:00,2022-03-23,1648058000.0,1648109000.0,2022-03-24 00:00:00,1648073000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20005,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,00:00:00,1648072800,3.0,80.65,0.0,0.0,0.0,0.0,1.5,,5.0,7.2,170.0,1022.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Kharkiv,Харківщина,20,Харківщина,Харківська обл.,1.0,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-24 03:01:45,,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-23 21:00:00,2022-03-24 04:00:00,2022-03-23,1648062000.0,1648087000.0,2022-03-24 00:00:00,1648073000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20006,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,01:00:00,1648076400,2.0,80.51,-1.0,0.0,0.0,0.0,1.5,,5.0,7.2,180.0,1021.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Kharkiv,Харківщина,20,Харківщина,Ізюм,0.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-24 09:56:04,4573.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-23 20:00:00,2022-03-24 10:00:00,2022-03-23,1648058000.0,1648109000.0,2022-03-24 01:00:00,1648076000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20007,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,01:00:00,1648076400,2.0,80.51,-1.0,0.0,0.0,0.0,1.5,,5.0,7.2,180.0,1021.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Kharkiv,Харківщина,20,Харківщина,Харківська обл.,1.0,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-24 03:01:45,,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-23 21:00:00,2022-03-24 04:00:00,2022-03-23,1648062000.0,1648087000.0,2022-03-24 01:00:00,1648076000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20008,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,02:00:00,1648080000,4.0,69.88,-1.0,0.0,0.0,0.0,1.5,,5.4,7.2,210.0,1020.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Kharkiv,Харківщина,20,Харківщина,Ізюм,0.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-24 09:56:04,4573.0,2022-03-23 20:57:03,2022-03-24 09:56:04,2022-03-23 20:00:00,2022-03-24 10:00:00,2022-03-23,1648058000.0,1648109000.0,2022-03-24 02:00:00,1648080000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."
20009,"Харків, Україна",2022-03-24,1648072800,12.0,2.0,6.8,-4.0,48.7,0.0,0.0,209.7,18.1,7.0,05:29:35,17:54:07,0.72,02:00:00,1648080000,4.0,69.88,-1.0,0.0,0.0,0.0,1.5,,5.4,7.2,210.0,1020.0,10.0,0.0,0.0,,0.0,10.0,Clear,Харків,Харківська,Kharkiv,Харківщина,20,Харківщина,Харківська обл.,1.0,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-24 03:01:45,,2022-03-23 21:24:45,2022-03-24 03:01:45,2022-03-23 21:00:00,2022-03-24 04:00:00,2022-03-23,1648062000.0,1648087000.0,2022-03-24 02:00:00,1648080000.0,russian forc continu settl protract stalem co...,russian force continued settle protracted sta...,"{'march': 0.275, 'syrian': 0.213, 'kyiv': 0.20..."


In [145]:
# Review the dtype of every remaining column before exporting the merged dataset.
df_final_dataset.dtypes

city_resolvedAddress                            object
day_datetime                            datetime64[ns]
day_datetimeEpoch                                int64
day_tempmax                                    float64
day_tempmin                                    float64
day_temp                                       float64
day_dew                                        float64
day_humidity                                   float64
day_precip                                     float64
day_precipcover                                float64
day_solarradiation                             float64
day_solarenergy                                float64
day_uvindex                                    float64
day_sunrise                                     object
day_sunset                                      object
day_moonphase                                  float64
hour_datetime                                   object
hour_datetimeEpoch                               int64
hour_temp 

In [None]:
# Write the final merged dataset as a ';'-separated CSV (row index not written).
df_final_dataset.to_csv(
    f'{OUTPUT_DATA_FOLDER}/{OUTPUT_MERGED_DATASET}',
    index=False,
    sep=';',
)