In [1]:
import os
import glob
import pandas as pd
import time
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option('display.max_columns', None)

<span style="color:blue">The dataset downloaded from the Waka Kotahi website is too large for my machine, so I've simplified the problem: for now I will only look at the years 2018 and 2019. I will read all the files for those years and store them in combined dataframes (one per travel direction).</span>

# 1. Load Data

In [3]:
%time
folder_paths = ["./2018/", "./2019/"]

EBsiteref = ['01650013','01620013','01659011','01620011','01650011','01640011']
WBsiteref = ['01610013','01660013','01630013','01610011','01660011']
#EBsiteref = ['01620013',]
#WBsiteref = ['01610013']
csv_files = []
for folder_path in folder_paths:
    csv_files += glob.glob(folder_path + "/*.csv")

EBdata_frame = pd.DataFrame()
WBdata_fram = pd.DataFrame()

EB_data = []
WB_data = []

for filename in csv_files:
    df = pd.read_csv(filename, index_col=None, low_memory=True)
    EB_df= df[df['siteRef'].isin(EBsiteref)]
    EB_data.append(EB_df)
    
    WB_df= df[df['siteRef'].isin(WBsiteref)]
    WB_data.append(WB_df)
    
EBdata_frame = pd.concat(EB_data)
WBdata_frame = pd.concat(WB_data)



Wall time: 0 ns


<span style="color:blue">The data is too large! So I will just look at a section of the data - the Te Atatu Junction and the Lincoln Rd Junction. I will filter the data for only the sensors at these two junctions. To recognise patterns with respect to time, which depend on whether the traffic is east bound or west bound, I will store the east-bound and west-bound data separately.</span>

## **2. Check Data**

In [4]:
WBdata_frame['siteRef'].nunique()

5

In [5]:
WBdata_frame

Unnamed: 0,class,siteRef,startDatetime,endDatetime,direction,count
47241,L,01610011,01-MAR-2018 14:30,01-MAR-2018 14:45,1,796.5
47244,H,01610011,02-MAR-2018 04:00,02-MAR-2018 04:15,1,3.0
47245,L,01610011,02-MAR-2018 19:15,02-MAR-2018 19:30,1,567.5
47247,H,01610011,03-MAR-2018 02:30,03-MAR-2018 02:45,1,4.5
47251,H,01610011,05-MAR-2018 11:00,05-MAR-2018 11:15,1,79.5
...,...,...,...,...,...,...
14820471,L,01610013,22-NOV-2019 22:30,22-NOV-2019 22:45,1,206.5
14820482,L,01660011,27-NOV-2019 21:00,27-NOV-2019 21:15,4,260.5
14820486,L,01610013,30-NOV-2019 18:00,30-NOV-2019 18:15,1,482.5
14820488,L,01610013,01-DEC-2019 09:00,01-DEC-2019 09:15,1,376.5


 ## **2.1. Drop Unnecessary Columns**

In [6]:
# endDatetime is unnecessary because we know the time intervals are 15 minutes;
# 'class' is dropped together with it.
unused_columns = ['endDatetime', 'class']
WBdata_frame = WBdata_frame.drop(columns=unused_columns)
EBdata_frame = EBdata_frame.drop(columns=unused_columns)

In [7]:
# Convert the startDatetime strings (e.g. '01-MAR-2018 14:30') into datetime64.
# Passing the format explicitly avoids per-value format guessing, which is slow
# on large frames and can be ambiguous; a value that does not match raises
# instead of being parsed differently from its neighbours.
DATETIME_FORMAT = '%d-%b-%Y %H:%M'
WBdata_frame['startDatetime'] = pd.to_datetime(WBdata_frame['startDatetime'], format=DATETIME_FORMAT)
EBdata_frame['startDatetime'] = pd.to_datetime(EBdata_frame['startDatetime'], format=DATETIME_FORMAT)

## **2.2 Convert quarter-hour data into hourly**

In [8]:
# Aggregate the quarter-hour east-bound counts into hourly totals per site and
# direction. startDatetime is already datetime64 (converted in the cell above),
# so it is not parsed again here.
# pd.Grouper bins each timestamp into the hour that contains it (it floors, it
# does not round to the nearest hour) and sum() adds up the counts in each bin.
# The lowercase 'h' alias is used because the uppercase 'H' alias is deprecated
# in recent pandas releases.
EBdata_grouped = (
    EBdata_frame
    .groupby(['siteRef', 'direction', pd.Grouper(key='startDatetime', freq='h')])
    .sum()
    .reset_index()
)

# print the grouped data
print(EBdata_grouped)

        siteRef  direction       startDatetime  count
0      01620011          2 2018-01-01 00:00:00  715.0
1      01620011          2 2018-01-01 01:00:00  749.0
2      01620011          2 2018-01-01 02:00:00  441.0
3      01620011          2 2018-01-01 03:00:00  404.0
4      01620011          2 2018-01-01 04:00:00  329.0
...         ...        ...                 ...    ...
71391  01659011          5 2019-12-31 19:00:00  249.0
71392  01659011          5 2019-12-31 20:00:00  204.0
71393  01659011          5 2019-12-31 21:00:00  193.0
71394  01659011          5 2019-12-31 22:00:00  216.0
71395  01659011          5 2019-12-31 23:00:00  172.0

[71396 rows x 4 columns]


In [9]:
# Aggregate the quarter-hour west-bound counts into hourly totals per site and
# direction. startDatetime is already datetime64 (converted earlier), so it is
# not parsed again here.
# pd.Grouper bins each timestamp into the hour that contains it (it floors, it
# does not round to the nearest hour) and sum() adds up the counts in each bin.
# The lowercase 'h' alias is used because the uppercase 'H' alias is deprecated
# in recent pandas releases.
WBdata_grouped = (
    WBdata_frame
    .groupby(['siteRef', 'direction', pd.Grouper(key='startDatetime', freq='h')])
    .sum()
    .reset_index()
)

# print the grouped data
print(WBdata_grouped)

        siteRef  direction       startDatetime   count
0      01610011          1 2018-01-01 00:00:00  1221.0
1      01610011          1 2018-01-01 01:00:00  1148.0
2      01610011          1 2018-01-01 02:00:00   636.0
3      01610011          1 2018-01-01 03:00:00   406.0
4      01610011          1 2018-01-01 04:00:00   295.0
...         ...        ...                 ...     ...
84015  01660013          4 2019-12-31 19:00:00   777.0
84016  01660013          4 2019-12-31 20:00:00   619.0
84017  01660013          4 2019-12-31 21:00:00   556.0
84018  01660013          4 2019-12-31 22:00:00   506.0
84019  01660013          4 2019-12-31 23:00:00   350.0

[84020 rows x 4 columns]


## **2.3 Find missing time intervals**

In [10]:
# Create a DatetimeIndex with every expected hourly timestamp in 2018-2019
# (both endpoints included). The lowercase 'h' alias replaces the deprecated
# uppercase 'H' alias.
dt_index = pd.date_range(start='2018-01-01 00:00:00', end='2019-12-31 23:00:00', freq='h')

# Column-less DataFrame that carries only the expected timestamps as its index;
# it is used as the left side of a merge to expose missing hours.
expected_df = pd.DataFrame(index=dt_index)

In [11]:
expected_df.head()

2018-01-01 00:00:00
2018-01-01 01:00:00
2018-01-01 02:00:00
2018-01-01 03:00:00
2018-01-01 04:00:00


In [12]:
#filter out each siteRef
import pandas as pd

def filter_and_concatenate_dataframes(df, column_name, expected_df):
    """
    Align every group of `df` to the index of `expected_df`.

    For each unique value in `df[column_name]`, the matching rows are
    indexed by their 'startDatetime' column and left-merged onto
    `expected_df`, so that every entry of `expected_df.index` appears for
    every group. Index entries without matching data keep NaN in the data
    columns; only `column_name` is filled in with the group's value.

    Parameters:
        df (pandas.DataFrame): The dataframe to split. Must contain
            `column_name` and a 'startDatetime' column.
        column_name (str): The name of the column to group by.
        expected_df (pandas.DataFrame): Frame whose index lists every
            expected timestamp; it is the left side of each merge.

    Returns:
        pandas.DataFrame: The per-group merged frames stacked in order of
        first appearance of each group value, indexed like `expected_df`
        (repeated once per group).

    Raises:
        KeyError: If `column_name` or 'startDatetime' is missing from `df`.
        ValueError: If `df` has no rows (pandas refuses to concatenate an
            empty list of frames).
    """
    # One aligned frame per group value
    dfs = []

    for val in df[column_name].unique():
        # Rows of the current group, keyed by timestamp
        filtered_df = df[df[column_name] == val].set_index('startDatetime')
        # Left merge keeps every expected timestamp, even those without data
        merged_df = pd.merge(expected_df, filtered_df, how='left', left_index=True, right_index=True)
        # Timestamps that had no data have no group label after the merge; tag
        # them with the current group value. The grouping column is taken from
        # `column_name` rather than a hard-coded 'siteRef', so the function
        # works for any grouping column.
        merged_df[column_name] = merged_df[column_name].fillna(val)
        dfs.append(merged_df)

    # Stack all aligned groups into a single frame
    return pd.concat(dfs)


In [13]:
# Align every site's hourly series to the full expected timeline so that
# missing hours show up as NaN rows.
groupedWB = filter_and_concatenate_dataframes(
    df=WBdata_grouped, column_name='siteRef', expected_df=expected_df
)
groupedEB = filter_and_concatenate_dataframes(
    df=EBdata_grouped, column_name='siteRef', expected_df=expected_df
)

In [14]:
groupedWB.isna().sum()

siteRef         0
direction    3580
count        3580
dtype: int64

In [15]:
groupedEB.isna().sum()

siteRef          0
direction    33724
count        33724
dtype: int64

In [16]:
groupedEB['siteRef'].nunique()

6

In [17]:
# Turn the timestamp index back into an ordinary 'startDatetime' column.
groupedEB = groupedEB.rename_axis('startDatetime').reset_index()
groupedWB = groupedWB.rename_axis('startDatetime').reset_index()

In [18]:
groupedEB.isna().sum()

startDatetime        0
siteRef              0
direction        33724
count            33724
dtype: int64

<span style="color:blue">I will now merge the traffic counts with the site information data, which is available as a separate file.</span>

# 3. Merge with Site Information Data

In [19]:
# Site metadata, one row per monitoring site. siteRef is the join key against
# the traffic frames, where it is a zero-padded string, so read it as text to
# keep the leading zeros and guarantee matching key dtypes.
monitoring_sites = pd.read_csv('State_highway_traffic_monitoring_sites.csv', dtype={'siteRef': str})

In [20]:
# Drop the site attributes that are not needed for this analysis
unused_site_columns = ['RS', 'RP', 'SH', 'equipmentCurrent', 'region']
monitoring_sites = monitoring_sites.drop(columns=unused_site_columns)

In [21]:
# Attach the site attributes to every hourly row.
# Left join on siteRef: all traffic rows are kept, whether or not a site matches.
joined_EB = groupedEB.merge(monitoring_sites, how='left', on='siteRef')
joined_WB = groupedWB.merge(monitoring_sites, how='left', on='siteRef')

In [22]:
joined_EB.shape

(105120, 18)

In [23]:

# Ensure startDatetime is a datetime column - EB.
# NOTE(review): startDatetime here comes from the hourly date_range index built
# earlier, so it is presumably already datetime64 and this call acts only as a
# safeguard - confirm, and consider removing.
joined_EB['startDatetime'] = pd.to_datetime(joined_EB['startDatetime'], format='%d-%b-%Y %H:%M')


# Ensure startDatetime is a datetime column - WB (same remark as for EB).
joined_WB['startDatetime'] = pd.to_datetime(joined_WB['startDatetime'], format='%d-%b-%Y %H:%M')


In [24]:
# Add a calendar-date column (time of day stripped) derived from startDatetime
joined_EB = joined_EB.assign(date=joined_EB['startDatetime'].dt.date)
joined_WB = joined_WB.assign(date=joined_WB['startDatetime'].dt.date)

In [25]:
joined_EB.isna().sum()

startDatetime        0
siteRef              0
direction        33724
count            33724
X                    0
Y                    0
OBJECTID             0
lane                 0
type                 0
percentHeavy         0
description          0
acceptedDays         0
AADT5yearsAgo        0
AADT4yearsAgo        0
AADT3yearsAgo        0
AADT2yearsAgo        0
AADT1yearAgo         0
siteType             0
date                 0
dtype: int64

# 4. Merge with Weather Data

In [26]:
# Weather observations stored under the name weather_2019.
# NOTE(review): parse_dates=True asks pandas to parse the index only; no
# index_col is given, so 'Date time' is converted explicitly in a later cell.
weather_2019= pd.read_csv('weather.csv', parse_dates=True)

In [27]:
# Weather observations for 2018.
# NOTE(review): parse_dates=True asks pandas to parse the index only; no
# index_col is given, so 'Date time' is converted explicitly in a later cell.
weather_2018=pd.read_csv('weather_2018.csv', parse_dates=True)

In [28]:
# Stack the two yearly weather tables, 2018 first; each keeps its own row labels.
weather=pd.concat([weather_2018,weather_2019])

In [29]:
# Replace the row labels carried over from the two source tables with a fresh
# 0..n-1 index (drop=True discards the old labels instead of keeping them as a column).
weather=weather.reset_index(drop=True)

In [30]:
# Convert the 'Date time' column to datetime so the .dt accessor can be used on it
weather['Date time'] = pd.to_datetime(weather['Date time'])


In [31]:
# Extract the calendar date; this column is the key used to join the weather
# onto the traffic frames.
weather['date'] = weather['Date time'].dt.date

In [32]:
weather.shape

(730, 21)

In [33]:
weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 730 entries, 0 to 729
Data columns (total 21 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date time            730 non-null    datetime64[ns]
 1   Minimum Temperature  730 non-null    float64       
 2   Maximum Temperature  730 non-null    float64       
 3   Temperature          730 non-null    float64       
 4   Dew Point            730 non-null    float64       
 5   Relative Humidity    730 non-null    float64       
 6   Heat Index           17 non-null     float64       
 7   Wind Speed           730 non-null    float64       
 8   Wind Gust            115 non-null    float64       
 9   Wind Direction       730 non-null    float64       
 10  Wind Chill           158 non-null    float64       
 11  Precipitation        730 non-null    float64       
 12  Precipitation Cover  730 non-null    float64       
 13  Visibility           730 non-null  

In [34]:
weather['Conditions'].unique()

array(['Rain, Overcast', 'Partially cloudy', 'Clear', 'Overcast',
       'Rain, Partially cloudy', 'Rain'], dtype=object)

In [35]:
# Drop the weather columns that are not needed for this analysis
unused_weather_columns = [
    'Heat Index',
    'Wind Gust',
    'Wind Direction',
    'Weather Type',
    'Latitude',
    'Longitude',
    'Wind Chill',
]
weather = weather.drop(columns=unused_weather_columns)

In [36]:
weather.head()

Unnamed: 0,Date time,Minimum Temperature,Maximum Temperature,Temperature,Dew Point,Relative Humidity,Wind Speed,Precipitation,Precipitation Cover,Visibility,Cloud Cover,Sea Level Pressure,Conditions,date
0,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",2018-01-01
1,2018-01-02,18.0,27.0,21.9,17.7,79.01,11.4,0.0,0.0,6.7,51.4,1012.2,Partially cloudy,2018-01-02
2,2018-01-03,17.0,27.1,21.5,17.7,79.56,16.1,0.0,0.0,7.3,47.9,1011.6,Partially cloudy,2018-01-03
3,2018-01-04,20.0,22.0,21.1,19.7,91.94,27.7,11.6,8.33,5.5,80.9,1005.1,"Rain, Overcast",2018-01-04
4,2018-01-05,18.1,22.0,20.2,18.7,91.25,33.3,12.6,8.33,4.7,92.4,989.9,"Rain, Overcast",2018-01-05


In [37]:
# Attach the weather of the matching calendar date to every traffic row
# (left join: all traffic rows are kept).
traffic_weatherWB = joined_WB.merge(weather, how='left', on='date')
traffic_weatherEB = joined_EB.merge(weather, how='left', on='date')

In [38]:
traffic_weatherEB.shape

(105120, 32)

In [39]:
traffic_weatherWB.shape

(87600, 32)

# 5. Merge with holiday data

In [40]:
# Holiday calendars, one file per year.
# NOTE(review): parse_dates=True asks pandas to parse the index only; no
# index_col is given, so the 'date' column is converted explicitly afterwards.
holidays_2019 = pd.read_csv('holidays_2019.csv', parse_dates=True)
holidays_2018 = pd.read_csv('holidays_2018.csv',parse_dates=True)

In [41]:
# Convert the 'date' column of BOTH yearly holiday tables to datetime, so the
# two frames share one dtype before they are concatenated (converting only one
# of them would leave a column mixing strings and timestamps).
holidays_2018['date'] = pd.to_datetime(holidays_2018['date'])
holidays_2019['date'] = pd.to_datetime(holidays_2019['date'])


In [42]:
# Stack the two yearly holiday tables, 2018 first; each keeps its own row labels.
holidays = pd.concat([holidays_2018,holidays_2019])

In [43]:
# Replace the row labels carried over from the two source tables with a fresh
# 0..n-1 index (drop=True discards the old labels).
holidays = holidays.reset_index(drop=True)

In [44]:
# Make sure the combined 'date' column is datetime; it is the key used to join
# the holidays onto the traffic frames.
holidays['date'] = pd.to_datetime(holidays['date'])

In [45]:
# The traffic 'date' column holds plain date objects (from .dt.date); convert it
# to datetime so its dtype matches holidays['date'] for the join.
for traffic_frame in (traffic_weatherWB, traffic_weatherEB):
    traffic_frame['date'] = pd.to_datetime(traffic_frame['date'])

In [46]:
# Attach the holiday type of the matching date to every traffic row
# (left join: all traffic rows are kept).
trafficWB_merged = traffic_weatherWB.merge(holidays, how='left', on='date')
trafficEB_merged = traffic_weatherEB.merge(holidays, how='left', on='date')

In [47]:
holidays

Unnamed: 0,date,Hol_type
0,2018-01-01,PublicHoliday
1,2018-01-02,PublicHoliday
2,2018-01-03,SchoolHoliday
3,2018-01-04,SchoolHoliday
4,2018-01-05,SchoolHoliday
...,...,...
725,2019-12-27,SchoolHoliday
726,2019-12-28,Weekend
727,2019-12-29,Weekend
728,2019-12-30,SchoolHoliday


In [48]:
trafficWB_merged

Unnamed: 0,startDatetime,siteRef,direction,count,X,Y,OBJECTID,lane,type,percentHeavy,description,acceptedDays,AADT5yearsAgo,AADT4yearsAgo,AADT3yearsAgo,AADT2yearsAgo,AADT1yearAgo,siteType,date,Date time,Minimum Temperature,Maximum Temperature,Temperature,Dew Point,Relative Humidity,Wind Speed,Precipitation,Precipitation Cover,Visibility,Cloud Cover,Sea Level Pressure,Conditions,Hol_type
0,2018-01-01 00:00:00,01610011,1.0,1221.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
1,2018-01-01 01:00:00,01610011,1.0,1148.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
2,2018-01-01 02:00:00,01610011,1.0,636.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
3,2018-01-01 03:00:00,01610011,1.0,406.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
4,2018-01-01 04:00:00,01610011,1.0,295.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87595,2019-12-31 19:00:00,01660013,4.0,777.0,174.634215,-36.849577,6130,Ramp,Continuous,8.7,SH16 Lincoln Rd Off Ramp WB,356.0,18595.0,18954.0,19149.0,16846.0,16922.0,Regional Continuous,2019-12-31,2019-12-31,17.0,23.0,19.0,13.7,71.49,16.5,0.0,0.00,7.0,64.0,1015.5,Partially cloudy,SchoolHoliday
87596,2019-12-31 20:00:00,01660013,4.0,619.0,174.634215,-36.849577,6130,Ramp,Continuous,8.7,SH16 Lincoln Rd Off Ramp WB,356.0,18595.0,18954.0,19149.0,16846.0,16922.0,Regional Continuous,2019-12-31,2019-12-31,17.0,23.0,19.0,13.7,71.49,16.5,0.0,0.00,7.0,64.0,1015.5,Partially cloudy,SchoolHoliday
87597,2019-12-31 21:00:00,01660013,4.0,556.0,174.634215,-36.849577,6130,Ramp,Continuous,8.7,SH16 Lincoln Rd Off Ramp WB,356.0,18595.0,18954.0,19149.0,16846.0,16922.0,Regional Continuous,2019-12-31,2019-12-31,17.0,23.0,19.0,13.7,71.49,16.5,0.0,0.00,7.0,64.0,1015.5,Partially cloudy,SchoolHoliday
87598,2019-12-31 22:00:00,01660013,4.0,506.0,174.634215,-36.849577,6130,Ramp,Continuous,8.7,SH16 Lincoln Rd Off Ramp WB,356.0,18595.0,18954.0,19149.0,16846.0,16922.0,Regional Continuous,2019-12-31,2019-12-31,17.0,23.0,19.0,13.7,71.49,16.5,0.0,0.00,7.0,64.0,1015.5,Partially cloudy,SchoolHoliday


# 6. Save Datasets to file

In [49]:
# Use the hourly timestamp as the row index of both final frames.
# Not idempotent: re-running this cell fails because 'startDatetime' is no
# longer a column after the first run.
trafficWB_merged = trafficWB_merged.set_index('startDatetime')
trafficEB_merged = trafficEB_merged.set_index('startDatetime')

In [50]:
trafficWB_merged

Unnamed: 0_level_0,siteRef,direction,count,X,Y,OBJECTID,lane,type,percentHeavy,description,acceptedDays,AADT5yearsAgo,AADT4yearsAgo,AADT3yearsAgo,AADT2yearsAgo,AADT1yearAgo,siteType,date,Date time,Minimum Temperature,Maximum Temperature,Temperature,Dew Point,Relative Humidity,Wind Speed,Precipitation,Precipitation Cover,Visibility,Cloud Cover,Sea Level Pressure,Conditions,Hol_type
startDatetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1
2018-01-01 00:00:00,01610011,1.0,1221.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
2018-01-01 01:00:00,01610011,1.0,1148.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
2018-01-01 02:00:00,01610011,1.0,636.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
2018-01-01 03:00:00,01610011,1.0,406.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
2018-01-01 04:00:00,01610011,1.0,295.0,174.654029,-36.858365,6053,Inc,Continuous,8.4,SH16 Te Atatu Rd Interchange WB,365.0,37636.0,39823.0,40574.0,37891.0,39148.0,Regional Continuous,2018-01-01,2018-01-01,18.0,24.1,20.6,16.9,80.11,13.9,3.0,4.17,7.0,83.2,1010.8,"Rain, Overcast",PublicHoliday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-31 19:00:00,01660013,4.0,777.0,174.634215,-36.849577,6130,Ramp,Continuous,8.7,SH16 Lincoln Rd Off Ramp WB,356.0,18595.0,18954.0,19149.0,16846.0,16922.0,Regional Continuous,2019-12-31,2019-12-31,17.0,23.0,19.0,13.7,71.49,16.5,0.0,0.00,7.0,64.0,1015.5,Partially cloudy,SchoolHoliday
2019-12-31 20:00:00,01660013,4.0,619.0,174.634215,-36.849577,6130,Ramp,Continuous,8.7,SH16 Lincoln Rd Off Ramp WB,356.0,18595.0,18954.0,19149.0,16846.0,16922.0,Regional Continuous,2019-12-31,2019-12-31,17.0,23.0,19.0,13.7,71.49,16.5,0.0,0.00,7.0,64.0,1015.5,Partially cloudy,SchoolHoliday
2019-12-31 21:00:00,01660013,4.0,556.0,174.634215,-36.849577,6130,Ramp,Continuous,8.7,SH16 Lincoln Rd Off Ramp WB,356.0,18595.0,18954.0,19149.0,16846.0,16922.0,Regional Continuous,2019-12-31,2019-12-31,17.0,23.0,19.0,13.7,71.49,16.5,0.0,0.00,7.0,64.0,1015.5,Partially cloudy,SchoolHoliday
2019-12-31 22:00:00,01660013,4.0,506.0,174.634215,-36.849577,6130,Ramp,Continuous,8.7,SH16 Lincoln Rd Off Ramp WB,356.0,18595.0,18954.0,19149.0,16846.0,16922.0,Regional Continuous,2019-12-31,2019-12-31,17.0,23.0,19.0,13.7,71.49,16.5,0.0,0.00,7.0,64.0,1015.5,Partially cloudy,SchoolHoliday


In [51]:
# Write each direction's merged dataset to its own CSV file (the index is
# written as the first column).
for merged_frame, csv_path in (
    (trafficWB_merged, 'InitialWB_data.csv'),
    (trafficEB_merged, 'InitialEB_data.csv'),
):
    merged_frame.to_csv(csv_path)

In [52]:
# We now have two datasets, one per direction (east bound and west bound), each holding hourly traffic counts from a range of sites.