Process the daily traffic volume file and group to a monthly level

In [22]:
import numpy as np
import pandas as pd
import plotly.express as px

In [19]:
# Render floats with thousands separators and two decimal places in displayed frames
pd.set_option('display.float_format', '{:,.2f}'.format)

## Daily highway traffic counts data

Daily highways count - https://opendata-nzta.opendata.arcgis.com/datasets/NZTA::tms-daily-traffic-counts-api/explore

In [2]:
# Load the raw TMS traffic-count extract from the working directory
daily_traffic_volume = pd.read_csv(filepath_or_buffer='Auckland_TMS_Telemetry_Sites.csv')

In [3]:
# Dimensions of the raw table as (rows, columns)
daily_traffic_volume.shape

(1513258, 10)

In [4]:
# Preview the first rows of the raw table
daily_traffic_volume.head()

Unnamed: 0,OBJECTID,Start Date,Site Alias,Region Name,Site Reference,Class Weight,Site Description,Lane Number,Flow Direction,Traffic Count
0,205,12/31/2017 12:00:00 PM,334,02 - Auckland,1600024,Heavy,SH16 Nth of Coatesville Riverhead Highway,1,1,144.5
1,206,12/31/2017 12:00:00 PM,334,02 - Auckland,1600024,Heavy,SH16 Nth of Coatesville Riverhead Highway,2,2,179.0
2,207,12/31/2017 12:00:00 PM,334,02 - Auckland,1600024,Light,SH16 Nth of Coatesville Riverhead Highway,1,1,8498.5
3,208,12/31/2017 12:00:00 PM,334,02 - Auckland,1600024,Light,SH16 Nth of Coatesville Riverhead Highway,2,2,10280.0
4,209,12/31/2017 12:00:00 PM,340,02 - Auckland,1600058,Light,SH16 Nth of Kahikatea Flat Rd (Kaukapakapa Bri...,1,1,1701.5


In [5]:
# Split "Start Date" ("MM/DD/YYYY HH:MM:SS AM|PM") into its parts and check whether the
# time provided was AM or PM. If it was PM, add one day to the reported date.
# NOTE(review): presumably the PM stamps roll over into the next local calendar day
# (e.g. timestamps stored in UTC) — confirm against the data provider's documentation.

daily_traffic_volume['Split Start Date'] = daily_traffic_volume['Start Date'].str.split()
# Parse with an explicit month-first format: avoids per-element format inference on a
# very large column and fails loudly if a row does not match the expected layout.
daily_traffic_volume['Date'] = pd.to_datetime(daily_traffic_volume['Split Start Date'].str[0], format='%m/%d/%Y')
daily_traffic_volume['AM PM'] = daily_traffic_volume['Split Start Date'].str[2]
daily_traffic_volume['Adjusted Date'] = np.where(
    daily_traffic_volume['AM PM'] == 'PM',
    daily_traffic_volume['Date'] + pd.Timedelta(days=1),
    daily_traffic_volume['Date'],
)


In [6]:
# Preview the first rows again, now including the derived date columns
daily_traffic_volume.head()

Unnamed: 0,OBJECTID,Start Date,Site Alias,Region Name,Site Reference,Class Weight,Site Description,Lane Number,Flow Direction,Traffic Count,Split Start Date,Date,AM PM,Adjusted Date
0,205,12/31/2017 12:00:00 PM,334,02 - Auckland,1600024,Heavy,SH16 Nth of Coatesville Riverhead Highway,1,1,144.5,"[12/31/2017, 12:00:00, PM]",2017-12-31,PM,2018-01-01
1,206,12/31/2017 12:00:00 PM,334,02 - Auckland,1600024,Heavy,SH16 Nth of Coatesville Riverhead Highway,2,2,179.0,"[12/31/2017, 12:00:00, PM]",2017-12-31,PM,2018-01-01
2,207,12/31/2017 12:00:00 PM,334,02 - Auckland,1600024,Light,SH16 Nth of Coatesville Riverhead Highway,1,1,8498.5,"[12/31/2017, 12:00:00, PM]",2017-12-31,PM,2018-01-01
3,208,12/31/2017 12:00:00 PM,334,02 - Auckland,1600024,Light,SH16 Nth of Coatesville Riverhead Highway,2,2,10280.0,"[12/31/2017, 12:00:00, PM]",2017-12-31,PM,2018-01-01
4,209,12/31/2017 12:00:00 PM,340,02 - Auckland,1600058,Light,SH16 Nth of Kahikatea Flat Rd (Kaukapakapa Bri...,1,1,1701.5,"[12/31/2017, 12:00:00, PM]",2017-12-31,PM,2018-01-01


In [42]:
# Create additional calendar columns from the parsed "Date" column:
#   Period   - first day of the month the reading falls in
#   Year     - calendar year
#   Month    - calendar month number
#   HalfYear - label such as "2018H1" (months 1-6) or "2018H2" (months 7-12)
# All four are computed with vectorised operations; row-wise apply() is very slow
# on a table of this size.
# NOTE(review): this cell derives everything from "Date", not "Adjusted Date" —
# confirm that is the intended column.

daily_traffic_volume['Period'] = daily_traffic_volume['Date'].dt.to_period('M').dt.to_timestamp()
daily_traffic_volume['Year'] = daily_traffic_volume['Date'].dt.year
daily_traffic_volume['Month'] = daily_traffic_volume['Date'].dt.month
year_label = daily_traffic_volume['Year'].astype(str)
daily_traffic_volume['HalfYear'] = np.where(daily_traffic_volume['Month'] <= 6, year_label + 'H1', year_label + 'H2')

In [119]:
# Keep only readings dated 2018-01-01 through 2024-04-30 (both ends inclusive)
filtered_daily_traffic_volume = daily_traffic_volume.loc[
    daily_traffic_volume['Date'].between('2018-01-01', '2024-04-30')
]

### Daily traffic volumes

In [124]:

# Sum the traffic counts of all rows that share a date. Year, HalfYear and Period are
# included as grouping keys so they stay available as columns for later re-grouping.
daily_group_keys = ['Year', 'Date', 'HalfYear', 'Period']
agg_daily_traffic_volume = (
    filtered_daily_traffic_volume
    .groupby(daily_group_keys)['Traffic Count']
    .sum()
    .reset_index()
)

# Line chart of the daily totals, with the y-axis pinned to start at zero
peak_daily_count = agg_daily_traffic_volume['Traffic Count'].max()
fig = px.line(agg_daily_traffic_volume, x='Date', y='Traffic Count', title='Traffic Count Over Time')
fig.update_yaxes(range=[0, peak_daily_count])
fig.show()

#### Annual level aggregation

In [126]:
# Add up the daily totals within each calendar year
total_annual_traffic = (
    agg_daily_traffic_volume
    .groupby('Year')['Traffic Count']
    .sum()
    .reset_index()
)

fig = px.bar(total_annual_traffic, x='Year', y='Traffic Count', title='Total Annual Traffic')
# One tick per year, starting at the earliest year, with vertical labels
fig.update_xaxes(
    tickmode='linear',
    tick0=min(total_annual_traffic['Year']),
    dtick=1,
    tickangle=90,
)
fig.show()

In [125]:
# Mean of the daily totals within each calendar year
average_daily_traffic = (
    agg_daily_traffic_volume
    .groupby('Year')['Traffic Count']
    .mean()
    .reset_index()
)

fig = px.bar(average_daily_traffic, x='Year', y='Traffic Count', title='Average Daily Traffic')
# One tick per year, starting at the earliest year, with vertical labels
fig.update_xaxes(
    tickmode='linear',
    tick0=min(average_daily_traffic['Year']),
    dtick=1,
    tickangle=90,
)
fig.show()

#### Aggregate to half year level

In [129]:
# Add up the daily totals within each half-year label (e.g. "2018H1")
total_half_year_traffic = (
    agg_daily_traffic_volume
    .groupby('HalfYear')['Traffic Count']
    .sum()
    .reset_index()
)

fig = px.bar(total_half_year_traffic, x='HalfYear', y='Traffic Count', title='Total Traffic Per Half Year')
# Tick every half-year label, anchored at the smallest label, with vertical text
fig.update_xaxes(
    tickmode='linear',
    tick0=min(total_half_year_traffic['HalfYear']),
    dtick=1,
    tickangle=90,
)
fig.show()

In [128]:
# Mean of the daily totals within each half-year label
average_daily_traffic_half_year = (
    agg_daily_traffic_volume
    .groupby('HalfYear')['Traffic Count']
    .mean()
    .reset_index()
)

fig = px.bar(
    average_daily_traffic_half_year,
    x='HalfYear',
    y='Traffic Count',
    title='Average Daily Traffic - Per Half Year',
)
# Tick every half-year label, anchored at the smallest label, with vertical text
fig.update_xaxes(
    tickmode='linear',
    tick0=min(average_daily_traffic_half_year['HalfYear']),
    dtick=1,
    tickangle=90,
)
fig.show()

In [133]:
# Write the half-year average daily traffic table to CSV, without the index column
average_daily_traffic_half_year.to_csv(
    path_or_buf='auckland_daily_average_traffic_half_year.csv',
    index=False,
)

### Population data

https://www.macrotrends.net/global-metrics/cities/21957/auckland/population

Each year's population figure is an estimate as of the last day of that year.

In [156]:
# Load the population download and tidy it in one pass:
#   - skip the first 14 lines of the file (presumably a descriptive preamble — confirm)
#   - take the year from the first four characters of "date"
#   - drop the annual-change column and strip the leading space from " Population"
auckland_population_df = (
    pd.read_csv('Auckland-population.csv', skiprows=14)
    .assign(Year=lambda frame: frame['date'].str[:4].astype(int))
    .drop(columns=[' Annual Change'])
    .rename(columns={' Population': 'Population'})
)

In [157]:
# Restrict to the years 1999 through 2026 (both ends inclusive)
auckland_population_df = auckland_population_df.loc[
    auckland_population_df['Year'].between(1999, 2026)
]

In [158]:
# Bar chart of the yearly population figures
fig = px.bar(auckland_population_df, x='Year', y='Population', title='Auckland population by year')
# One tick per year, starting at the earliest year, with vertical labels
fig.update_xaxes(
    tickmode='linear',
    tick0=min(auckland_population_df['Year']),
    dtick=1,
    tickangle=90,
)
fig.show()

In [159]:
# Estimate the population in the middle of each half year
#
# Growth between two consecutive rows is spread linearly: a quarter of it is added for
# the H1 estimate (end of March) and three quarters for the H2 estimate (end of September).
# Assumes the rows are in ascending, consecutive year order — TODO confirm.

years = auckland_population_df['Year'].tolist()
populations = auckland_population_df['Population'].tolist()

new_rows = []

# Pair every row with the one before it; the first row has no predecessor, so it gets no estimate
for year, start_pop, end_pop in zip(years[1:], populations, populations[1:]):
    growth = end_pop - start_pop

    # Estimate for March (end of Q1)
    new_rows.append({'HalfYear': f"{year}H1", 'Population': start_pop + (growth / 4)})

    # Estimate for September (end of Q3)
    new_rows.append({'HalfYear': f"{year}H2", 'Population': start_pop + (3 * growth / 4)})

auckland_half_year_population = pd.DataFrame(new_rows)

In [161]:
# Bar chart of the interpolated half-year population estimates
fig = px.bar(
    auckland_half_year_population,
    x='HalfYear',
    y='Population',
    title='Auckland population by half year',
)
# Tick every half-year label, anchored at the smallest label, with vertical text
fig.update_xaxes(
    tickmode='linear',
    tick0=min(auckland_half_year_population['HalfYear']),
    dtick=1,
    tickangle=90,
)
fig.show()

In [163]:

# Write the half-year population estimates to CSV, without the index column
auckland_half_year_population.to_csv(
    path_or_buf='auckland_half_annual_population.csv',
    index=False,
)