# Big Data AADT Comparison Across Platforms
- Caltrans Traffic Census year == 2022 (federal FY)  


In [1]:
# import modules
import pandas as pd
import numpy as np
import warnings
import altair as alt
#!pip install altair vega selenium
from altair_saver import save # To save the PNG file(s)

In [2]:
# Define the path to the data on Google Cloud Storage (GCS)
path = "gs://calitp-analytics-data/data-analyses/big_data/compare_traffic_counts/4_i80_all/"

In [3]:
# Assign the dataframe names
df01_ct = f'{path}ct_tc_i80_2022.csv'
df01_rep = f'{path}replica-i80_corridor_fall_2022-07_01_24-network_link_layer.csv'
df01_stl = f'{path}1761875_i80_corridor_2022_network_performance.csv'

In [4]:
# create a function to import the data from a csv file
def getData(path):
    # Suppress warning
    warnings.filterwarnings("ignore")
    
    # reads in the data from a .csv file
    df = pd.read_csv(path)
    return df

# Create a function to remove spaces and make header characters lowercase
def clean_headers(df):
    cleaned_columns = []
    for column in df.columns:
        cleaned_column = column.replace(" ", "").lower()
        cleaned_columns.append(cleaned_column)
    df.columns = cleaned_columns
    return df

In [5]:
# Pull in the Data
df01_ct = getData(df01_ct)
df01_rep = getData(df01_rep)
df01_stl = getData(df01_stl)

In [6]:
#Clean the headers
df01_ct = clean_headers(df01_ct)
df01_rep = clean_headers(df01_rep)
df01_stl = clean_headers(df01_stl)

## Caltrans
- Caltrans Traffic Census Data is from Federal FY 2022
- geodatabase pull from Traffic Opts Open Data  
    - https://gisdata-caltrans.opendata.arcgis.com/datasets/d8833219913c44358f2a9a71bda57f76_0/explore?location=41.381207%2C-119.640977%2C6.21

In [7]:
# A function to filter the Caltrans Traffic Census data to only include 10 locations along Highway 50 (Folsom to Echo Summit)
def filter_ct_traffic_census(df, field, county_order=None):
    
    # Filter the DataFrame based on ['location_description'] values
    filtered_locations = ['ROSEVILLE, DOUGLAS BOULEVARD',
                          'JCT. RTE. 65',
                          'ROCKLIN, SIERRA COLLEGE BOULEVARD',
                          'NEWCASTLE ROAD',
                          'AUBURN, JCT. RTE. 49',
                          'AUBURN RAVINE ROAD',
                          'HAINES ROAD',
                          'CLIPPER GAP',
                          'WEIMAR CROSSROAD',
                          'COLFAX, JCT. RTE. 174 NORTH',
                          'MONTE VISTA',
                          'BLUE CANYON ROAD',
                          'JCT. RTE. 20 WEST',
                          'KINGVALE',
                          'JCT. RTE. 89 NORTH, JCT. RTE. 267 SOUTH',
                          'HIRSCHDALE ROAD OVERHEAD (BOCA)',
                          'NEVADA STATE LINE',
                          'SODA SPRINGS',
                          'SODA SPRINGS OC',
                          'DONNER LAKE']
    df = df[df[field].isin(filtered_locations)]
    
    # Rename the locations and put the new names in a field called 'location'
    location_name_mapping = {
        'ROSEVILLE, DOUGLAS BOULEVARD': 'Douglas Boulevard',
        'JCT. RTE. 65': 'Junction, Route 65',
        'ROCKLIN, SIERRA COLLEGE BOULEVARD': 'Sierra College Boulevard',
        'NEWCASTLE ROAD': 'New Castle',
         'AUBURN, JCT. RTE. 49': 'Hwy 49',
         'AUBURN RAVINE ROAD': 'Foresthill Road',
         'HAINES ROAD': 'Haines Road',
         'CLIPPER GAP': 'Clipper Gap',
         'WEIMAR CROSSROAD': 'Weimar Cross Road',
         'COLFAX, JCT. RTE. 174 NORTH': 'Colfax',
         'MONTE VISTA': 'Monte Vista',
         'BLUE CANYON ROAD': 'Blue Canyon Road',
         'JCT. RTE. 20 WEST': 'Junction Route 20',
         'KINGVALE': 'Kingvale',
         'JCT. RTE. 89 NORTH, JCT. RTE. 267 SOUTH': 'Junction Highway 89',
         'HIRSCHDALE ROAD OVERHEAD (BOCA)': 'Hirschdale Road',
         'NEVADA STATE LINE': 'Nevada State Line',
         'SODA SPRINGS': 'Soda Springs',
         'SODA SPRING OC': 'Soda Springs',
         'DONNER LAKE': 'Donner Lake'
    }
    
    # Apply the mapping to the 'location_description' column to create the 'location' column
    df['location'] = df[field].map(location_name_mapping)  
    
    return df

In [8]:
# Filtering the data by the names found in the 'location_description' field
#field == ['location_description]
df01_ct = filter_ct_traffic_census(df01_ct, 'location_description')

In [9]:
# Create a function to identify the location order
def location_order(df):
    # Rename the locations and put the new names in a field called 'location'
    location_order_mapping = {
         'Douglas Boulevard': 0,
         'Junction, Route 65': 1,
         'Sierra College Boulevard': 2,
         'New Castle': 3,
         'Hwy 49': 4,
         'Foresthill Road': 5,
         'Haines Road': 6,
         'Clipper Gap': 7,
         'Weimar Cross Road': 8,
         'Colfax': 9,
         'Monte Vista': 10, 
         'Junction Route 20': 11, 
         'Blue Canyon Road': 12,
         'Kingvale': 13,
         'Soda Springs': 14,
         'Donner Lake': 15,
         'Junction Highway 89': 16,
         'Junction 267': 17,
         'Hirschdale Road': 18,
         'Nevada State Line': 19        
    }
    
    # Apply the mapping to the 'location' column to create the 'location_order' column
    df['location_order'] = df['location'].map(location_order_mapping)
    
    return df

In [10]:
# Define the location order and create a field called ['location_order']
df01_ct = location_order(df01_ct)

In [11]:
def drop_duplicates(df, column_name):
    
    # Drop duplicates in the specified column for the specific source
    df = df.drop_duplicates(subset=[column_name])
    
    sorted_df = df.sort_values(by='location_order')
    
    return sorted_df

In [12]:
# Caltrans Data has duplicates (same location/same postmile - but have different ['OBJECTID']'s
df01_ct = drop_duplicates(df01_ct, 'location')

In [13]:
# Create a function to add the year, source, and route to the dataframe
def add_year_source_route(df, year, source, route):
    # Add year column
    df['year'] = year
    
    # Add Source Information
    df['source'] = source
    
    # Add Route Number column called 'route'
    df['route_number'] = route
        
    return df

In [14]:
# Using the 'add_year_source_route' function this script will add the year == 2022, source == Caltrans Traffic Census, and Route == 80 
df01_ct = add_year_source_route(df01_ct, '2022', 'Caltrans Traffic Census', '80')

In [15]:
# Create a new column that averages out the back and ahead AADT numbers
def calculate_ct_tc_volume(df):
    df['volume'] = (df['ahead_aadt'] + df['back_aadt'])/2
    
    return df

In [16]:
# Create a new column called ['volume'] that can be used to compare Caltrans Traffic Census data to other platforms such as StreetLight and/or Replica
df01_ct = calculate_ct_tc_volume(df01_ct)

In [17]:
# Create the Altair chart
def ct_ahead_back_bar_chart(df):  
    df_grouped = df.groupby(['location_order', 'location']).mean().reset_index()   
    
    # Melt the DataFrame to have a single column for values and another for value type
    df_melted = df_grouped.melt(id_vars=['location'], value_vars=['back_aadt', 'ahead_aadt', 'volume'],
                        var_name='value_type', value_name='value')
    
    #Create the bar chart
    chart = alt.Chart(df_melted).mark_bar().encode(
        x=alt.X('location:N', title='Caltrans Traffic Census Location', sort=alt.EncodingSortField(field='location_order', order='ascending')),
        y=alt.Y('value:Q', title='Value'),
        color=alt.Color('value_type:N', title='Value Type'),
        #column=alt.Column('value_type:N', title='Value Type'),
        xOffset='value_type:N', # Offset bars by back_aadt and ahead_aadt to make them side-by-side
        tooltip=[
            alt.Tooltip('location:N', title='Caltrans Traffic Census Location'),
            alt.Tooltip('value_type:N', title='Value Type'),
            alt.Tooltip('value:Q', title='Value'),
        ]
    ).properties(
        width=1000, # Controls the width of each bar
        height=500 # Controls the height of the chart
    ).configure_view(
        strokeOpacity=0 # Removes gridlines for better visualization
    ).configure_axis(
        labelFontSize=12,
        titleFontSize=14
    ).configure_header(
        titleFontSize=14,
        labelFontSize=12
    )
        
    return chart

In [18]:
# create a variable that can be used to display the bar chart that was created using the 'ct_ahead_back_bar_chart'
df01_ct_chart = ct_ahead_back_bar_chart(df01_ct)

### Caltrans AADT ['ahead_aadt'] and ['back_aadt'] Data compared and averaged into ['volume']

In [19]:
# Display the bar chart
df01_ct_chart

In [20]:
#df01_ct.to_csv('caltrans_tc_i80_2022.csv', index=False)

In [21]:
# Create a subset of the data to include only fields that are needed to create a visualization to compare volumes across platforms
def create_bd_subset(df):
    df = df[['year', 'source', 'route_number', 'location', 'volume']]
    
    return df

In [22]:
# Create a subset of the data using the 'create_bd_subset' function
df01_ct = create_bd_subset(df01_ct)

In [23]:
df01_ct.columns

Index(['year', 'source', 'route_number', 'location', 'volume'], dtype='object')

## Replica
- Replica Study Name == 'i80_corridor_fall_2022'  
- Replica data Fall 2022  
        - Selected to best match the latest available Caltrans aadt data == 2022
- Filter  
        - (Primary Mode Filter == 'Commercial vehicle (freight)', 'Private auto')  
- Typical Day == Thursday

In [24]:
# Group by 'osmid', 'startlat', 'startlon'
df01_rep = df01_rep.groupby(['osmid', 'networklinkid', 'direction', 'startlat', 'startlon'], as_index=False).agg(
    total_trip_count=pd.NamedAgg(column='trip_count', aggfunc='sum'),
    record_count=pd.NamedAgg(column='osmid', aggfunc='count')
)

# Calculate the average trip count per location
df01_rep['average_trip_count'] = df01_rep['total_trip_count']/df01_rep['record_count']

In [25]:
# Create a function to identify the location/direction
# enter the dataframe and the field that contains a unique identifier for the record
def location_and_direction(df, field):
    location_mapping = {
        7097013969104967421: 'Douglas Boulevard', # East
        9802375355628235486: 'Douglas Boulevard', # West
        11749790242771947201: 'Junction, Route 65', # West
        2998486274327168794: 'Junction, Route 65', # East
        12997097876657250024: 'Sierra College Boulevard', # West
        4113932617386932443: 'Sierra College Boulevard', # East
        1820282868847123302: 'New Castle', # West
        2153809304064543638: 'New Castle', # East
        14803610523032640730: 'Hwy 49', # West
        7830880626249125726: 'Hwy 49', # East
        1907405116392175116: 'Foresthill Road', # West
        4375580955572506113: 'Foresthill Road', # East
        18406983422774616406: 'Haines Road', # West
        17075635813352358085: 'Haines Road', # East
        7106327891903328569: 'Clipper Gap', # West
        5504117348304693689: 'Clipper Gap', # East
        11135734386205311321: 'Weimar Cross Road', # West
        13465876789817826756: 'Weimar Cross Road', # East
        5883099883246745450: 'Colfax', # West
        7503046201223688730: 'Colfax', # East
        9927974786327901519: 'Monte Vista', # West
        11960647803649989008: 'Monte Vista', # East
        13608193047039053587: 'Blue Canyon Road', # West
        2007974862913008361: 'Blue Canyon Road', # East
        13357260780098436525: 'Junction Route 20', # West
        5462939803201534567: 'Junction Route 20', # East
        6267274075468205672: 'Kingvale', # West
        9959731254226177403: 'Kingvale', # East
        10470325572430616840: 'Soda Springs', # West
        17089588549017153821: 'Soda Springs', # East
        16926764014851405856: 'Donner Lake', # West
        15201662489874611667: 'Donner Lake', # East
        3677820178405293835: 'Junction Highway 89', # West
        1268109964243464445: 'Junction Highway 89', # East
        10152161345659531981: 'Junction 267', # West
        10071749508719544966: 'Junction 267', # East
        9241689629477775206: 'Hirschdale Road', # West
        13670397138126918019: 'Hirschdale Road', # East
        17007372610764872315: 'Nevada State Line', # West
        13632071022469478595: 'Nevada State Line' # East
    }
    
    # Apply the mapping to the 'networklinkid' column to create the 'ew_direction' column
    df['location'] = df[field].map(location_mapping)

    location_direction = {
        7097013969104967421: 'East', # Douglas Boulevard
        9802375355628235486: 'West', # Douglas Boulevard
        11749790242771947201: 'West', # Junction Route 65
        2998486274327168794: 'East', # Junction Route 65
        12997097876657250024: 'West', # Sierra College Boulevard
        4113932617386932443: 'East', # Sierra College Boulevard
        1820282868847123302: 'West', # New Castle
        2153809304064543638: 'East', # New Castle
        14803610523032640730: 'West', # Highway 49
        7830880626249125726: 'East', # Highway 49
        1907405116392175116: 'West', # Foresthill Road
        4375580955572506113: 'East', # Foresthill Road
        18406983422774616406: 'West', # Haines Road
        17075635813352358085: 'East', # Haines Road
        7106327891903328569: 'West', # Clipper Gap
        5504117348304693689: 'East', # Clipper Gap
        11135734386205311321: 'West', # Weimar Cross Road
        13465876789817826756: 'East', # Weimar Cross Road
        5883099883246745450: 'West', # Colfax
        7503046201223688730: 'East', # Colfax
        9927974786327901519: 'West', # Monte Vista
        11960647803649989008: 'East', # Monte Vista
        13608193047039053587: 'West', # Blue Canyon
        2007974862913008361: 'East', # Blue Canyon
        13357260780098436525: 'West', # Junction Route 20
        5462939803201534567: 'East', # Junction Route 20
        6267274075468205672: 'West', # Kingvale
        9959731254226177403: 'East', # Kingvale
        10470325572430616840: 'West', # Soda Springs
        17089588549017153821: 'East', # Soda Springs
        16926764014851405856: 'West', # Donner Lake
        15201662489874611667: 'East', # Donner Lake
        3677820178405293835: 'West', # Junction Highway 89
        1268109964243464445: 'East', # Junction Highway 89
        10152161345659531981: 'West', # Junction 267
        10071749508719544966: 'East', # Junction 267
        9241689629477775206: 'West', # Hirschdale Road
        13670397138126918019: 'East', # Hirschdale Road
        17007372610764872315: 'West', # Nevada State Line
        13632071022469478595: 'East' # Nevada State Line
    }
    
    # Apply the mapping to the 'networklinkid' column to create the 'ew_direction' column
    df['ew_direction'] = df[field].map(location_direction)
    
    return df

In [26]:
df01_rep = location_and_direction(df01_rep, 'networklinkid')

In [27]:
df01_rep = location_order(df01_rep)

In [28]:
df01_rep = add_year_source_route(df01_rep, '2022', 'Replica', '80')

In [29]:
# Calculate the total Traffic Volume for each location
df01_rep['volume'] = df01_rep.groupby('location')['average_trip_count'].transform('sum')

# drop the duplicates from the 'location' volume
df01_rep = df01_rep.drop_duplicates(subset=['location'])

In [30]:
# A function to create a bar chart for the StreetLight Data to show Hwy 50 Traffic Volumes
def bd_volume_bar_chart(df, unique_title):
    # Sort the DataFrame by 'location_order'
    df = df.sort_values(by='location_order')
    
    # Group by 'location' and 'location_order' and sum the 'averagedailiy segmenttraffic(stlvolume)'
    df_grouped = df.groupby(['location', 'location_order']).sum().reset_index()
    
    # Create the bar chart
    chart = alt.Chart(df_grouped).mark_bar().encode(
        x=alt.X('location:N', title=unique_title, sort=alt.EncodingSortField(field='location_order', order='ascending')),
        y=alt.Y('volume:Q', title='Volume'),
        tooltip=[
            alt.Tooltip('location:N', title=unique_title),
            alt.Tooltip('volume:Q', title='Volume')
        ]
    ).properties(
        width=1000, # Controls the width of the chart
        height=500, # Controls the height of the chart
        
    ).configure_view(
        strokeOpacity=0 # Removes gridlines for better visualization
    ).configure_axis(
        labelFontSize=12,
        titleFontSize=14
    )
    
    return chart

In [31]:
df01_rep_chart = bd_volume_bar_chart(df01_rep, "Replica Volume I80 2022")

In [32]:
df01_rep_chart

In [33]:
#df01_rep.to_csv('replica_i80_2022.csv', index=False)

In [34]:
# Create a subset of the data using the 'create_bd_subset' function
df01_rep = create_bd_subset(df01_rep)

## StreetLight
- Analysis Name == 'i80_corridor_2022'

In [35]:
# Createa  a function to identify the location by name using the identified field
def classify_location_stl(df, field):
    # Define a mapping for Network Link IDs to Locations
    location_mapping = {
        "Alan S. Hart Freeway / 972215103 / 1": 'Douglas Boulevard', # West # "Roseville, Douglas Boulevard"
        "Alan S. Hart Freeway / 123741375 / 1": 'Douglas Boulevard', # East # Roseville, Douglas Boulevard
        "Alan S. Hart Freeway / 119400793 / 1": 'Junction, Route 65', # West # "Jct. Rte. 65" 
        "Alan S. Hart Freeway / 119400788 / 2": 'Junction, Route 65', # East # "Jct. Rte. 65"
        "Alan S. Hart Freeway / 972564814 / 1": 'Sierra College Boulevard', # West # "Rocklin, Sierra College Boulevard"
        "Alan S. Hart Freeway / 992635393 / 1": 'Sierra College Boulevard', # East # Rocklin, Sierra College Boulevard" 
        "Alan S. Hart Freeway / 992632108 / 1": 'New Castle', # West # New Castle Road
        "Alan S. Hart Freeway / 160249245 / 1": 'New Castle', # East # New Castle Road
        "Alan S. Hart Freeway / 119524661 / 1": 'Hwy 49', # West # Auburn, Jct. Rte. 49
        "Alan S. Hart Freeway / 184557632 / 1": 'Hwy 49', # East # Auburn, Jct. Rte. 49
        'Alan S. Hart Freeway / 1020194794 / 1': 'Foresthill Road', # West # Auburn Ravine Road
        'Alan S. Hart Freeway / 972220492 / 2': 'Foresthill Road', # East # Auburn Ravine Road
        'Alan S. Hart Freeway / 119400812 / 7': 'Haines Road', # West # Haines Road
        'Alan S. Hart Freeway / 28869361 / 3': 'Haines Road', # East # Haines Road
        'Alan S. Hart Freeway / 119400812 / 3': 'Clipper Gap', # West # Clipper Gap
        'Alan S. Hart Freeway / 28869361 / 7': 'Clipper Gap', # East # Clipper Gap
        'Alan S. Hart Freeway / 32601609 / 3': 'Weimar Cross Road', # West # Weimar Crossroad
        'Alan S. Hart Freeway / 103496131 / 3': 'Weimar Cross Road', # East # Weimar Crossroad
        'Alan S. Hart Freeway / 4598068 / 3': 'Colfax', # West # Colfax, Jct. Rte. 174 North
        'Alan S. Hart Freeway / 32601614 / 5': 'Colfax', # East # Colfax, Jct. Rte. 174 North
        'Alan S. Hart Freeway / 30362645 / 4': 'Monte Vista', # West # Monte Vista
        'Alan S. Hart Freeway / 183374647 / 6': 'Monte Vista', # East # Monte Vista
        'Alan S. Hart Freeway / 30365950 / 2': 'Blue Canyon Road', # West # Blue Canyon Road
        'Alan S. Hart Freeway / 30365952 / 2': 'Blue Canyon Road', # East # Blue Canyon Road
        'Alan S. Hart Freeway / 30409920 / 1': 'Junction Route 20', # West # Jct. Rte. 20 West
        'Alan S. Hart Freeway / 30409918 / 3': 'Junction Route 20', # East # Jct. Rte. 20 West
        'Alan S. Hart Freeway / 30450625 / 2': 'Kingvale', # West # Kingvale
        'Alan S. Hart Freeway / 30450623 / 2': 'Kingvale', # East # Kingvale
        'Alan S. Hart Freeway / 30411372 / 4': 'Soda Springs', # West # Soda Spring OC
        'Alan S. Hart Freeway / 762030961 / 2': 'Soda Springs', # East # Soda Springs
        'Alan S. Hart Freeway / 30411369 / 1': 'Donner Lake', # West # Donner Lake
        'Alan S. Hart Freeway / 183374639 / 2': 'Donner Lake', # East # Donner Lake
        'Alan S. Hart Freeway / 29305166 / 2': 'Junction Highway 89', # West # Truckee, Jct. Rte. 89 South
        'Alan S. Hart Freeway / 175161349 / 1': 'Junction Highway 89', # East # Truckee, Jct. Rte. 89 South
        'Alan S. Hart Freeway / 175161348 / 1': 'Junction 267', # West # Jct. Rte. 89 North, Jct. Rte. 267 South
        'Alan S. Hart Freeway / 183374643 / 1': 'Junction 267', # East # Jct. Rte. 89 North, Jct. Rte. 267 South
        'Alan S. Hart Freeway / 30433029 / 2': 'Hirschdale Road', # West # Hirschdale Road Overhead (BOCA)
        'Alan S. Hart Freeway / 30433027 / 1': 'Hirschdale Road', # East # Hirschdale Road Overhead (BOCA)
        'I 80 / 30444756 / 1': 'Nevada State Line', # West # Nevada State Line
        'I 80 / 30444747 / 1': 'Nevada State Line' # East # Nevada State Line
    }
    
    # Apply the mapping to the 'networklinkid' column to create the 'ew_direction' column
    df['location'] = df[field].map(location_mapping)

        # Define a mapping for Network Link IDs to Locations
    direction_mapping = {
        "Alan S. Hart Freeway / 972215103 / 1": 'West', # Douglas Boulevard
        "Alan S. Hart Freeway / 123741375 / 1": 'East', # Douglas Boulevard
        "Alan S. Hart Freeway / 119400793 / 1": 'West', # West # Jct. Rte. 65
        "Alan S. Hart Freeway / 119400788 / 2": 'East', # East # Jct. Rte. 65
        "Alan S. Hart Freeway / 972564814 / 1": 'West', # Sierra College Boulevard
        "Alan S. Hart Freeway / 992635393 / 1": 'East', # Sierra College Boulevard
        "Alan S. Hart Freeway / 992632108 / 1": 'West', # New Castle Road
        "Alan S. Hart Freeway / 160249245 / 1": 'East', # New Castle Road
        "Alan S. Hart Freeway / 119524661 / 1": 'West', # Auburn, Jct. Rte. 49
        "Alan S. Hart Freeway / 184557632 / 1": 'East', # Auburn, Jct. Rte. 49
        'Alan S. Hart Freeway / 1020194794 / 1': 'West', # Auburn Ravine Road
        'Alan S. Hart Freeway / 972220492 / 2': 'East', # Auburn Ravine Road
        'Alan S. Hart Freeway / 119400812 / 7': 'West', # Haines Road
        'Alan S. Hart Freeway / 28869361 / 3': 'East', # Haines Road
        'Alan S. Hart Freeway / 119400812 / 3': 'West', # Clipper Gap
        'Alan S. Hart Freeway / 28869361 / 7': 'East', # Clipper Gap
        'Alan S. Hart Freeway / 32601609 / 3': 'West', # Weimar Crossroad
        'Alan S. Hart Freeway / 103496131 / 3': 'East', # Weimar Crossroad
        'Alan S. Hart Freeway / 4598068 / 3': 'West', # Colfax, Jct. Rte. 174 North
        'Alan S. Hart Freeway / 32601614 / 5': 'East', # Colfax, Jct. Rte. 174 North
        'Alan S. Hart Freeway / 30362645 / 4': 'West', # Monte Vista
        'Alan S. Hart Freeway / 183374647 / 6': 'East', # Monte Vista
        'Alan S. Hart Freeway / 30365950 / 2': 'West', # Blue Canyon Road
        'Alan S. Hart Freeway / 30365952 / 2': 'East', # Blue Canyon Road
        'Alan S. Hart Freeway / 30409920 / 1': 'West', # Jct. Rte. 20 West
        'Alan S. Hart Freeway / 30409918 / 3': 'East', # Jct. Rte. 20 West
        'Alan S. Hart Freeway / 30450625 / 2': 'West', # Kingvale
        'Alan S. Hart Freeway / 30450623 / 2': 'East', # Kingvale
        'Alan S. Hart Freeway / 30411372 / 4': 'West', # Soda Spring OC
        'Alan S. Hart Freeway / 762030961 / 2': 'East', # Soda Springs
        'Alan S. Hart Freeway / 30411369 / 1': 'West', # Donner Lake
        'Alan S. Hart Freeway / 183374639 / 2': 'East', # Donner Lake
        'Alan S. Hart Freeway / 29305166 / 2': 'West', # Truckee, Jct. Rte. 89 South
        'Alan S. Hart Freeway / 175161349 / 1': 'East', # Truckee, Jct. Rte. 89 South
        'Alan S. Hart Freeway / 175161348 / 1': 'West', # Jct. Rte. 89 North, Jct. Rte. 267 South
        'Alan S. Hart Freeway / 183374643 / 1': 'East', # Jct. Rte. 89 North, Jct. Rte. 267 South
        'Alan S. Hart Freeway / 30433029 / 2': 'West', # Hirschdale Road Overhead (BOCA)
        'Alan S. Hart Freeway / 30433027 / 1': 'East', # Hirschdale Road Overhead (BOCA)
        'I 80 / 30444756 / 1': 'West', # Nevada State Line
        'I 80 / 30444747 / 1': 'East' # Nevada State Line
    }
    
    # Apply the mapping to the 'networklinkid' column to create the 'ew_direction' column
    df['ew_direction'] = df[field].map(direction_mapping) 
    
    # Define a mapping for Network Link IDs to Locations
    location_mapping = {
        'Douglas Boulevard': 0,
        'Junction, Route 65': 1, 
        'Sierra College Boulevard': 2,
        'New Castle': 3,
        'Hwy 49': 4,
        'Foresthill Road': 5,
        'Haines Road': 6,
        'Clipper Gap': 7,
        'Weimar Cross Road': 8,
        'Colfax': 9,
        'Monte Vista': 10,
        'Blue Canyon': 11,
        'Junction Route 20': 12,
        'Kingvale': 13,
        'Soda Springs': 14,
        'Donner Lake': 15,
        'Junction Highway 89': 16,
        'Junction 267': 17,
        'Hirschdale Road': 18,
        'Nevada State Line': 19
    }
    
    # Apply the mapping to the 'networklinkid' column to create the 'ew_direction' column
    df['location_order'] = df['location'].map(location_mapping)
    
    return df

In [36]:
# Add the ['location'], ['ew_direction'], and ['location_order']
df01_stl = classify_location_stl(df01_stl, 'zonename')

In [37]:
# Add the Year, Route, and Data Source using the function 'add_year_route_source'
df01_stl = add_year_source_route(df01_stl, '2022', 'StreetLight', '80')

In [38]:
# rename the 'averagedailysegmenttraffic(stlvolume)' field's name to 'volume'
df01_stl = df01_stl.rename(columns={'averagedailysegmenttraffic(stlvolume)': 'volume'})

In [39]:
df01_stl_chart = bd_volume_bar_chart(df01_stl, "StreetLight I80 Volume 2022")
df01_stl_chart

In [40]:
#df01_stl.to_csv('streetlight_i80_volume_2022.csv', index=False)

In [41]:
# Create a subset of the data using the 'create_bd_subset' function
df01_stl = create_bd_subset(df01_stl)

## Cross-Platform Comparison

In [42]:
def combine_dataframes(df1, df2, df3):
    # Select the matching columns from each dataframe
    common_columns = list(set(df1.columns) & set(df2.columns) & set(df3.columns))
    
    # Concatenate the dataframes
    combined_df = pd.concat([df1[common_columns], df2[common_columns], df3[common_columns]], ignore_index=True)
    
    return combined_df

In [43]:
# Use the combine_dataframes function to combine the subset dataframes created for Caltrans, Replica, and StreetLight into one dataframe
df_combined = combine_dataframes(df01_ct, df01_rep, df01_stl)

In [44]:
# use the classify_location function to classify the locations
df_combined = location_order(df_combined)

In [45]:
df_combined.columns

Index(['year', 'route_number', 'volume', 'location', 'source',
       'location_order'],
      dtype='object')

In [46]:
df_combined

Unnamed: 0,year,route_number,volume,location,source,location_order
0,2022,80,194500.0,Douglas Boulevard,Caltrans Traffic Census,0
1,2022,80,137500.0,"Junction, Route 65",Caltrans Traffic Census,1
2,2022,80,85500.0,Sierra College Boulevard,Caltrans Traffic Census,2
3,2022,80,76000.0,New Castle,Caltrans Traffic Census,3
4,2022,80,69500.0,Hwy 49,Caltrans Traffic Census,4
...,...,...,...,...,...,...
74,2022,80,48346.0,Sierra College Boulevard,StreetLight,2
75,2022,80,38903.0,New Castle,StreetLight,3
76,2022,80,49714.0,Sierra College Boulevard,StreetLight,2
77,2022,80,17960.0,Nevada State Line,StreetLight,19


In [47]:
def add_total_volume_column(df):
    # Calculate the total volume for each location by source
    total_volume = df.groupby(['location', 'source'])['volume'].transform('sum')
    df['total_volume'] = total_volume
    return df

In [48]:
# A total column was created to total up the traffic volumes
# This was needed because StreetLight provided East and West values (Highway 50 runs east-west)
df_combined = add_total_volume_column(df_combined)

In [49]:
def drop_duplicates_for_specific_source(df, source_value, column_name):
    # Separate the dataframe into two parts: one with the specific source and one without
    df_source = df[df['source'] == source_value]
    df_other = df[df['source'] != source_value]
    
    # Drop duplicates in the specified column for the specific source
    df_source = df_source.drop_duplicates(subset=[column_name])
    
    # Concatenate the two dataframes back together
    df_combined = pd.concat([df_source, df_other], ignore_index=True)
    
    return df_combined

In [50]:
# use the drop_duplicates_for_specific_source function to remove duplicate locations for the StreetLight records
df_combined = drop_duplicates_for_specific_source(df_combined, 'StreetLight', 'location')

In [51]:
def create_grouped_bar_chart(df):
    # Sort the DataFrame by 'location_order'
    location_order = df[['location', 'location_order']].drop_duplicates().sort_values('location_order')['location'].tolist()
        
    # Create the bar chart using Altair
    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X('source:N', title='Source'),
        y=alt.Y('total_volume:Q', title='Volume'),
        color='source:N',
        column=alt.Column('location:N', sort=location_order, title='Location'),
        tooltip=['location:N', 'source:N', 'total_volume:Q']
    ).properties(
        width=100,
        height=500
    ).resolve_scale(
        y='shared'
    )
    
    return chart

In [52]:
df_combined_chart = create_grouped_bar_chart(df_combined)
df_combined_chart

In [53]:
#df_combined.to_csv('combined_i80_volume.csv', index=False)

In [54]:
#save(df_combined_chart, 'chart.png', method='node')