# Road traffic data from Norway and Finland



In [5]:
#import os
#import geopandas as gpd
import pandas as pd
import requests
#import json
#import zipfile
import datetime
import time
from io import StringIO
import numpy as np
#import matplotlib.pyplot as plt
#from shapely.geometry import shape

In [6]:
# Endpoint of Vegvesenet's traffic data API; GraphQL queries are POSTed to it.
url_traffic_nor = "https://www.vegvesen.no/trafikkdata/api/"

In [4]:
# GraphQL query asking for every traffic registration point together with
# its id, name, direction (from/to) and lat/lon coordinates.
grap_ql_body = """
{
  trafficRegistrationPoints {
    id
    name
    direction {
      from
      to
    }
    location {
      coordinates {
        latLon {
          lat
          lon
        }
      }
    }
  }
}
"""
payload = {
    'query': grap_ql_body
}
headers = {
    'Content-Type': 'application/json'
}
# A timeout is required: without one, requests waits indefinitely if the
# server never answers.
r = requests.post(
    url_traffic_nor,
    json=payload,
    headers=headers,
    allow_redirects=True,
    timeout=60,
)
# Fail with a clear HTTP error instead of an opaque KeyError/JSON error below.
r.raise_for_status()
all_nor_sensors = r.json()['data']['trafficRegistrationPoints']
print('Number of sensors in API: %s' % (len(all_nor_sensors), ))

Number of sensors in API: 6322


## 3. Get the sensor data from Vegvesenet's API (Norway)

The API only allows fetching data one sensor at a time. The following function retrieves the data of a single sensor between the dates `from_day` and `to_day`. Note that the API restricts the size of its response: for a sufficiently long time period the data is paged, so several queries are needed to get the complete data. The function takes care of this by repeating the query in a loop and passing the variable `cursor`, which is a pointer to the next chunk of data.

In [7]:
def _build_hourly_volume_query_NOR(sensor_id, from_day, to_day, cursor):
    """Return the GraphQL query for hourly volumes by length range of one sensor.

    `cursor` is inserted verbatim as an extra `byHour` argument, so it must be
    either an empty string or a fragment such as 'after:"<endCursor>"'.
    """
    return """
    {{
        trafficData(trafficRegistrationPointId: "{sensor_id}") {{
            volume {{
                byHour(
                    from: "{from_day}T00:00:00+00:00"
                    to: "{to_day}T00:00:00+00:00"
                    {cursor}
                ) {{
                    pageInfo {{
                        endCursor
                        hasNextPage
                    }}
                    edges {{
                        node {{
                            from
                            to
                            byDirection {{
                                heading
                                byLengthRange {{
                                    total {{
                                        volumeNumbers {{
                                            volume
                                        }}
                                        coverage {{
                                            percentage
                                        }}
                                    }}
                                    lengthRange {{
                                        representation
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        }}
    }}
    """.format(
        sensor_id=sensor_id,
        from_day=from_day,
        to_day=to_day,
        cursor=cursor
    )


def _post_query_NOR(query, sensor_id, print_errors, max_attempts=3, wait_seconds=5):
    """POST `query` to Vegvesenet's API and return the `data` part of the reply.

    The request is attempted up to `max_attempts` times, because the API may
    fail or time out when many requests are made. A 200 or 404 status ends the
    retries immediately.

    Raises:
        ValueError: if no response was obtained, the final status code is not
            200, or the JSON reply carries no `data`.
    """
    url_traffic_nor = 'https://www.vegvesen.no/trafikkdata/api/'
    payload = {
        'query': query
    }
    headers = {
        'Content-Type': 'application/json'
    }

    r = None
    for attempt in range(1, max_attempts + 1):
        # Reset so that a stale response from an earlier attempt is never
        # mistaken for the outcome of this one.
        r = None
        try:
            r = requests.post(url_traffic_nor, json=payload, headers=headers, allow_redirects=True, timeout=10)
        except requests.exceptions.RequestException:
            if print_errors:
                print("TimeoutError: {id}".format(id=sensor_id))

        if r is not None and r.status_code in (200, 404):
            break

        # No point in waiting after the last attempt.
        if attempt < max_attempts:
            if r is None:
                print("Waiting for {} seconds".format(wait_seconds))
            else:
                print(f"Waiting: {r.status_code}")
            time.sleep(wait_seconds)

    if r is None:
        # Every attempt raised; previously this crashed on `None.status_code`.
        raise ValueError(
            'No response from API for sensor %s after %s attempts' % (sensor_id, max_attempts)
        )
    if r.status_code != 200:
        raise ValueError('Incorrect response from API:\n%s' % (r.content, ))

    sensor_response = r.json()
    data = sensor_response.get('data')
    if data is None:
        raise ValueError('Incorrect response from API:\n%s' % (sensor_response, ))
    return data


def _parse_hourly_edges_NOR(sensor_id, edges, print_errors):
    """Turn the `edges` of a `byHour` reply into one row per hour and direction.

    Each row is
    [sensor_id, from_date, to_date, from_hour, to_hour, heading,
     short_vehicles, long_vehicles, unknown_length].
    Dates are parsed from the date part of the timestamps and hours from the
    hour part; the UTC offset carried by the timestamp string is discarded.
    """
    lines = []
    for edge in edges:
        node = edge['node']
        from_stamp = node['from'].split('T')
        to_stamp = node['to'].split('T')

        from_date = datetime.datetime.strptime(from_stamp[0], '%Y-%m-%d')
        to_date = datetime.datetime.strptime(to_stamp[0], '%Y-%m-%d')
        from_hour = datetime.time(hour=int(from_stamp[1].split(':')[0]))
        to_hour = datetime.time(hour=int(to_stamp[1].split(':')[0]))

        for direction in node['byDirection']:
            dir_name = direction['heading']

            # NaN means "this length class was not reported for this hour".
            small = float('NaN')
            heavy = float('NaN')
            unknown_length = float('NaN')

            for length_range in direction['byLengthRange']:
                veh_type = length_range['lengthRange']['representation']
                volume_numbers = length_range['total']['volumeNumbers']
                if volume_numbers is None:
                    # NOTE(review): a missing measurement is recorded as 0,
                    # not NaN - confirm this is what downstream analysis wants.
                    total = 0
                    if print_errors:
                        print(
                            'Sensor error: %s.\nDate:%s. Hour: %s\nDirection: %s.' % (
                                sensor_id,
                                from_date,
                                from_hour,
                                direction
                            )
                        )
                else:
                    total = volume_numbers['volume']

                if veh_type == '[..,5.6)':
                    small = total
                elif veh_type == '[5.6,..)':
                    heavy = total
                else:
                    # Any other length range is accumulated; nansum treats
                    # the initial NaN as zero.
                    unknown_length = np.nansum([total, unknown_length])

            lines.append(
                [sensor_id, from_date, to_date, from_hour, to_hour, dir_name, small, heavy, unknown_length]
            )
    return lines


def get_traffic_NOR(sensor_id, from_day, to_day, cursor='', print_errors=True):
    """Download hourly traffic volumes by vehicle length for one Norwegian sensor.

    The API pages its reply, so the query is repeated with the `endCursor` of
    the previous page until `hasNextPage` is false.

    Args:
        sensor_id: traffic registration point id, e.g. "01777V885181".
        from_day: first day as 'YYYY-MM-DD'; the query starts at 00:00 UTC.
        to_day: last day as 'YYYY-MM-DD'; the query ends at 00:00 UTC of this
            day (NOTE(review): hours within `to_day` itself are presumably
            not included - confirm against the API documentation).
        cursor: GraphQL argument fragment for the first request; '' starts at
            the beginning, 'after:"<cursor>"' resumes from a known cursor.
        print_errors: whether request failures and hours without volume
            numbers are reported with `print`. Applies to every page.

    Returns:
        pandas.DataFrame with the columns sensor_id, from_date, to_date,
        from_hour, to_hour, sensor_dir, short_vehicles, long_vehicles and
        unknown_length; one row per hour and driving direction.

    Raises:
        ValueError: if the API gives no usable response after the retries.
    """
    lines = []
    while True:
        query = _build_hourly_volume_query_NOR(sensor_id, from_day, to_day, cursor)
        data = _post_query_NOR(query, sensor_id, print_errors)
        by_hour = data['trafficData']['volume']['byHour']

        lines.extend(_parse_hourly_edges_NOR(sensor_id, by_hour['edges'], print_errors))

        page_info = by_hour['pageInfo']
        if not page_info['hasNextPage']:
            break
        # Continue right after the last item of the page just read.
        cursor = 'after:"{end_cursor}"'.format(end_cursor=page_info['endCursor'])

    # Construct a pandas DF with the data
    sensor_traffic_df = pd.DataFrame.from_records(
        lines,
        columns=['sensor_id', 'from_date', 'to_date', 'from_hour', 'to_hour', 'sensor_dir', 'short_vehicles', 'long_vehicles', 'unknown_length']
    )

    return sensor_traffic_df

### Download and save traffic for NOR-SWE sensors
The data are saved in separate files for each sensor location

In [8]:
# Running this cell takes a bit more than an hour.

# Sensors near the Norway-Sweden border. The note after each id describes
# gaps in its data; ids that are commented out are deliberately excluded.
sensor_list = [
    "01777V885181",  # OK
    "77275V885276",  # OK
    # "51812V1203972",  # Missing: a bit of 2018 and early 2019 - late 2020. Won't capture the pandemic change with this
    "35829V885266",  # Missing: 2017-2018
    # "08581V885541",  # Missing: 2017 - mid 2019. Too little data for model fit, but include later? Not a lot of data, skip this one.
    # "98823V578220",  # Missing: 2017 - mid 2019, early 2020 and a bit of 2022. Røyrvik, somewhat doubtful
    "99923V578123",  # OK
    # "93561V578187",  # Missing: 2017 - mid 2019 and a bit of late 2019. Cannot compare before and after the pandemic
    "50089V578151",  # Missing: 2017 - late 2018 (+ start of pandemic?). Uncertain
    "84237V578097",  # OK
    # "11051V704737",  # Missing: partly 2017 and 2021 - end. Drevsjø øst, somewhat doubtful. No post-pandemic data
    "69140V704643",  # OK
    # "14158V705081",  # Missing: 2017-2019 + sporadic gaps. Flermoberget, somewhat doubtful. Uncertain
    "00737V704646",  # OK
    "94864V704707",  # Missing: sporadic gaps throughout the pandemic. Uncertain
    "94299V704696",  # OK
    "57929V705247",  # Missing: a bit of 2018/2019. Øyermoen, somewhat doubtful
    "76778V704564",  # OK. Morokulien, somewhat doubtful
    "05732V971567",  # Missing: 2017 - mid 2017
    "21405V2607269",  # Missing: 2017 - early 2019. NB!
    "09269V971425",  # Missing: 2017 - mid/late 2017
    "52209V971422",  # Missing: 2017 - late 2017. Prestbakke, somewhat doubtful
    "02535V971411",  # Missing: 2017 - late 2018
    # "57474V971423",  # Missing: 2017 - late 2018 + sporadic gaps around the turn of each year. Berby, slightly doubtful, but maybe not
    "04904V971774",  # OK
    "35229V971507",  # OK
]

from_day = '2017-01-01'
to_day = '2023-12-31'

path = '../Data/NorSwe/'

# One CSV file per sensor, named after the sensor id.
for sensor_id in sensor_list:
    d = get_traffic_NOR(sensor_id, from_day, to_day, cursor='', print_errors=True)
    d.to_csv(f'{path}{sensor_id}_by_length_hour.csv', index=False)

TimeoutError: 01777V885181
Waiting for 5 seconds
TimeoutError: 57929V705247
Waiting for 5 seconds
TimeoutError: 35229V971507
Waiting for 5 seconds


## Get sensor traffic for Finland

In [3]:
def get_traffic_FIN(start_date, end_date, tmsid):
    """Download raw Finnish TMS records and count vehicles per minute.

    One raw CSV file per day is read from Digitraffic for the TMS point
    `tmsid`. Days whose file cannot be read are reported with `print` and
    skipped. The individual vehicle records are then counted per TMS point,
    day, hour, minute, length class and direction.

    Args:
        start_date: first day of the period (anything `pd.date_range` accepts).
        end_date: last day of the period, inclusive.
        tmsid: id of the TMS point, e.g. "1431"; converted with `str`.

    Returns:
        pandas.DataFrame with the columns 'TMS point id', 'year', 'days',
        'hour', 'minute', 'v_type', 'direction', 'total_vehicles' and 'date'.
        'year' is the two-digit year and 'days' the day of the year, as used
        in the file names. The frame is empty (same columns) when no file
        could be read or the date range is empty.
    """
    out_columns = ['TMS point id', 'year', 'days', 'hour', 'minute',
                   'v_type', 'direction', 'total_vehicles', 'date']

    # tmsid haparanda: 1431
    dates = pd.date_range(start_date, end_date)

    df = pd.DataFrame({'date': dates})
    # The file names are keyed by day of the year and two-digit year.
    df['days'] = df['date'].dt.dayofyear.astype('int64')
    df['year'] = (df['date'].dt.year % 100).astype('int64')

    col_names = ["TMS point id","year","ordinal date","hour","minute","second","1/100 second","length (m)","lane","direction","vehicle class","speed (km/h)","faulty (0=valid record, 1=faulty record)","total time (technical)","time interval (technical)","queue start (technical)"]
    unused_columns = ["second","1/100 second","vehicle class","speed (km/h)","faulty (0=valid record, 1=faulty record)","total time (technical)","time interval (technical)","queue start (technical)"]

    data = []
    for d, y in zip(df['days'], df['year']):
        url = "https://tie.digitraffic.fi/api/tms/v1/history/raw/lamraw_" + str(tmsid) + "_" + str(y) + "_" + str(d) + ".csv"
        try:
            data_new = pd.read_csv(url, sep=";", header=None, names=col_names).drop(columns=unused_columns)
        except Exception:
            # Best effort: a day without a readable file is skipped. Unlike a
            # bare `except`, this still lets Ctrl-C interrupt the download.
            print('Error:' + url)
        else:
            data.append(data_new)

    if not data:
        # pd.concat([]) would raise "No objects to concatenate".
        return pd.DataFrame(columns=out_columns)

    data_df = pd.concat(data)

    # NOTE(review): the 'faulty' flag is dropped above, so faulty records are
    # counted too, and a missing length falls into '>=5.6m' - confirm intended.
    data_df["v_type"] = np.where(data_df['length (m)'] < 5.6, '<5.6m', '>=5.6m')
    data_df['total_vehicles'] = 1
    data_df = data_df.groupby(['TMS point id','year','ordinal date','hour','minute','v_type','direction'])['total_vehicles'].count().reset_index()
    # Attach the calendar date; assumes the file's 'year' column is the same
    # two-digit year that is used in the file name.
    data_df = pd.merge(data_df.rename(columns={'ordinal date': 'days'}), df, how='left', on=['days', 'year'])

    return data_df


In [None]:
# TMS point ids to download; one CSV file is written per point.
fin_sensor_list = ["1433", "1432", "1435", "1436", "1431"]
path = '../Data/FinSwe/'
from_day = datetime.datetime(year=2017, month=1, day=1)
to_day = datetime.datetime(year=2023, month=12, day=31)
for tms in fin_sensor_list:
    d = get_traffic_FIN(from_day, to_day, tmsid=tms)
    d.to_csv(f'{path}{tms}_by_length_minute.csv', index=False)
