In [68]:
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import imageio
import os

In [69]:
# Base URL of the GraphQL endpoint; both query helpers in this notebook POST to it.
url = "https://trafikkdata-api.atlas.vegvesen.no"

In [70]:
"""
STEP 1: Get traffic point data from Vestland county (county number 46)
"""

'\nSTEP 1: Get traffic point data from Vestland county (county number 46)\n'

In [71]:
def get_traffic_point(countyNumber: int, roadCategory: str, timeout: float = 30.0):
    """Fetch the traffic registration points of one county and road category.

    Posts a GraphQL ``trafficRegistrationPoints`` query to the module-level
    ``url``. The search is restricted to points that are operational, register
    continuously and count vehicles (see the ``searchQuery`` in the query text).

    Args:
        countyNumber: County number sent as ``$countyNumber``.
        roadCategory: A single road category id (the notebook uses "R", "E"
            and "F"); it is wrapped in a one-element list for
            ``$roadCategoryIds``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body of the response. For each point the query asks
        for ``id``, ``name`` and ``location.coordinates.latLon.{lat, lon}``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    query = """
    query trafficRegistrationPoints($countyNumber: PositiveInt!, $roadCategoryIds: [RoadCategoryId!]!) {
      trafficRegistrationPoints(searchQuery: { countyNumbers: [$countyNumber], roadCategoryIds: $roadCategoryIds, isOperational: true, registrationFrequency: CONTINOUS, trafficType: VEHICLE }) {
        id
        name
        location {
          coordinates {
            latLon {
              lat
              lon
            }
          }
        }
      }
    }
    """

    variables = {
        "countyNumber": countyNumber,
        "roadCategoryIds": [roadCategory]
    }

    response = requests.post(
        url,
        json={'query': query, 'variables': variables},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()


In [72]:
# Collect all traffic points of the three major road types ("R", "E", "F")
# for county number 46 (Vestland) and flatten them into one row per point.
list_of_road_types = ["R", "E", "F"]

# One request per road type; every returned point becomes a flat record that
# also remembers which road type it was fetched under.
prepared_data = [
    {
        'id': point['id'],
        'name': point['name'],
        'latitude': point['location']['coordinates']['latLon']['lat'],
        'longitude': point['location']['coordinates']['latLon']['lon'],
        'roadtype': road_type,
    }
    for road_type in list_of_road_types
    for point in get_traffic_point(46, road_type)['data']['trafficRegistrationPoints']
]

df = pd.DataFrame(prepared_data)

In [73]:
# TODO: remove this cell; it only prunes the data to make the later steps faster.
#
# It keeps a reproducible 15% sample, i.e. removes 85% of the rows. It is done
# at this step because the table is known to be properly formatted here.
# NOTE: re-running this cell without re-running the previous one samples the
# already-pruned frame again.
SAMPLE_FRACTION = 0.15
SAMPLE_SEED = 42

df = df.sample(frac=SAMPLE_FRACTION, random_state=SAMPLE_SEED)

In [74]:
# Step 1 complete: `df` holds one row per traffic point with the columns
#   id, name, latitude, longitude, roadtype
# for example:
#   83347V805435  Indre Arna  60.421439  5.461440  F

# Print the first rows to verify the format, then persist the table.
step1_preview = df.head()
print(step1_preview)

df.to_csv('traffic_data_step1.csv', index=False)

               id                          name   latitude  longitude roadtype
157  37235B802722     Kristianborgvannet sykkel  60.352833   5.338993        E
341  83347V805435                    Indre Arna  60.421439   5.461440        F
315  86173V805748                        Torget  60.395323   5.325935        F
234  52794V805054                         Bønes  60.328319   5.311050        F
155  82885V805617  Fløyfjellstunnelen sørgående  60.389045   5.339588        E


In [75]:
"""
STEP 2: Get timed data
"""

'\nSTEP 2: Get timed data\n'

In [76]:
def get_hourly_volume_for_traffic_point(traffic_point_id: str, from_dt: str, to_dt: str, timeout: float = 30.0):
    """Fetch the hourly total volumes of one traffic point for a time window.

    Posts a GraphQL ``trafficData`` query to the module-level ``url`` and asks
    for ``volume.byHour(from, to)``, selecting ``total.volumeNumbers.volume``
    on every returned edge.

    Args:
        traffic_point_id: Id of the traffic registration point.
        from_dt: Start of the window, sent as a ``ZonedDateTime`` string
            (the notebook uses e.g. "2023-03-19T00:00:00+01:00").
        to_dt: End of the window, same format as ``from_dt``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    query = """
        query trafficData($traffic_point_id: String!, $from_dt: ZonedDateTime!, $to_dt: ZonedDateTime!) {
            trafficData(trafficRegistrationPointId: $traffic_point_id) {
                volume {
                    byHour(from: $from_dt, to: $to_dt) {
                       edges {
                        node {
                            total {
                            volumeNumbers {
                                volume
                            }
                            }
                        }
                        }
                    }
                    }
                }
                }
    """

    variables = {
        "from_dt": from_dt,
        "to_dt" : to_dt,
        "traffic_point_id" : traffic_point_id
    }

    response = requests.post(
        url,
        json={'query': query, 'variables': variables},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()

In [77]:
def get_volumes_per_hour(traffic_id: str, from_datetime: str, to_datetime: str, hour_span: int):
    """Return the hourly total volumes of one traffic point as a flat list.

    Calls ``get_hourly_volume_for_traffic_point`` once and reads
    ``total.volumeNumbers.volume`` from the returned edges, in response order.

    Args:
        traffic_id: Id of the traffic registration point.
        from_datetime: Start of the window, forwarded unchanged to the query.
        to_datetime: End of the window, forwarded unchanged to the query.
        hour_span: Maximum number of hourly values to collect.

    Returns:
        A list with at most ``hour_span`` volumes. The list is shorter (possibly
        empty) when the response has no traffic data, fewer edges than
        ``hour_span``, or an edge without volume numbers; collection stops at
        the first such edge. Callers must therefore check the length.
        NOTE(review): position ``i`` is assumed to be the i-th hour of the
        window, i.e. edges come back chronologically without gaps - confirm
        against the API.
    """
    volumes = []
    data = get_hourly_volume_for_traffic_point(traffic_id, from_datetime, to_datetime)['data']['trafficData']

    try:
        # Slicing instead of indexing by hour: a response with fewer edges than
        # hour_span used to raise an uncaught IndexError.
        edges = data['volume']['byHour']['edges'][:max(hour_span, 0)]
    except TypeError:
        # Some level of the response is null (e.g. no data for this point).
        return volumes

    for edge in edges:
        try:
            volumes.append(edge['node']['total']['volumeNumbers']['volume'])
        except TypeError:
            # This hour has no volume numbers; return what was collected so far.
            break
    return volumes

In [78]:
# Add the traffic volume for each hour of the day at each traffic point.
#
# This step is very resource-intensive: it issues one API request per point.
list_of_all_ids = df['id'].tolist()

# Selected range, remains constant for now. It is a 24 hour period from a year ago.
from_dt = "2023-03-19T00:00:00+01:00"
to_dt = "2023-03-20T00:00:00+01:00"
hour_span = 24  # manual input for now, but COULD be inferred

hour_columns = [f'hour_{hour}' for hour in range(hour_span)]

# Collect the volumes per point first and touch the DataFrame only once below,
# instead of doing a boolean-mask assignment per point and per hour.
volumes_by_id = {}
for traffic_id in list_of_all_ids:
    traffic_volumes_at_point = get_volumes_per_hour(traffic_id, from_dt, to_dt, hour_span)

    # Points without data are dropped. The same applies to points with an
    # incomplete series: indexing a short list by hour used to raise IndexError.
    if len(traffic_volumes_at_point) == 0 or len(traffic_volumes_at_point) < hour_span:
        continue

    volumes_by_id[traffic_id] = traffic_volumes_at_point[:hour_span]

hourly_volumes = pd.DataFrame(
    list(volumes_by_id.values()),
    index=list(volumes_by_id.keys()),
    columns=hour_columns,
)

# Keep only points with a complete series. Removing any existing hour_* columns
# first makes the cell safe to re-run; join(on='id') keeps df's row index.
df = (
    df[df['id'].isin(list(volumes_by_id))]
    .drop(columns=hour_columns, errors='ignore')
    .join(hourly_volumes, on='id')
)


In [79]:
# STEP 2 complete!
#
# The table now has the columns
#   id, name, latitude, longitude, roadtype, hour_0, hour_1, ..., hour_23
# for example:
#   83347V805435,Indre Arna,60.421439,5.46144,F,114,97,63,...,153,81

# Print the first few rows of the DataFrame to verify the format, then persist it.
step2_preview = df.head()
print(step2_preview)

df.to_csv('traffic_data_step2.csv', index=False)

               id                          name   latitude  longitude  \
157  37235B802722     Kristianborgvannet sykkel  60.352833   5.338993   
341  83347V805435                    Indre Arna  60.421439   5.461440   
315  86173V805748                        Torget  60.395323   5.325935   
234  52794V805054                         Bønes  60.328319   5.311050   
155  82885V805617  Fløyfjellstunnelen sørgående  60.389045   5.339588   

    roadtype  hour_0  hour_1  hour_2  hour_3  hour_4  ...  hour_14  hour_15  \
157        E       1       0       0       1       0  ...       12        6   
341        F     114      97      63      44      36  ...      530      571   
315        F     435     440     432     316     194  ...      532      556   
234        F     147     119      79      49      29  ...      875      898   
155        E     271     172     125      86      69  ...     2005     1833   

     hour_16  hour_17  hour_18  hour_19  hour_20  hour_21  hour_22  hour_23  
157     

In [80]:
"""
STEP 3: Visualize the data
"""

'\nSTEP 3: Visualize the data\n'