In [214]:
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import imageio
import os

In [215]:
# Endpoint of the traffic data API; the GraphQL queries in this notebook are POSTed to it.
url = "https://trafikkdata-api.atlas.vegvesen.no"

In [216]:
"""
STEP 1: Get traffic point data from Vestlandet county
"""

'\nSTEP 1: Get traffic point data from Vestlandet county\n'

In [217]:
def get_traffic_point(countyNumber: int, roadCategory: str, timeout: float = 30.0):
    """Fetch the traffic registration points of one county and road category.

    The search is restricted by the query itself to operational points that
    register vehicle traffic continuously.

    Args:
        countyNumber: County number used as the `countyNumbers` search filter.
        roadCategory: A single road category id (for example "E") to filter on.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook forever.

    Returns:
        The decoded JSON body of the GraphQL response as a dict.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    query = """
    query trafficRegistrationPoints($countyNumber: PositiveInt!, $roadCategoryIds: [RoadCategoryId!]!) {
      trafficRegistrationPoints(searchQuery: { countyNumbers: [$countyNumber], roadCategoryIds: $roadCategoryIds, isOperational: true, registrationFrequency: CONTINUOUS, trafficType: VEHICLE }) {
        id
        name
        location {
          coordinates {
            latLon {
              lat
              lon
            }
          }
        }
      }
    }
    """

    variables = {
        "countyNumber": countyNumber,
        "roadCategoryIds": [roadCategory]
    }

    response = requests.post(
        url,
        json={'query': query, 'variables': variables},
        timeout=timeout,
    )
    # Fail loudly here instead of letting callers trip over a missing 'data' key.
    response.raise_for_status()
    return response.json()

In [218]:
"""
Collect all traffic points of the three major road types for Vestlandet
"""
list_of_road_types = ["R", "E", "F"]


def _to_record(point, road_type):
    """Flatten one traffic point from the API response into a single table row."""
    lat_lon = point['location']['coordinates']['latLon']
    return {
        'id': point['id'],
        'name': point['name'],
        'latitude': lat_lon['lat'],
        'longitude': lat_lon['lon'],
        'roadtype': road_type,
    }


# One request per road type (county number 46); the rows keep the order in
# which the road types are listed and in which the API returned the points.
prepared_data = [
    _to_record(point, road_type)
    for road_type in list_of_road_types
    for point in get_traffic_point(46, road_type)['data']['trafficRegistrationPoints']
]

df = pd.DataFrame(prepared_data)

In [219]:
"""
TODO: remove this cell; it only prunes the data to make the later steps faster.

It drops 85% of the rows. The pruning happens at this step because the data is known to be properly formatted here.

TODO: increasing the fraction produces an error; the cause has not been investigated yet.
"""
# Sample from a frame rebuilt out of `prepared_data` rather than from `df`
# itself: re-running this cell then always yields the same 15% sample instead
# of 15% of the previous sample.
df = pd.DataFrame(prepared_data).sample(frac=0.15, random_state=42)

In [220]:
"""
Step 1 complete: the table has this form (example rows from `df.head()`):
                id                 name   latitude  longitude roadtype
231  44953V1885943  Bjørgeveien Straume  60.325105   5.273449        F
110   25132V805616  Danmarksplass-Åsane  60.383487   5.335978        E
250   65743V805722           Juvik nord  60.426053   5.195758        F
9     29614V805708       Sotrabrua vest  60.372083   5.155691        R
"""

# Preview the first five rows of the step 1 table.
print(df.head(n=5))

# Write the table to disk without the index column.
df.to_csv(path_or_buf='traffic_data_step1.csv', index=False)

                id                 name   latitude  longitude roadtype
231  44953V1885943  Bjørgeveien Straume  60.325105   5.273449        F
110   25132V805616  Danmarksplass-Åsane  60.383487   5.335978        E
250   65743V805722           Juvik nord  60.426053   5.195758        F
9     29614V805708       Sotrabrua vest  60.372083   5.155691        R
93    87699V804741            Blindheim  60.469487   5.370048        E


In [221]:
"""
STEP 2: Get timed data
"""

'\nSTEP 2: Get timed data\n'

In [222]:
def get_yearly_avarage_for_point(traffic_point_id, timeout: float = 30.0):
    """Fetch the per-year average daily traffic volume of one traffic point.

    Args:
        traffic_point_id: Id of the traffic registration point to query.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook forever.

    Returns:
        The decoded JSON body of the GraphQL response as a dict. The yearly
        entries are found under
        ['data']['trafficData']['volume']['average']['daily']['byYear'].

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    query = """
    query trafficData($traffic_point_id : String!) {
       trafficData(trafficRegistrationPointId: $traffic_point_id) {
        volume {
          average {
            daily {
              byYear {
                year
                total {
                  volume {
                    average
                    confidenceInterval {
                      lowerBound
                      upperBound
                    }
                  }
                  coverage {
                    percentage
                  }
                }
              }
            }
          }
        }
      }
  }
    """

    variables = {
        "traffic_point_id" : traffic_point_id
    }

    response = requests.post(
        url,
        json={'query': query, 'variables': variables},
        timeout=timeout,
    )
    # Fail loudly here instead of letting callers trip over a missing 'data' key.
    response.raise_for_status()
    return response.json()

In [223]:
"""
Add the average daily traffic volume of each year (from CUT_OFF_YEAR onwards) to each traffic point.

This step is very resource-intensive: it sends one API request per traffic point!
"""
list_of_all_ids = df['id'].tolist()


CUT_OFF_YEAR = 2015
LAST_YEAR = 2022  # last year (inclusive) that gets its own column


def _extract_yearly_volumes(by_year_entries, cut_off_year):
    """Return {year: average daily volume} for entries from `cut_off_year` on.

    Entries whose `total` or `volume` node is None carry no usable value and
    are skipped.
    """
    volumes = {}
    for entry in by_year_entries:
        if entry['year'] < cut_off_year:
            continue
        try:
            volumes[entry['year']] = entry['total']['volume']['average']
        except TypeError:
            # Subscripting None: this year has no `total`/`volume` data.
            continue
    return volumes


# One API request per traffic point: traffic point id -> {year: volume}.
d = {}
for traffic_id in list_of_all_ids:
    response = get_yearly_avarage_for_point(traffic_id)
    by_year = response['data']['trafficData']['volume']['average']['daily']['byYear']
    d[traffic_id] = _extract_yearly_volumes(by_year, CUT_OFF_YEAR)

# Keep only the points that have a value for the cut-off year itself. Every
# stored year is >= CUT_OFF_YEAR, so this matches "earliest year is not after
# the cut-off" while also coping with points that have no data at all
# (calling min() on their empty dict would raise ValueError).
new_d = {
    point_id: volumes
    for point_id, volumes in d.items()
    if CUT_OFF_YEAR in volumes
}

# One column per year, defaulting to 0 where no volume is available.
year_columns = range(CUT_OFF_YEAR, LAST_YEAR + 1)
for year in year_columns:
    df[str(year)] = 0

for index, traffic_id in df['id'].items():
    volumes = new_d.get(traffic_id)
    if volumes is None:
        continue
    for year in year_columns:
        if year in volumes:
            df.at[index, str(year)] = volumes[year]
                


In [224]:
"""
STEP 2 complete!

Table is now in the form of (one column per year holding the average daily volume; 0 where no value was stored):
id,name,latitude,longitude,roadtype,2015,2016,2017,2018,2019,2020,2021,2022
44953V1885943,Bjørgeveien Straume,60.325105,5.273449,F,11542,11369,11853,14603,11458,11046,11365,10964
25132V805616,Danmarksplass-Åsane,60.383487,5.335978,E,0,0,0,0,0,0,0,0
65743V805722,Juvik nord,60.426053,5.195758,F,0,0,0,0,0,0,0,0
29614V805708,Sotrabrua vest,60.372083,5.155691,R,26899,27151,27663,27796,27941,26198,26922,27457
"""


# Preview the first five rows to check that the year columns were added.
print(df.head(n=5))

# Write the table to disk without the index column.
df.to_csv(path_or_buf='traffic_data_step2.csv', index=False)

                id                 name   latitude  longitude roadtype   2015  \
231  44953V1885943  Bjørgeveien Straume  60.325105   5.273449        F  11542   
110   25132V805616  Danmarksplass-Åsane  60.383487   5.335978        E      0   
250   65743V805722           Juvik nord  60.426053   5.195758        F      0   
9     29614V805708       Sotrabrua vest  60.372083   5.155691        R  26899   
93    87699V804741            Blindheim  60.469487   5.370048        E  16809   

      2016   2017   2018   2019   2020   2021   2022  
231  11369  11853  14603  11458  11046  11365  10964  
110      0      0      0      0      0      0      0  
250      0      0      0      0      0      0      0  
9    27151  27663  27796  27941  26198  26922  27457  
93   17460  17796  17917  17222  16378  17058  17178  


In [225]:
"""
STEP 3: Visualize the data
"""

'\nSTEP 3: Visualize the data\n'