In [25]:
import json

import pandas as pd
import requests
from tqdm import tqdm

# Use the Highways England API to acquire traffic flow data

In [31]:
# Use the Highways England API to get the listing of all traffic sites
def get_all_sites():
    """Send a GET request to the API's `sites` endpoint and return the response object."""
    sites_endpoint = 'http://webtris.highwaysengland.co.uk/api/v1/sites'
    response = requests.get(sites_endpoint)
    return response

In [33]:
# Fetch the site listing once; `sites` holds the HTTP response returned by get_all_sites()
sites = get_all_sites()

In [48]:
# Persist the raw site listing (parsed JSON body of `sites`) to disk as JSON.
# NOTE(review): this is an absolute, machine-specific path; consider a project-relative path.
raw_sites_path = 'C:\\Users\\U45720\\Documents\\GitLab\\human_factors_space_apps\\data\\raw\\sites.json'
with open(raw_sites_path, 'w') as sites_file:
    sites_file.write(json.dumps(sites.json()))

In [50]:
# Take the value stored under the 'sites' key of the parsed response; it is iterated row by row further down
sites_rows = sites.json()['sites']

In [88]:
def request_report(
    report_type: str,
    site_ids: list,
    start_date: str,
    end_date: str,
    page: int = 1,
    page_size: int = 50,
):
    """
    Request report data for a list of traffic sites and return the HTTP response.

    Args:

        report_type(str) : report name placed in the URL path; can choose between Daily, Monthly, Yearly

        site_ids(list) : list of site ids; each id is converted with str() before being joined

        start_date(str) : datetime str with format %d%m%Y

        end_date(str): datetime str with format %d%m%Y

        page(int) : value sent as the `page` query parameter (defaults to 1).
            Presumably the page number to fetch - confirm against the API documentation.

        page_size(int) : value sent as the `page_size` query parameter (defaults to 50).
            NOTE(review): confirm that the default covers the date ranges requested
            in this notebook; pass a larger value or paginate otherwise.

    Returns:

        Result: the `requests` response object for the GET request
        (call `.json()` on it to obtain the parsed body)
    """
    # '%2C' is the URL-encoded comma; the previous '%C' separator is not a valid
    # percent-escape, so any request for more than one site produced a malformed URL.
    formatted_site_ids = '%2C'.join(str(site_id) for site_id in site_ids)
    url = (
        f'http://webtris.highwaysengland.co.uk/api/v1/reports/{report_type}'
        f'?sites={formatted_site_ids}'
        f'&start_date={start_date}'
        f'&end_date={end_date}'
        f'&page={page}'
        f'&page_size={page_size}'
    )

    return requests.get(url)

In [284]:
# e.g. the Monthly report for site '2' between 01/01/2020 and 30/05/2020.
# page and page_size are passed explicitly because request_report needs them to build the URL.
# NOTE(review): confirm that one page of 50 rows covers the whole date range.
report = request_report(
    report_type='Monthly',
    site_ids=['2'],
    start_date='01012020',
    end_date='30052020',
    page=1,
    page_size=50,
)

In [202]:
def get_traffic_flow(report_data):
    """
    Flatten a collection of monthly report entries into per-day flow readings.

    Args:

        report_data : sequence of month entries, each providing a 'Month' label
            and a 'Days' list whose items carry 'DayNumber' and 'FlowValue'

    Returns:

        list of [label, flow_value] pairs, where label is the day number
        left-padded with '0' to two characters, a space, and the month label
    """
    return [
        [day['DayNumber'].rjust(2, '0') + ' ' + month['Month'], day['FlowValue']]
        for month in report_data
        for day in month['Days']
    ]

In [286]:
# Collect the date labels (e.g. '01 January 2020') that the parsed data will be keyed by
parsed_example = get_traffic_flow(report.json()['MonthCollection'])
dates = [flow_entry[0] for flow_entry in parsed_example]

In [266]:
# Template to be turned into a DataFrame once it has been filled with ALL the
# Traffic Flow data for the Space Apps Challenge: one (separate) empty dict per date label
_dict = {date_label: {} for date_label in dates}

In [270]:
# Download the Monthly report of every active site and store its flow values per date.
# tqdm wraps the rows directly (not an enumerate object) so the progress bar can show a total.
for row in tqdm(sites_rows):
    # Only active traffic sites are taken into consideration
    if row['Status'] != 'Active':
        continue
    # page and page_size are passed explicitly because request_report needs them to build the URL.
    # NOTE(review): confirm that one page of 50 rows covers the whole date range.
    report = request_report(
        report_type='Monthly',
        site_ids=[row['Id']],
        start_date='01012020',
        end_date='30052020',
        page=1,
        page_size=50,
    )
    # Only parse the body when the HTTP status code signals success
    if report.status_code != 200:
        continue
    for date_label, flow_value in get_traffic_flow(report.json()['MonthCollection']):
        # setdefault guards against a site reporting a date that is not among the
        # pre-built keys, which previously raised KeyError and aborted the whole loop
        _dict.setdefault(date_label, {})[row['Id']] = flow_value



0it [00:00, ?it/s]

2it [00:00, 12.48it/s]

3it [00:00,  9.52it/s]

4it [00:00,  8.22it/s]

In [262]:
# Build the table from the nested dict: outer keys (date labels) become columns,
# inner keys (site ids) become the index
df = pd.DataFrame(_dict)

In [297]:
# Replace cells that are exactly the empty string with '0'.
# This must be an exact-match replace: with regex=True the empty pattern matches at every
# position of every string, so '0' would be inserted between the digits of valid readings.
df = df.replace('', '0')

In [301]:
# Fill missing cells (site/date combinations that never received a value) with '0'.
# fillna avoids the dependency on `np`, which this notebook never imports, and on the
# `np.NaN` alias, which was removed in NumPy 2.0.
df = df.fillna('0')

In [302]:
# Convert every column to integers in one vectorised cast instead of calling int()
# on each cell through a Python-level lambda; 'int64' is spelled out so the result
# does not depend on the platform's default integer width.
df = df.astype('int64')

In [303]:
# Preview the first rows of the table after the integer conversion
df.head()

Unnamed: 0,01 January 2020,02 January 2020,03 January 2020,04 January 2020,05 January 2020,06 January 2020,07 January 2020,08 January 2020,09 January 2020,10 January 2020,...,21 April 2020,22 April 2020,23 April 2020,24 April 2020,25 April 2020,26 April 2020,27 April 2020,28 April 2020,29 April 2020,30 April 2020
2,10909,15816,14583,13937,15245,17793,15769,16341,16307,16601,...,6313,6406,6268,6215,2918,3087,7658,6962,6970,6699
3,31727,43372,43132,35515,35749,42985,43892,44861,46761,47852,...,16895,17103,17274,18041,8257,7053,17580,17845,18323,18548
5,28818,43035,47010,43095,40438,48950,48374,49585,50652,52321,...,24132,24829,24315,24418,13020,10501,24047,24307,25706,26312
7,876,1539,1685,1222,1030,1959,2020,2169,2135,2229,...,872,836,884,954,440,294,850,843,896,960
8,66115,86304,86379,82549,82084,91851,93752,96793,97001,99517,...,34267,35380,35215,35830,20016,16623,36078,35615,37067,37834


In [304]:
# Write the table to a CSV file using a relative path (resolved against the current working directory)
df.to_csv('traffic_flow_uk.csv')