In [16]:
import pandas as pd
import requests
import xml.etree.ElementTree as ET
from xml import etree

## National Highways roadworks data (.gov)

In [17]:
# Source file for this section; the file name suggests the 2022-08-22 roadworks export.
URL = "https://s3.eu-west-2.amazonaws.com/data.nationalhighways.co.uk/ha-roadworks/nh_roadworks_2022_8_22.xml"

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(URL, timeout=30)
# Fail loudly on 4xx/5xx so an error body is never written to disk as if it were data.
response.raise_for_status()
# Keep a local copy of the raw bytes so the parsing cells can read them from disk.
with open('2022_8_22.xml', 'wb') as file:
    file.write(response.content)

In [18]:
# Parse the saved XML file and keep a handle on the document's top-level element.
tree = ET.parse(source='2022_8_22.xml')
root = tree.getroot()

In [19]:
# Step down two levels: first child of the document root's first child.
# Derived from `tree` rather than from `root` itself so that re-running this
# cell always lands on the same element instead of descending further each time.
# NOTE(review): presumably this element is the container of the roadwork records - confirm against the XML layout.
root = tree.getroot()[0][0]

In [20]:
# One attribute mapping per direct child element of `root`.
children = [element.attrib for element in root]

In [21]:
# Turn the list of attribute mappings into a table, one row per element.
df = pd.DataFrame.from_records(data=children)

In [22]:
# Show which XML attribute names ended up as columns.
df.columns

Index(['NEW_EVENT_NUMBER', 'SDATE', 'EDATE', 'EXPDEL', 'DESCRIPTION',
       'CLOSURE_TYPE', 'STATUS', 'PUBLISHED_DATE', 'OLD_REFERENCE_NUMBER'],
      dtype='object')

In [23]:
# Convert the three timestamp columns from their raw attribute values to datetimes.
for date_col in ('SDATE', 'EDATE', 'PUBLISHED_DATE'):
    df[date_col] = pd.to_datetime(df[date_col])

In [24]:
# Display rows with the latest PUBLISHED_DATE first; the sorted copy is only
# shown, `df` itself keeps its original order.
df.sort_values('PUBLISHED_DATE', ascending=False)

Unnamed: 0,NEW_EVENT_NUMBER,SDATE,EDATE,EXPDEL,DESCRIPTION,CLOSURE_TYPE,STATUS,PUBLISHED_DATE,OLD_REFERENCE_NUMBER
913,00267992-003,2022-09-01 20:00:00,2022-09-23 05:30:00,Moderate (10 - 30 mins),M6 southbound jct 15 entry slip \r\nclosure of...,Ad-hoc Street/Road Works,Published,2022-08-21 15:04:55,
463,00268003-002,2022-08-30 20:00:00,2022-09-21 05:30:00,Moderate (10 - 30 mins),M6 northbound jct 15 \r\nclosure of jct 15 ent...,Developer Works,Published,2022-08-21 15:02:07,
138,00267909-003,2022-08-30 20:00:00,2022-09-21 05:30:00,Moderate (10 - 30 mins),M6 northbound jct 15 \r\ncarriageway closure f...,Developer Works,Published,2022-08-21 14:57:21,
292,00274427-002,2022-08-31 21:00:00,2022-09-01 06:00:00,Moderate (10 - 30 mins),M6 northbound jct 4 to jct 6 including M42 lin...,National Technology Works,Published,2022-08-21 14:52:44,
768,00270923-003,2022-08-22 20:00:00,2022-08-24 06:00:00,Slight (less than 10 mins),M42 southbound link road jct 3A to M42 southbo...,Programmed Routine Works,Published,2022-08-21 13:54:12,
...,...,...,...,...,...,...,...,...,...
1127,00226065-001,2022-08-29 22:00:00,2022-09-03 05:00:00,Moderate (10 - 30 mins),M23 southbound Jct 8 to M25 clockwise and anti...,Programmed Routine Works,Published,2021-07-28 09:58:24,
501,00224633-001,2022-08-22 21:00:00,2022-08-23 05:00:00,Slight (less than 10 mins),M67 Westbound Junction 3 - 2 Lanes 1 & 2 Westb...,Programmed Routine Works,Published,2021-07-13 15:33:06,
1126,00146456-002,2020-01-13 16:36:00,2025-01-01 06:00:00,Moderate (10 - 30 mins),No Closures allowed unless 5k away between Jun...,Embargo,Published,2020-12-22 12:35:25,
1125,00152862-001,2022-09-05 09:00:00,2022-09-09 15:30:00,Slight (less than 10 mins),A259 eastbound and westbound Pevensey roundabo...,Licensee Works,Published,2020-02-19 14:19:52,


In [25]:
# Print the untruncated description of every row whose closure type is 'Traffic Incidents'.
for desc in df.loc[df['CLOSURE_TYPE'].eq('Traffic Incidents'), 'DESCRIPTION']:
    print(desc)

M6 Southbound Jct32 to 31a lane closures for recovery of HGV and installation of Varioguard
M18 northbound and southbound Jct 2.
Carriageway and lane closures due to incident.
Diversion route in place via National Highways network.
M180 westbound Jct 3 to Jct 2.
Carriageway and lane closures due to bridge strike.
Diversion route in place via Local Highway Authority network.


In [None]:
# Save the table as a spreadsheet, leaving out the DataFrame index column.
df.to_excel(excel_writer='2022_8_22.xlsx', index=False)

## TfL Data

In [11]:
import requests, json

In [12]:
def get_json_data(url) -> list:
    try:
        hdr ={
        # Request headers
        'Cache-Control': 'no-cache',
        }

        response = requests.get(url, headers=hdr)
    except Exception as e:
        print(e)
        return False
    return response.json()

def convert_json_to_df(json: list) -> pd.DataFrame:
    """Build a DataFrame from the records in ``json``.

    Args:
        json: Sequence of records (for example a list of dicts).

    Returns:
        A DataFrame created with ``pd.DataFrame.from_records``.
    """
    frame = pd.DataFrame.from_records(json)
    return frame

def get_df_from_endpoint(url: str) -> pd.DataFrame | bool:
    json = get_json_data(url)
    if json:
        return convert_json_to_df(json)
    return False

In [13]:
# All roads managed by TfL.

roads_url = "https://api.tfl.gov.uk/Road/"
# NOTE(review): get_df_from_endpoint may return False instead of a DataFrame - check before using the result.
all_roads_df = get_df_from_endpoint(roads_url)

In [21]:
# Disruptions by date: the window is set by the startDate/endDate query parameters.
# NOTE(review): both dates are 01-01, so DD-MM-YYYY and MM-DD-YYYY read the same here -
# confirm which format the API expects before changing them.
disruptions_url = "https://api.tfl.gov.uk/Road/all/Street/Disruption?startDate=01-01-2022&endDate=01-01-2023"
disruptions_df = get_df_from_endpoint(disruptions_url)

In [22]:
# Save the disruptions table as a spreadsheet; no index=False is passed, so the
# DataFrame index is written as the first column.
disruptions_df.to_excel('Exploring disruptions.xlsx')

## WebTRIS Highways England API

In [8]:
import requests, json
import pandas as pd

In [2]:
# Endpoint requested below for the list of sites.
# NOTE(review): this uses plain http - confirm whether an https endpoint is available.
sites_url = "http://webtris.highwaysengland.co.uk/api/v1.0/sites"

In [3]:
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(sites_url, timeout=30)
# Stop here on 4xx/5xx so later cells never try to decode an error body as site data.
response.raise_for_status()

In [9]:
# Decode the response body as JSON.
sites_json = response.json()

In [14]:
# Build a table from the records stored under the payload's 'sites' key.
# NOTE(review): this rebinds `df`, replacing any frame previously held under that name.
df = pd.DataFrame.from_records(data=sites_json['sites'])

In [15]:
# Display the sites table; the rendered output is truncated to the first and last rows.
df

Unnamed: 0,Id,Name,Description,Longitude,Latitude,Status
0,1,MIDAS site at M4/2295A2 priority 1 on link 105...,M4/2295A2,-0.520380,51.493012,Inactive
1,2,MIDAS site at A1M/2259B priority 1 on link 126...,A1M/2259B,-0.320275,52.535158,Active
2,3,MIDAS site at M5/7482B priority 1 on link 1090...,M5/7482B,-2.175138,52.175652,Active
3,4,MIDAS site at M3/2173A priority 1 on link 1030...,M3/2173A,-1.392374,50.960359,Inactive
4,5,MIDAS site at M25/5764B priority 1 on link 199...,M25/5764B,0.283162,51.575617,Active
...,...,...,...,...,...,...
19350,19779,TMU Site 9984/1 on link A5 northbound between ...,9984/1,-0.707930,51.998730,Active
19351,19780,TMU Site 9984/2 on link A5 southbound between ...,9984/2,-0.707696,51.998782,Active
19352,19781,TMU Site 9985/1 on A46 southbound between A563...,9985/1,-1.195749,52.663568,Active
19353,19782,TMU Site 9986/1 on link A46 northbound between...,9986/1,-1.116007,52.694358,Active
