This report provides issue/data quality information on the most recent endpoints for a hardcoded list of prioritised list of LPAs, or organisations from an input.

The input should be called 'organisation_input.csv' and contain one column, 'organisation' that has the organisation codes for the LPAs to be included in the report.

In [1]:
import numpy as np
import pandas as pd
import os
%pip install wget
import wget

Note: you may need to restart the kernel to use updated packages.


Download helper utility files from GitHub:

In [2]:
util_file = "master_report_endpoint_utils.py"
if os.path.isfile(util_file):
    from master_report_endpoint_utils import *
else:
    url = "https://raw.githubusercontent.com/digital-land/jupyter-analysis/main/service_report/master_report/master_report_endpoint_utils.py"
    wget.download(url)
    from master_report_endpoint_utils import *


In [3]:
# Get input from .csv or use default prioritised LPAs
input_path = './organisation_input.csv'
if os.path.isfile(input_path):
    input_df = pd.read_csv(input_path)
    organisation_list = input_df['organisation'].tolist()
    print('Input file found. Using', len(organisation_list), 'organisations from input file.')
else:
    organisation_list = ['local-authority-eng:BUC', 'local-authority-eng:DAC', 'local-authority-eng:DNC',
    'local-authority-eng:GLO', 'local-authority-eng:CMD', 'local-authority-eng:LBH', 'local-authority-eng:SWK',
    'local-authority-eng:MDW', 'local-authority-eng:NET', 'local-authority-eng:BIR', 'local-authority-eng:CAT',
    'local-authority-eng:EPS', 'local-authority-eng:BNE', 'local-authority-eng:GAT', 'local-authority-eng:GRY',
    'local-authority-eng:KTT', 'local-authority-eng:SAL', 'local-authority-eng:TEW', 'local-authority-eng:WBK',
    'local-authority-eng:DST', 'local-authority-eng:DOV', 'local-authority-eng:LIV', 'local-authority-eng:RDB',
    'local-authority-eng:WFT', 'local-authority-eng:NLN', 'local-authority-eng:NSM', 'local-authority-eng:SLF',
    'local-authority-eng:WRL' ]
    print('Input file not found. Using default list of organisations.')

Input file not found. Using default list of organisations.


In [4]:
# Get organisation names
organisation_info_df = pd.read_csv('https://raw.githubusercontent.com/digital-land/organisation-collection/main/data/local-authority.csv')
organisation_info_df.head()
organisation_name_dict = {}
for organisation in organisation_list:
    organisation_code = organisation.split(':')[1]
    organisation_name = organisation_info_df.loc[organisation_info_df['reference'] == organisation_code].iloc[0]['name']
    organisation_name_dict[organisation] = organisation_name

In [5]:
datasette_url = "https://datasette.planning.data.gov.uk/"


collection_list = ['article-4-direction', 'article-4-direction-area', 'conservation-area', 'conservation-area-document', 'listed-building-outline', 'tree-preservation-order', 'tree-preservation-zone', 'tree']
pipelines_list = ['article-4-direction', 'article-4-direction-area', 'conservation-area', 'conservation-area-document', 'listed-building-outline', 'tree-preservation-order', 'tree-preservation-zone', 'tree', 'tree,tree-preservation-order', 'tree-preservation-order,tree-preservation-zone']
all_orgs_recent_endpoints={}
for organisation in organisation_list:
    recent_endpoints_df = get_latest_endpoints(organisation)
    recent_endpoints_df = recent_endpoints_df[recent_endpoints_df['pipelines'].isin(pipelines_list)]
    all_orgs_recent_endpoints[organisation] = recent_endpoints_df


In [6]:
all_orgs_recent_endpoints["local-authority-eng:SWK"].head(50)

Unnamed: 0,endpoint_url,status,collection,pipelines,organisation,name,resource,maxentrydate,entrydate,end_date,last_status,last_updated_date,date_last_status_200
0,https://raw.githubusercontent.com/digital-land...,200.0,article-4-direction,article-4-direction-area,local-authority-eng:SWK,London Borough of Southwark,5158d13bfc6f0723b1fb07c975701a906e83a1ead4aee5...,2023-11-17 00:15:42+00:00,2022-04-06T16:59:34Z,,,,
1,https://raw.githubusercontent.com/digital-land...,200.0,article-4-direction,article-4-direction-area,local-authority-eng:SWK,London Borough of Southwark,e6d6d0a60345c50b0f40292b9ed6fcbd567fc36816fc3a...,2023-11-17 00:15:42+00:00,2022-04-06T16:57:35Z,,,,
2,https://raw.githubusercontent.com/digital-land...,200.0,listed-building,listed-building-outline,local-authority-eng:SWK,London Borough of Southwark,efbdafb929921097a6e002188e281047bb4d512d40a8f8...,2023-11-17 00:16:29+00:00,2021-12-07T11:11:51Z,,,,
3,https://raw.githubusercontent.com/digital-land...,200.0,article-4-direction,article-4-direction-area,local-authority-eng:SWK,London Borough of Southwark,e8791be20829a2aa6cca0ba62250b6b43eadaec78ac772...,2023-11-17 00:15:42+00:00,2022-04-06T16:55:55Z,,,,
4,https://raw.githubusercontent.com/digital-land...,200.0,conservation-area,conservation-area,local-authority-eng:SWK,London Borough of Southwark,98fe2cb81f50aa437d0679821a11d8e0e2a365105c75b7...,2022-04-12 00:05:11+00:00,2021-11-30T19:19:02Z,2022-04-12,,,
5,https://www.southwark.gov.uk/assets/attach/194...,200.0,listed-building,listed-building-outline,local-authority-eng:SWK,London Borough of Southwark,b626b9dce4b9ac203c31ef250c02520d3f0b1e94de1be3...,2023-11-17 00:16:29+00:00,2023-08-15T14:14:04Z,,,,
6,https://data.london.gov.uk/download/conservati...,200.0,conservation-area,conservation-area,local-authority-eng:SWK,London Borough of Southwark,f617d13ef7ff061424ec50ed641f05779797ff03beb2cd...,2023-11-17 00:04:11+00:00,2020-09-04T15:11:53Z,,,,
7,https://raw.githubusercontent.com/digital-land...,200.0,article-4-direction,article-4-direction-area,local-authority-eng:SWK,London Borough of Southwark,9510b41e61d9af455792dc1b4eb80d7600671804b9e878...,2023-11-17 00:15:42+00:00,2022-04-06T16:58:07Z,,,,
9,https://raw.githubusercontent.com/digital-land...,200.0,article-4-direction,article-4-direction,local-authority-eng:SWK,London Borough of Southwark,68d835acf03eef420f167d46ee969de29a4f5e6e436849...,2023-11-17 00:15:42+00:00,2022-04-25T19:19:59Z,,,,
10,https://raw.githubusercontent.com/digital-land...,200.0,listed-building,listed-building-outline,local-authority-eng:SWK,London Borough of Southwark,e3a22838b44a3c1e594eee33788728a6366caef066690e...,2023-11-17 00:16:29+00:00,2021-12-07T14:14:18Z,,,,


In [13]:
organisation_dataset_issues_dict = {}
info_issue_types = get_issues_with_severity_info()
for organisation in organisation_list:
    recent_endpoints_df = all_orgs_recent_endpoints[organisation]
    dataset_issues_dict = {}
    for index, row in recent_endpoints_df.iterrows():
        resource = row['resource']
        if ',' in row['pipelines']:
            datasets = row['pipelines'].split(',')
        else:
            datasets = [row['pipelines']]
        for dataset in datasets:
          print('Getting issues for', organisation, ' ', dataset, '...')
          issues_df = get_issues_for_resource(resource, dataset)
          issues_df.drop_duplicates(subset='issue_type', keep='first', inplace=True)
          issues = issues_df['issue_type'].values.tolist()
          
          if organisation_dataset_issues_dict.get(organisation, None) and (organisation_dataset_issues_dict.get(organisation, None)).get(dataset, None):
            existing_issues = organisation_dataset_issues_dict[organisation][dataset]
            issues_to_add = []
            for issue in issues:
              # Remove existing issues and severity=info issues from list 
              if issue not in (existing_issues or info_issue_types):
                issues_to_add.append(issue)
            dataset_issues_dict[dataset] = existing_issues.append(issues_to_add)
          else:
            # Remove info issues from list
            for issue in info_issue_types:
               if issue in issues:
                issues.remove(issue)
            dataset_issues_dict[dataset] = issues
        organisation_dataset_issues_dict[organisation] = dataset_issues_dict
for organisation, dataset_issues in organisation_dataset_issues_dict.items():
  for dataset, issues in dataset_issues.items():
    if issues is None or issues == []:
       organisation_dataset_issues_dict[organisation][dataset] = 'No issues'
    


Getting issues for local-authority-eng:BUC   listed-building-outline ...
Getting issues for local-authority-eng:BUC   article-4-direction-area ...
Getting issues for local-authority-eng:BUC   article-4-direction-area ...
Getting issues for local-authority-eng:BUC   article-4-direction-area ...
Getting issues for local-authority-eng:BUC   article-4-direction-area ...
Getting issues for local-authority-eng:BUC   article-4-direction-area ...
Getting issues for local-authority-eng:BUC   article-4-direction-area ...
Getting issues for local-authority-eng:BUC   tree-preservation-zone ...
Getting issues for local-authority-eng:BUC   tree-preservation-zone ...
Getting issues for local-authority-eng:BUC   tree ...
Getting issues for local-authority-eng:BUC   conservation-area ...
Getting issues for local-authority-eng:BUC   article-4-direction-area ...
Getting issues for local-authority-eng:BUC   tree-preservation-zone ...
Getting issues for local-authority-eng:BUC   article-4-direction-area ..

In [14]:
pd.set_option('display.max_colwidth', None)

def compute_cell_colour(value):
    if value == "No issues":
        return 'background-color: green'
    elif value == "No endpoint":
        return 'background-color: orange'
    else:
        return 'background-color: red'

rows_list = []
for organisation in organisation_list:
    df = all_orgs_recent_endpoints[organisation]
    df = df[pd.isna(df['end_date'])]
    try:
        name = organisation_name_dict[organisation]
    except:
        name = organisation
    issues = {}
        
    new_row = {'organisation': name}
    if organisation_dataset_issues_dict.get(organisation, None) is not None:
        for k, v in organisation_dataset_issues_dict[organisation].items():
            if v != 'No issues':
                new_row[k] = ', '.join(v)
                organisation_dataset_issues_dict[organisation][k] = ', '.join(v)
            else:
                new_row[k] = v
    rows_list.append(new_row)

output_df = pd.DataFrame(rows_list, columns=['organisation', *collection_list])
# output_df.to_csv('endpoint_status_master_report.csv', index=False)
output_df = output_df.replace(np.nan, "No endpoint")
output_df = output_df.style.applymap(compute_cell_colour, subset=collection_list)
output_df

Unnamed: 0,organisation,article-4-direction,article-4-direction-area,conservation-area,conservation-area-document,listed-building-outline,tree-preservation-order,tree-preservation-zone,tree
0,Buckinghamshire Council,No endpoint,No issues,invalid geometry,No endpoint,"unknown entity, invalid geometry",No endpoint,No issues,unknown entity
1,Dacorum Borough Council,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint
2,Doncaster Metropolitan Borough Council,No endpoint,No issues,No issues,No endpoint,No issues,No endpoint,No issues,unknown entity
3,Gloucester City Council,No endpoint,unknown entity,No issues,No endpoint,No issues,No endpoint,No endpoint,No endpoint
4,London Borough of Camden,No issues,invalid geometry,No issues,No endpoint,invalid geometry,No endpoint,No endpoint,No endpoint
5,London Borough of Lambeth,No endpoint,No issues,No issues,No endpoint,"unknown entity, invalid geometry",invalid date,invalid geometry,No issues
6,London Borough of Southwark,No issues,"invalid geometry, unknown entity",invalid geometry,No endpoint,No issues,No issues,No issues,No endpoint
7,Medway Council,No endpoint,No issues,invalid geometry,No endpoint,"unknown entity, invalid geometry",unknown entity,"unknown entity, invalid geometry",No endpoint
8,Newcastle City Council,No issues,invalid geometry,No issues,No endpoint,"patch, invalid geometry",No issues,No issues,No issues
9,Birmingham City Council,No endpoint,No issues,invalid geometry,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint


In [15]:
# Create output csv containing endpoints with issues
# output_columns = "name,pipelines,endpoint_url,organisation,collection,maxentrydate,entrydate,end_date,last_status,last_updated_date"
output_columns = ['name', 'pipelines', 'endpoint_url', 'organisation', 'collection', 'maxentrydate', 'entrydate', 'end_date', 'last_status', 'last_updated_date']

output_df = produce_output_csv(all_orgs_recent_endpoints, organisation_dataset_issues_dict, "issues", "No issues", output_columns)
output_df.to_csv('endpoint_issues_master_report.csv', index=False)
output_df.head(100)



Unnamed: 0,name,pipelines,endpoint_url,issues,organisation,collection,maxentrydate,entrydate,end_date,last_status,last_updated_date
0,Buckinghamshire Council,listed-building-outline,https://maps.buckscc.gov.uk/arcgis/services/PLANNING/RIPA_BOPS/MapServer/WFSServer?service=WFS&version=2.0.0&request=GetFeature&typeName=PLANNING_RIPA_BOPS:Listed_Buildings&outputFormat=GML2,"unknown entity, invalid geometry",local-authority-eng:BUC,listed-building-outline,2023-11-17 00:16:29+00:00,2022-05-04T16:16:06Z,,200.0,2023-11-16 00:16:27+00:00
1,Buckinghamshire Council,tree,https://maps.buckscc.gov.uk/arcgis/services/PLANNING/TPO_CSB_ST/MapServer/WFSServer?request=GetFeature&service=WFS&typename=PLANNING_TPO_CSB_ST:Tree_Preservation_Orders_-_Single_Trees__Chiltern___South_Bucks_,unknown entity,local-authority-eng:BUC,tree-preservation-zone,2023-11-17 00:14:09+00:00,2022-05-04T21:21:01Z,,200.0,2023-11-16 00:14:22+00:00
2,Buckinghamshire Council,conservation-area,https://maps.buckscc.gov.uk/arcgis/services/PLANNING/RIPA_BOPS/MapServer/WFSServer?service=WFS&version=2.0.0&request=GetFeature&typeName=PLANNING_RIPA_BOPS:Conservation_Areas&outputFormat=GML2,invalid geometry,local-authority-eng:BUC,conservation-area,2023-11-17 00:04:11+00:00,2022-05-06T18:18:20Z,,,
3,Doncaster Metropolitan Borough Council,tree,https://maps.doncaster.gov.uk/server/rest/services/Planning/TPO_Map/MapServer/0/,unknown entity,local-authority-eng:DNC,tree-preservation-order,2023-11-17 00:14:09+00:00,2022-10-26T10:15:59Z,,,
4,Gloucester City Council,article-4-direction-area,https://gcty.dynamicmaps.co.uk:8443/geoserver/Digital_Land/ows?service=wfs&request=GetFeature&TypeNames=Digital_Land%3Aarticle_4_DL&OutputFormat=GML2,unknown entity,local-authority-eng:GLO,article-4-direction,2023-11-17 00:15:42+00:00,2022-06-30T09:09:45Z,,,
5,London Borough of Camden,article-4-direction-area,https://opendata.camden.gov.uk/api/views/45mg-zjup/rows.csv,invalid geometry,local-authority-eng:CMD,article-4-direction-area,2023-11-17 00:15:42+00:00,2023-08-11T16:16:49Z,,,
6,London Borough of Camden,listed-building-outline,https://opendata.camden.gov.uk/api/views/uu3n-zgbj/rows.csv?accessType=DOWNLOAD,invalid geometry,local-authority-eng:CMD,listed-building,2023-11-17 00:16:29+00:00,2023-09-29T12:12:40Z,,,
7,London Borough of Lambeth,listed-building-outline,https://opendata.arcgis.com/datasets/a0ea54c61b1f4bdfbe9605324cf70c81_0.geojson,"unknown entity, invalid geometry",local-authority-eng:LBH,listed-building,2023-11-17 00:16:29+00:00,2021-11-23T19:19:01Z,,,
8,London Borough of Lambeth,tree-preservation-zone,https://opendata.arcgis.com/datasets/04ed133d01bf492b84bcec9de8397ec3_0.geojson,invalid geometry,local-authority-eng:LBH,tree-preservation-order,2023-11-17 00:14:09+00:00,2021-12-01T13:13:56Z,,500.0,2022-10-09 00:21:26+00:00
9,London Borough of Southwark,article-4-direction-area,https://raw.githubusercontent.com/digital-land/article-4-direction-collection/main/data/Southwark/Article%204%20_%20Public%20Houses.gpkg,"invalid geometry, unknown entity",local-authority-eng:SWK,article-4-direction,2023-11-17 00:15:42+00:00,2022-04-06T16:59:34Z,,,
