This report provides issue/data-quality information on the most recent endpoints for either a hardcoded list of prioritised LPAs or a list of organisations supplied in an input file.

The input file should be called 'organisation_input.csv' and contain a single column, 'organisation', holding the organisation codes (e.g. 'local-authority-eng:BUC') of the LPAs to be included in the report.

In [7]:
import urllib
import numpy as np
import pandas as pd
import urllib.parse
import os
%pip install wget
import wget

Note: you may need to restart the kernel to use updated packages.


Download helper utility files from GitHub:

In [8]:
# Make sure the helper module is present locally, then import its functions.
util_file = "master_report_endpoint_utils.py"
if not os.path.isfile(util_file):
    # Change this url to the raw url of the master_report_endpoint_utils.py file
    # NOTE(review): the URL below ends in convert_functions.py, not in the
    # util_file name that is imported afterwards, and no output file name is
    # passed to wget.download -- confirm the correct raw URL.
    url = "https://raw.githubusercontent.com/digital-land/jupyter-analysis/main/endpoint_checker/convert_functions.py"
    wget.download(url)
from master_report_endpoint_utils import *


In [9]:
# Organisations to report on: read from ./organisation_input.csv when that file
# exists, otherwise fall back to the built-in list of prioritised LPAs.
input_path = './organisation_input.csv'
if not os.path.isfile(input_path):
    print('Input file not found. Using default list of organisations.')
    organisation_list = [
        'local-authority-eng:BUC',
        'local-authority-eng:DAC',
        'local-authority-eng:DNC',
        'local-authority-eng:GLO',
        'local-authority-eng:CMD',
        'local-authority-eng:LBH',
        'local-authority-eng:SWK',
        'local-authority-eng:MDW',
        'local-authority-eng:NET',
        'local-authority-eng:BIR',
        'local-authority-eng:CAT',
        'local-authority-eng:EPS',
        'local-authority-eng:BNE',
        'local-authority-eng:GAT',
        'local-authority-eng:GRY',
        'local-authority-eng:KTT',
        'local-authority-eng:SAL',
        'local-authority-eng:TEW',
        'local-authority-eng:WBK',
        'local-authority-eng:DST',
        'local-authority-eng:DOV',
        'local-authority-eng:LIV',
        'local-authority-eng:RDB',
        'local-authority-eng:WFT',
        'local-authority-eng:NLN',
        'local-authority-eng:NSM',
        'local-authority-eng:SLF',
        'local-authority-eng:WRL',
    ]
else:
    # The CSV must provide an 'organisation' column.
    input_df = pd.read_csv(input_path)
    organisation_list = input_df['organisation'].tolist()
    print('Input file found. Using', len(organisation_list), 'organisations from input file.')

Input file not found. Using default list of organisations.


In [10]:
# Get organisation names: map each organisation identifier in organisation_list
# (e.g. 'local-authority-eng:BUC') to the 'name' recorded for its reference
# code in the local-authority register.
organisation_info_df = pd.read_csv('https://raw.githubusercontent.com/digital-land/organisation-collection/main/data/local-authority.csv')

# Build the reference -> name lookup once instead of filtering the frame for
# every organisation. keep='first' selects the same row the per-organisation
# filter followed by .iloc[0] would have picked.
name_by_reference = (
    organisation_info_df
    .drop_duplicates(subset='reference', keep='first')
    .set_index('reference')['name']
    .to_dict()
)

organisation_name_dict = {}
for organisation in organisation_list:
    # The reference code is the part after the ':' prefix separator.
    organisation_code = organisation.split(':')[-1]
    organisation_name = name_by_reference.get(organisation_code)
    # Organisations missing from the register are left out of the mapping
    # rather than aborting the whole cell with an IndexError.
    if organisation_name is not None:
        organisation_name_dict[organisation] = organisation_name

In [12]:
datasette_url = "https://datasette.planning.data.gov.uk/"

# Datasets covered by the report.
collection_list = [
    'article-4-direction',
    'article-4-direction-area',
    'conservation-area',
    'conservation-area-document',
    'listed-building-outline',
    'tree-preservation-order',
    'tree-preservation-zone',
    'tree',
]
# Accepted values of the 'pipelines' column: every single dataset above plus
# the two comma-joined combinations.
pipelines_list = [
    *collection_list,
    'tree,tree-preservation-order',
    'tree-preservation-order,tree-preservation-zone',
]

# Latest endpoints per organisation, restricted to the accepted pipelines.
all_orgs_recent_endpoints = {}
for organisation in organisation_list:
    print(organisation)
    latest_endpoints = get_latest_endpoints(organisation)
    is_wanted_pipeline = latest_endpoints['pipelines'].isin(pipelines_list)
    recent_endpoints_df = latest_endpoints[is_wanted_pipeline]
    all_orgs_recent_endpoints[organisation] = recent_endpoints_df


local-authority-eng:BUC
local-authority-eng:DAC
local-authority-eng:DNC
local-authority-eng:GLO
local-authority-eng:CMD
local-authority-eng:LBH
local-authority-eng:SWK
local-authority-eng:MDW
local-authority-eng:NET
local-authority-eng:BIR
local-authority-eng:CAT
local-authority-eng:EPS
local-authority-eng:BNE
local-authority-eng:GAT
local-authority-eng:GRY
local-authority-eng:KTT
local-authority-eng:SAL
local-authority-eng:TEW
local-authority-eng:WBK
local-authority-eng:DST
local-authority-eng:DOV
local-authority-eng:LIV
local-authority-eng:RDB
local-authority-eng:WFT
local-authority-eng:NLN
local-authority-eng:NSM
local-authority-eng:SLF
local-authority-eng:WRL


In [13]:
# Collect, for every organisation and dataset, the distinct issue types found
# on the organisation's recent endpoints. Result shape:
#   organisation_dataset_issues_dict[organisation][dataset] -> list of issue
#   types, or the string 'No issues' when nothing remains after filtering.
organisation_dataset_issues_dict = {}
# Issue types that should not be reported.
# NOTE(review): assumes get_issues_with_severity_info() yields issue-type
# values when iterated -- confirm against the helper module.
info_issue_types = get_issues_with_severity_info()
ignored_issue_types = list(info_issue_types)

for organisation in organisation_list:
    recent_endpoints_df = all_orgs_recent_endpoints[organisation]
    dataset_issues_dict = {}
    for _, row in recent_endpoints_df.iterrows():
        resource = row['resource']
        # A 'pipelines' value may name several datasets separated by commas;
        # split(',') returns a one-element list when there is no comma.
        datasets = row['pipelines'].split(',')
        for dataset in datasets:
            issues_df = get_issues_for_resource(resource, dataset)
            issues = issues_df['issue_type'].drop_duplicates().tolist()

            # Several resources can feed the same dataset, so merge into the
            # list already collected for it, skipping both issue types already
            # recorded and the ignored (severity=info) ones.
            merged_issues = dataset_issues_dict.setdefault(dataset, [])
            for issue in issues:
                if issue not in merged_issues and issue not in ignored_issue_types:
                    merged_issues.append(issue)
    # Organisations without any endpoint rows get no entry at all.
    if dataset_issues_dict:
        organisation_dataset_issues_dict[organisation] = dataset_issues_dict

# Replace empty issue lists with the 'No issues' marker.
for organisation, dataset_issues in organisation_dataset_issues_dict.items():
    for dataset, issues in dataset_issues.items():
        if not issues:
            dataset_issues[dataset] = 'No issues'
    


In [14]:
# Do not truncate long cell contents when displaying frames.
pd.set_option('display.max_colwidth', None)


def compute_cell_colour(value):
    """Return the CSS background colour declaration for one report cell.

    "No endpoint" maps to orange, "No issues" maps to green, and any other
    value maps to red.
    """
    if value == "No endpoint":
        return 'background-color: orange'
    if value == "No issues":
        return 'background-color: green'
    return 'background-color: red'

# Build one report row per organisation: its display name plus, for every
# dataset with collected results, either 'No issues' or the comma-separated
# issue types.
rows_list = []
for organisation in organisation_list:
    # Fall back to the organisation identifier when no display name is known.
    name = organisation_name_dict.get(organisation, organisation)

    new_row = {'organisation': name}
    dataset_results = organisation_dataset_issues_dict.get(organisation) or {}
    for dataset_name, result in dataset_results.items():
        # A result is either the 'No issues' marker or a list of issue types.
        if result != 'No issues':
            new_row[dataset_name] = ', '.join(result)
        else:
            new_row[dataset_name] = result
    rows_list.append(new_row)

# Restricting the columns to collection_list leaves NaN wherever an
# organisation has no result for a dataset; those cells mean "No endpoint".
output_df = pd.DataFrame(rows_list, columns=['organisation', *collection_list])
# output_df.to_csv('endpoint_status_master_report.csv', index=False)
output_df = output_df.replace(np.nan, "No endpoint")

# Colour every dataset cell by its status. Styler.applymap is deprecated in
# favour of Styler.map from pandas 2.1, so use whichever the installed
# version provides.
styler = output_df.style
colour_cells = styler.map if hasattr(styler, 'map') else styler.applymap
output_df = colour_cells(compute_cell_colour, subset=collection_list)
output_df

Unnamed: 0,organisation,article-4-direction,article-4-direction-area,conservation-area,conservation-area-document,listed-building-outline,tree-preservation-order,tree-preservation-zone,tree
0,Buckinghamshire Council,No endpoint,No issues,invalid geometry,No endpoint,"unknown entity, invalid geometry",No endpoint,No issues,unknown entity
1,Dacorum Borough Council,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint
2,Doncaster Metropolitan Borough Council,No endpoint,No issues,No issues,No endpoint,No issues,No endpoint,No issues,unknown entity
3,Gloucester City Council,No endpoint,unknown entity,No issues,No endpoint,No issues,No endpoint,No endpoint,No endpoint
4,London Borough of Camden,No issues,invalid geometry,No issues,No endpoint,invalid geometry,No endpoint,No endpoint,No endpoint
5,London Borough of Lambeth,No endpoint,No issues,No issues,No endpoint,"unknown entity, invalid geometry",invalid date,invalid geometry,No issues
6,London Borough of Southwark,No issues,"invalid geometry, unknown entity",invalid geometry,No endpoint,No issues,No issues,No issues,No endpoint
7,Medway Council,No endpoint,No issues,invalid geometry,No endpoint,"unknown entity, invalid geometry",unknown entity,"unknown entity, invalid geometry",No endpoint
8,Newcastle City Council,No issues,invalid geometry,No issues,No endpoint,"patch, invalid geometry",No issues,No issues,No issues
9,Birmingham City Council,No endpoint,No issues,invalid geometry,No endpoint,No endpoint,No endpoint,No endpoint,No endpoint
