This report provides status information on the most recent endpoints for either a hardcoded list of prioritised LPAs or a list of organisations supplied in an input file.

The input file should be called 'input.csv' and contain one column, 'organisation', holding the organisation codes (for example 'local-authority-eng:BUC') of the LPAs to be included in the report.

In [None]:
import urllib
import pandas as pd
import numpy as np
import urllib.parse
import os
%pip install wget
import wget

In [None]:
# Make the shared helper module available: fetch it from GitHub into the
# working directory when it is not already there, then import its helpers.
util_file = "master_report_endpoint_utils.py"
if not os.path.isfile(util_file):
    url = "https://raw.githubusercontent.com/digital-land/jupyter-analysis/main/service_report/master_report/master_report_endpoint_utils.py"
    wget.download(url)
from master_report_endpoint_utils import *

In [None]:
# Choose the organisations to report on: the 'organisation' column of
# ./input.csv when that file exists, otherwise the default prioritised LPAs.
input_path = './input.csv'
if not os.path.isfile(input_path):
    organisation_list = [
        'local-authority-eng:BUC', 'local-authority-eng:DAC',
        'local-authority-eng:DNC', 'local-authority-eng:GLO',
        'local-authority-eng:CMD', 'local-authority-eng:LBH',
        'local-authority-eng:SWK', 'local-authority-eng:MDW',
        'local-authority-eng:NET', 'local-authority-eng:BIR',
        'local-authority-eng:CAT', 'local-authority-eng:EPS',
        'local-authority-eng:BNE', 'local-authority-eng:GAT',
        'local-authority-eng:GRY', 'local-authority-eng:KTT',
        'local-authority-eng:SAL', 'local-authority-eng:TEW',
        'local-authority-eng:WBK', 'local-authority-eng:DST',
        'local-authority-eng:DOV', 'local-authority-eng:LIV',
        'local-authority-eng:RDB', 'local-authority-eng:WFT',
        'local-authority-eng:NLN', 'local-authority-eng:NSM',
        'local-authority-eng:SLF', 'local-authority-eng:WRL',
    ]
    print('Input file not found. Using default list of organisations.')
else:
    input_df = pd.read_csv(input_path)
    organisation_list = input_df['organisation'].tolist()
    print('Input file found. Using', len(organisation_list), 'organisations from input file.')

In [None]:
# Get organisation info: map each organisation code in organisation_list
# (e.g. 'local-authority-eng:BUC') to its display name from the
# local-authority register.
organisation_info_df = pd.read_csv('https://raw.githubusercontent.com/digital-land/organisation-collection/main/data/local-authority.csv')

# Build the reference -> name lookup once instead of scanning the whole
# DataFrame for every organisation. drop_duplicates keeps the first row per
# reference, so a duplicated reference resolves to its first listed name.
reference_to_name = (
    organisation_info_df.drop_duplicates(subset='reference')
    .set_index('reference')['name']
    .to_dict()
)

organisation_name_dict = {}
for organisation in organisation_list:
    # The reference is the part after the 'prefix:'; a code without a prefix
    # is looked up as-is rather than raising IndexError.
    code_parts = organisation.split(':')
    organisation_code = code_parts[1] if len(code_parts) > 1 else code_parts[0]
    if organisation_code not in reference_to_name:
        # Leave unknown organisations out of the mapping instead of crashing;
        # the report then falls back to showing the raw organisation code.
        print('No name found for organisation', organisation, '- using its code instead.')
        continue
    organisation_name = reference_to_name[organisation_code]
    organisation_name_dict[organisation] = organisation_name

In [None]:
datasette_url = "https://datasette.planning.data.gov.uk/"

# Datasets that become the columns of the status report.
collection_list = [
    'article-4-direction',
    'article-4-direction-area',
    'conservation-area',
    'conservation-area-document',
    'listed-building-outline',
    'tree-preservation-order',
    'tree-preservation-zone',
    'tree',
]
# Accepted values of the 'pipelines' column: each dataset on its own plus the
# comma-separated combinations that are kept.
pipelines_list = [
    'article-4-direction',
    'article-4-direction-area',
    'conservation-area',
    'conservation-area-document',
    'listed-building-outline',
    'tree-preservation-order',
    'tree-preservation-zone',
    'tree',
    'tree,tree-preservation-order',
    'tree-preservation-order,tree-preservation-zone',
    'article-4-direction,article-4-direction-area',
]

# For every organisation, fetch its latest endpoints and keep only the rows
# whose 'pipelines' value is one of the accepted values above.
all_orgs_recent_endpoints = {}
for organisation in organisation_list:
    recent_endpoints_df = get_latest_endpoints(organisation)
    recent_endpoints_df = recent_endpoints_df.loc[recent_endpoints_df['pipelines'].isin(pipelines_list)]
    all_orgs_recent_endpoints[organisation] = recent_endpoints_df

In [None]:
# Do not truncate long cell values when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)

def compute_cell_colour(status):
    """Return the CSS background rule used to colour one status cell.

    "200" is shown in green, 'No endpoint' in orange and every other value
    in red.
    """
    if status == 'No endpoint':
        return 'background-color: orange'
    return 'background-color: green' if status == "200" else 'background-color: red'
    
def cut_zeros(row):
    """Return the value with one trailing '.0' removed, if it ends with one.

    Despite its name, `row` is a single cell value (e.g. "200.0" -> "200");
    values that do not end in '.0' are returned unchanged.
    """
    return row[:-2] if row[-2:] == '.0' else row

# Build one report row per organisation: its display name plus, for each
# dataset it provides, the latest endpoint status (or the logged exception
# when no status was recorded).
rows_list = []
organisation_dataset_statuses_dict = {}
for organisation in organisation_list:
    # All recent endpoints of THIS organisation, including ended ones.
    organisation_endpoints_df = all_orgs_recent_endpoints[organisation]
    # Only endpoints without an end date are reported on.
    latest_endpoints_df = organisation_endpoints_df[pd.isna(organisation_endpoints_df['end_date'])]
    # Fall back to the organisation code when no display name is known.
    name = organisation_name_dict.get(organisation, organisation)

    dataset_statuses_dict = {}
    for _, row in latest_endpoints_df.iterrows():
        # One endpoint may feed several datasets (comma-separated pipelines);
        # split() also handles the single-dataset case.
        for dataset in row['pipelines'].split(','):
            # Consider cases where a dataset is contributed to by multiple
            # endpoints. The comparison must use this organisation's own
            # endpoints, not a frame left over from another organisation.
            same_datasets_df = organisation_endpoints_df[
                organisation_endpoints_df["pipelines"].apply(lambda x: dataset in x.split(','))
            ]
            if len(same_datasets_df) > 1:
                skip_dataset = handle_skip_dataset(same_datasets_df, dataset, row)
            else:
                skip_dataset = False
            if skip_dataset:
                continue

            status = row['status']
            if not pd.isna(status):
                status = int(status)
            else:
                # No status recorded: report this endpoint's own exception.
                status = row['exception']
                # Missing values read from a DataFrame are NaN rather than
                # None, so use pd.isna to detect "no exception either".
                if pd.isna(status):
                    status = "Unknown Error"
            dataset_statuses_dict[dataset] = status
    organisation_dataset_statuses_dict[organisation] = dataset_statuses_dict

    new_row = {'organisation': name}
    new_row.update(dataset_statuses_dict)
    rows_list.append(new_row)

# Datasets an organisation has no entry for are left as NaN by the DataFrame
# constructor; label them explicitly.
output_df = pd.DataFrame(rows_list, columns=['organisation', *collection_list])
output_df = output_df.replace(np.nan, "No endpoint")

# Normalise everything to text and drop the '.0' that appears when integer
# statuses were stored in a float column (e.g. "200.0" -> "200").
output_df = output_df.astype(str)
output_df = output_df.applymap(cut_zeros)

output_df.to_csv('endpoint_status_master_report.csv', index=False)
# Last expression of the cell: renders the colour-coded table in the notebook.
output_df.style.applymap(compute_cell_colour, subset=collection_list)

In [None]:

# Columns written to the per-endpoint CSV export.
output_columns = [
    'name',
    'pipelines',
    'endpoint_url',
    'organisation',
    'collection',
    'maxentrydate',
    'entrydate',
    'end_date',
    'last_status',
    'last_updated_date',
]

# NOTE(review): judging by the output filename, produce_output_csv keeps the
# endpoints whose status is not 200 - confirm against the utils module.
output_df = produce_output_csv(
    all_orgs_recent_endpoints,
    organisation_dataset_statuses_dict,
    "status",
    200,
    output_columns,
)
output_df.to_csv('endpoint_status_not_200.csv', index=False)