In [None]:
# Collections to audit. Each entry maps a display name (also used as the
# Excel sheet name when the report is written) to a pair of raw GitHub URLs:
#     (source.csv URL, endpoint.csv URL)
collections = {
    'Article 4 Direction': (
        'https://raw.githubusercontent.com/digital-land/article-4-direction-collection/main/collection/source.csv',
        'https://raw.githubusercontent.com/digital-land/article-4-direction-collection/main/collection/endpoint.csv'),
    "Conservation Area": (
        'https://raw.githubusercontent.com/digital-land/conservation-area-collection/main/collection/source.csv',
        'https://raw.githubusercontent.com/digital-land/conservation-area-collection/main/collection/endpoint.csv'),
    "Listed Building": (
        'https://raw.githubusercontent.com/digital-land/listed-building-collection/main/collection/source.csv',
        # Path previously contained a stray double slash ("collection//endpoint.csv").
        'https://raw.githubusercontent.com/digital-land/listed-building-collection/main/collection/endpoint.csv'),
    "Tree Preservation Order": (
        'https://raw.githubusercontent.com/digital-land/tree-preservation-order-collection/main/collection/source.csv',
        'https://raw.githubusercontent.com/digital-land/tree-preservation-order-collection/main/collection/endpoint.csv')
}

# NOTE(review): lpa_filter is not read anywhere in the visible notebook --
# presumably a placeholder for restricting the report to a single LPA;
# confirm its intended use or remove it.
lpa_filter = None

In [None]:
import csv
from io import StringIO
import urllib.request
import pandas as pd

In [None]:
# Lookup table built from the organisation dataset:
# value of the 'local-authority-eng' column -> value of the 'name' column.
lpa_name = {}

# Use the response as a context manager so the HTTP connection is closed as
# soon as the body has been read instead of being leaked.
with urllib.request.urlopen('https://raw.githubusercontent.com/digital-land/organisation-dataset/main/data/local-authority.csv') as response:
    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark, which would otherwise become part of the first column name.
    raw = response.read().decode('utf-8-sig')

with StringIO(raw) as f:
    for row in csv.DictReader(f):
        lpa_name[row['local-authority-eng']] = row['name']

In [None]:
def get_csv(url: str) -> list:
    """Download a CSV document and return its rows as a list of dicts.

    Args:
        url: Location of the CSV file; anything ``urllib.request.urlopen``
            accepts.

    Returns:
        One mapping per data row, keyed by the names in the header row.
        An empty document yields an empty list.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the payload is not valid UTF-8.
    """
    # Close the response deterministically rather than leaking the connection.
    with urllib.request.urlopen(url) as response:
        # 'utf-8-sig' is identical to 'utf-8' except that a leading BOM is
        # dropped, so it cannot end up glued to the first header name.
        raw = response.read().decode('utf-8-sig')
    with StringIO(raw) as f:
        return list(csv.DictReader(f))

In [None]:
# One report DataFrame per collection, keyed by the collection's display name.
# Each frame lists the LPA sources that have an endpoint but no documentation URL.
endpoint_frames = {}

for collection, (sources_url, endpoints_url) in collections.items():
    print(f"Processing collection {collection}...")

    sources = get_csv(sources_url)
    # endpoint identifier -> endpoint URL, taken from the collection's endpoint.csv
    endpoints = {row['endpoint']: row['endpoint-url'] for row in get_csv(endpoints_url)}

    data = []

    for source in sources:
        organisation = source['organisation']
        if not organisation.startswith('local-authority-eng'):
            continue  # not an LPA row
        if not source['endpoint'] or source['documentation-url']:
            continue  # no endpoint, or documentation is already recorded

        # Resolve the name only for rows that are actually reported, and fall
        # back to the raw organisation identifier when the code is missing from
        # the lookup so one unknown authority does not abort the whole report.
        lpa_code = organisation.split(':')[1].upper()
        lpa = lpa_name.get(lpa_code, organisation)

        # A source may reference an endpoint that endpoint.csv does not list;
        # report it with an empty URL rather than raising KeyError.
        endpoint_url = endpoints.get(source['endpoint'], '')

        data.append((lpa, source['collection'], endpoint_url, source['documentation-url']))

    endpoint_frames[collection] = pd.DataFrame(
        data=sorted(data),
        columns=['LPA Name', 'Collection', 'Endpoint', 'Documentation URL'],
    )

In [None]:
from IPython.display import FileLink

# Workbook that receives one sheet per collection.
filename = "endpoints-without-documentation.xlsx"

with pd.ExcelWriter(filename) as writer:
    # The dictionary key doubles as the sheet title.
    for sheet_title, report in endpoint_frames.items():
        report.to_excel(writer, sheet_name=sheet_title, index=False)

print("Excel file:")
# Last expression of the cell: renders a clickable link to the workbook.
FileLink(filename)