In [1]:
import urllib
import urllib.parse

import pandas as pd

# Selecting Endpoints of Primary Datasets **without** Documentation Url
The following query selects rows of the endpoint table where:
- The documentation_url **is** an empty character string or null
- The endpoint is one of the eight primary datasets
- The end date is an empty string or null (endpoint has not been ended)
- The organisation field begins with "local-authority" (for example "local-authority-eng:BDG"), which restricts the results to LPAs

The following pandas code then selects only the most recent endpoints, per organisation-dataset group.

In [8]:
# Base URL of the Datasette instance that the query functions send their SQL to.
# It ends with "/" because the database path is appended to it directly.
datasette_url = "https://datasette.planning.data.gov.uk/"
def grab_no_doc_endpoints():
    """Fetch active LPA endpoints of the primary datasets that have no documentation URL.

    Sends a SQL query to the ``digital-land`` database of the Datasette
    instance at ``datasette_url`` and reads the CSV response with pandas.
    A row is selected when:

    - ``source.documentation_url`` is an empty string or NULL,
    - the pipeline is one of the eight datasets listed in the query,
    - ``source.end_date`` is an empty string or NULL, and
    - ``source.organisation`` starts with ``local-authority``.

    Side effect:
        The result is also stored in the module-level name
        ``endpoints_with_no_doc_url``.

    Returns:
        pandas.DataFrame: the query result, with the columns organisation,
        pipeline/dataset, endpoint_url, documentation_url, entry_date and
        endpoint.

    Raises:
        Whatever ``pandas.read_csv`` raises when the URL cannot be fetched
        or the response cannot be parsed.
    """
    global endpoints_with_no_doc_url

    # The organisation table is not joined: every selected column and every
    # filter comes from endpoint, source or source_pipeline.
    # String literals use single quotes; double quotes are reserved for
    # identifiers such as the "pipeline/dataset" alias.
    sql = """
        SELECT 
            s.organisation, 
            sp.pipeline AS "pipeline/dataset", 
            e.endpoint_url, 
            s.documentation_url,
            s.entry_date,
            e.endpoint
        FROM 
            endpoint e
            INNER JOIN source s ON e.endpoint = s.endpoint
            INNER JOIN source_pipeline sp ON s.source = sp.source
        WHERE 
            (s.documentation_url = '' OR s.documentation_url IS NULL)
            AND (sp.pipeline IN (
                'article-4-direction-area',
                'article-4-direction',
                'conservation-area',
                'conservation-area-document',
                'listed-building-outline',
                'tree-preservation-zone',
                'tree-preservation-order',
                'tree')) 
            AND (s.end_date = '' OR s.end_date IS NULL)
            AND s.organisation LIKE 'local-authority%'
        """

    # "_size": "max" requests Datasette's largest page size.
    # NOTE(review): confirm that the CSV response is not truncated at that limit.
    params = urllib.parse.urlencode({
        "sql": sql,
        "_size": "max"
    })

    url = f"{datasette_url}digital-land.csv?{params}"
    endpoints_with_no_doc_url = pd.read_csv(url)
    return endpoints_with_no_doc_url

# Fetch the raw query result and parse entry_date so entries can be ordered in time
df = grab_no_doc_endpoints()
df['entry_date'] = pd.to_datetime(df['entry_date'])

# For each (organisation, pipeline/dataset) pair, the row label holding the latest entry_date
idx = (
    df.groupby(['organisation', 'pipeline/dataset'])['entry_date']
      .idxmax()
)

# Keep just those rows and renumber them from 0, discarding the old row labels
endpoints_with_no_doc_url = df.loc[idx].reset_index(drop=True)

endpoints_with_no_doc_url

Unnamed: 0,organisation,pipeline/dataset,endpoint_url,documentation_url,entry_date,endpoint
0,local-authority-eng:BDG,article-4-direction-area,https://services3.arcgis.com/lCzPKKaGs7lhrnrV/...,,2023-12-14 00:00:00+00:00,3139776593c26742a0c9a92d3078957b90799c16cca269...
1,local-authority-eng:BDG,conservation-area,https://services3.arcgis.com/lCzPKKaGs7lhrnrV/...,,2023-12-01 14:14:52+00:00,29bfaf49f0d5deaaa3b0ca0ceac987c3a21effc034ad5a...
2,local-authority-eng:BDG,listed-building-outline,https://services3.arcgis.com/lCzPKKaGs7lhrnrV/...,,2023-12-13 00:00:00+00:00,659f589ebfeda0bc11835f7b171b50608f1090d835b9c8...
3,local-authority-eng:BIR,article-4-direction-area,https://maps.birmingham.gov.uk/server/rest/ser...,,2023-11-14 00:00:00+00:00,2d9575d771afff89f6d731be59a1ff8cedfd99efcd8bb2...
4,local-authority-eng:BIR,conservation-area,https://maps.birmingham.gov.uk/server/rest/ser...,,2023-11-14 00:00:00+00:00,a09608d26986c205de7ab8dc54b5d76c776ca236a9ecf9...
5,local-authority-eng:BUC,article-4-direction-area,https://maps.buckscc.gov.uk/arcgis/rest/servic...,,2023-11-07 00:00:00+00:00,65672f37a643acac1a9d5599a0631db5d9bad9b2cf8874...
6,local-authority-eng:BUC,conservation-area,https://maps.buckscc.gov.uk/arcgis/services/PL...,,2022-05-06 18:18:20+00:00,9419f8804448962f2567b68fa1697c9cae2e3b2d7db808...
7,local-authority-eng:BUC,listed-building-outline,https://maps.buckscc.gov.uk/arcgis/services/PL...,,2022-05-04 16:16:06+00:00,0cbe4f9bc30b4b44da34618881d5986fdf49cfcf5d37ea...
8,local-authority-eng:BUC,tree,https://maps.buckscc.gov.uk/arcgis/services/PL...,,2022-05-04 21:21:01+00:00,921954ef5241801cd3c199ace4f9e551e6214ab34ff66d...
9,local-authority-eng:BUC,tree-preservation-zone,https://maps.buckscc.gov.uk/arcgis/services/PL...,,2022-05-04 21:21:07+00:00,76a55253fac858f0ed7daf595191cbbc37f7493f35b38e...


In [3]:
# Ask before writing to disk so that running this cell never creates a file unasked
download = input("Do you want to download the table? (yes/no): ")

# Only "yes" (in any letter case) triggers the export; any other answer does nothing
if download.lower() == "yes":
    endpoints_with_no_doc_url.to_csv(
        path_or_buf="endpoints_with_no_doc_url.csv",
        index=False,
    )
    print("Query result downloaded as 'endpoints_with_no_doc_url.csv'")

Do you want to download the table? (yes/no):  no


# Selecting Endpoints of Primary Datasets **with** Documentation Urls
The following query selects rows of the endpoint table where:
- The documentation_url is **not** an empty character string or null
- The endpoint is one of the eight primary datasets
- The end date is an empty string or null (endpoint has not been ended)
- The organisation field begins with "local-authority" (for example "local-authority-eng:BAB"), which restricts the results to LPAs

The following pandas code then selects only the most recent endpoints, per organisation-dataset group.

In [9]:

def grab_endpoints_with_docs():
    """Fetch active LPA endpoints of the primary datasets that have a documentation URL.

    Sends a SQL query to the ``digital-land`` database of the Datasette
    instance at ``datasette_url`` and reads the CSV response with pandas.
    A row is selected when:

    - ``source.documentation_url`` is neither an empty string nor NULL,
    - the pipeline is one of the eight datasets listed in the query,
    - ``source.end_date`` is an empty string or NULL, and
    - ``source.organisation`` starts with ``local-authority``.

    Side effect:
        The result is also stored in the module-level name
        ``endpoints_with_doc_url``.

    Returns:
        pandas.DataFrame: the query result, with the columns organisation,
        pipeline/dataset, endpoint_url, documentation_url, entry_date and
        endpoint.

    Raises:
        Whatever ``pandas.read_csv`` raises when the URL cannot be fetched
        or the response cannot be parsed.
    """
    global endpoints_with_doc_url

    # String literals use single quotes; double quotes are reserved for
    # identifiers such as the "pipeline/dataset" alias.
    sql = """
        SELECT 
            s.organisation, 
            sp.pipeline AS "pipeline/dataset", 
            e.endpoint_url, 
            s.documentation_url,
            s.entry_date,
            e.endpoint
        FROM 
            endpoint e
            INNER JOIN source s ON e.endpoint = s.endpoint
            INNER JOIN source_pipeline sp ON s.source = sp.source
        WHERE 
            (s.documentation_url != '' AND s.documentation_url IS NOT NULL)
            AND (sp.pipeline IN (
                'article-4-direction-area',
                'article-4-direction',
                'conservation-area',
                'conservation-area-document',
                'listed-building-outline',
                'tree-preservation-zone',
                'tree-preservation-order',
                'tree')) 
            AND (s.end_date = '' OR s.end_date IS NULL)
            AND s.organisation LIKE 'local-authority%'
        """

    # "_size": "max" requests Datasette's largest page size.
    # NOTE(review): confirm that the CSV response is not truncated at that limit.
    params = urllib.parse.urlencode({
        "sql": sql,
        "_size": "max"
    })

    url = f"{datasette_url}digital-land.csv?{params}"
    endpoints_with_doc_url = pd.read_csv(url)
    return endpoints_with_doc_url

# Fetch the raw query result and parse entry_date so entries can be ordered in time
df = grab_endpoints_with_docs()
df['entry_date'] = pd.to_datetime(df['entry_date'])

# For each (organisation, pipeline/dataset) pair, the row label holding the latest entry_date
idx = (
    df.groupby(['organisation', 'pipeline/dataset'])['entry_date']
      .idxmax()
)

# Keep just those rows and renumber them from 0, discarding the old row labels
endpoints_with_doc_url = df.loc[idx].reset_index(drop=True)

endpoints_with_doc_url

Unnamed: 0,organisation,pipeline/dataset,endpoint_url,documentation_url,entry_date,endpoint
0,local-authority-eng:BAB,conservation-area,http://inspire.misoportal.com/geoserver/baberg...,https://data.gov.uk/dataset/c782a0de-ef2f-4c6d...,2020-11-29 12:18:40+00:00,e6a63c92987299ca1409660135ab597f51b13f8c050421...
1,local-authority-eng:BAR,conservation-area,https://webgis1.barrowbc.gov.uk/inspire/wfs?se...,https://data.gov.uk/dataset/b62d9315-b1d5-455c...,2020-11-29 12:18:40+00:00,91ab6be088bfb215edd320eee06ad41dc291b8a20c21d5...
2,local-authority-eng:BNE,article-4-direction,https://open.barnet.gov.uk/download/2ylny/z7y/...,https://open.barnet.gov.uk/dataset/2ylny/artic...,2023-12-18 00:00:00+00:00,4d69e04b32ecfa83f9c17f1fed6f13a94dc8c839607dd8...
3,local-authority-eng:BNE,article-4-direction-area,https://open.barnet.gov.uk/download/e5l77/dhv/...,https://open.barnet.gov.uk/dataset/e5l77/artic...,2023-12-18 00:00:00+00:00,d4c09389082ca55e33cf532eb045ea7eb9dc447a24547f...
4,local-authority-eng:BNE,conservation-area,https://open.barnet.gov.uk/download/20yo8/c6n/...,https://open.barnet.gov.uk/dataset/20yo8/conse...,2023-11-06 00:00:00+00:00,1cafec4102c43a9e95f54bd36453240251c7741d7d15de...
...,...,...,...,...,...,...
119,local-authority-eng:WBK,tree-preservation-zone,https://gis.westberks.gov.uk/server/rest/servi...,https://www.westberks.gov.uk/planning-data-tre...,2024-01-24 00:00:00+00:00,c3b09959756401a0a1e9a3b75e34fe60dcf48c91b3b1b7...
120,local-authority-eng:WNM,conservation-area,https://geodata.rbwm.gov.uk/geoserver/wminspir...,https://data.gov.uk/dataset/739ffcb2-54ba-49a0...,2020-11-29 12:18:40+00:00,5f4c5116955fcf2d4398988c3edfcd12175cd371f9bacb...
121,local-authority-eng:WOX,conservation-area,http://inspire.misoportal.com/geoserver/west_o...,https://data.gov.uk/dataset/8e05f77a-2fd3-4c20...,2020-11-29 12:18:40+00:00,e2c7984b3b5f32e6420e243fef483d4d37b307f2ad84f4...
122,local-authority-eng:WYO,conservation-area,https://inspire.wycombe.gov.uk/getows.ashx?Map...,https://data.gov.uk/dataset/64f0dd17-3ec6-4099...,2020-11-29 12:18:40+00:00,c2aa5fe7f3050a4a2b4f5114bed6e5d6b4b492297e4d7e...


In [5]:
# Ask before writing to disk so that running this cell never creates a file unasked
download = input("Do you want to download the table? (yes/no): ")

# Only "yes" (in any letter case) triggers the export; any other answer does nothing
if download.lower() == "yes":
    endpoints_with_doc_url.to_csv(
        path_or_buf="endpoints_with_doc_url.csv",
        index=False,
    )
    print("Query result downloaded as 'endpoints_with_doc_url.csv'")

Do you want to download the table? (yes/no):  no
