In [1]:
from datetime import datetime as dt
import urllib
import urllib.parse

from IPython.display import display
import ipywidgets as widgets
import numpy as np
import pandas as pd

In [2]:
# --- User-configurable inputs ---------------------------------------------
# Collections to report on; each one is queried separately in the loop below.
collection_input=['article-4-direction', 'article-4-direction-area', 'brownfield-land', 'conservation-area',  'listed-building-outline', 'tree', 'tree-preservation-order', 'tree-preservation-zone']
# Optional organisation filter; leave empty to include every organisation.
organisation_input=''
severity_input=[] # list of issue severities you want to get e.g ["error", "warning", "info", "notice"]
# Optional line-number filter; leave empty to include every line.
line_number_input=''

datasette_url = "https://datasette.planning.data.gov.uk/"

current_date = dt.now().date()

# Restrict results to sources whose entry_date starts with '2023'.
date_query = " where SUBSTRING(s.entry_date, 1, 4) = '2023'"


# collection_dfs[i] holds the endpoint/resource rows for output_collection_names[i].
# Collections that return no rows are left out of BOTH lists, so the two lists
# stay aligned with each other but not with collection_input.
collection_dfs=[]
output_collection_names=[]
for collection in collection_input:
    # Test the loop variable, not the list: `collection_input` is always
    # truthy inside this loop, so checking it can never skip the filter.
    query = ""
    if collection:
        query = f" and s.collection = '{collection}'"

    if organisation_input:
        query = query + f" and s.organisation = '{organisation_input}'"

    # NOTE(review): the join with `log` can repeat rows before drop_duplicates
    # runs, and Datasette may cap how many rows a query returns -- confirm the
    # result is not being truncated for large collections.
    params = urllib.parse.urlencode({
        "sql": f"""
        select re.resource, re.endpoint, s.organisation, s.entry_date, e.endpoint_url, l.status
        from resource_endpoint re
        inner join endpoint e
        on re.endpoint = e.endpoint
        inner join source s
        on e.endpoint = s.endpoint
        inner join log l
        on s.endpoint = l.endpoint
        {date_query}
        {query}

        """,
        "_size": "max"
    })

    url = f"{datasette_url}digital-land.csv?{params}"
    df = pd.read_csv(url)
    df=df.drop_duplicates().reset_index(drop=True)
    if df.empty:
        # Reset the bold attribute at the end so it does not leak into
        # whatever is printed next.
        print(f"\033[1m No results found for  {collection}\033[0m")
    else:
        collection_dfs.append(df)
        output_collection_names.append(collection)

[1m No results found for  listed-building-outline
[1m No results found for  tree
[1m No results found for  tree-preservation-order
[1m No results found for  tree-preservation-zone


In [3]:
# Download the issue_type lookup table (description and severity for every
# issue type) from the digital-land database.
issue_type_sql = """
    select description, issue_type, severity
    from issue_type
    """
params = urllib.parse.urlencode({"sql": issue_type_sql, "_size": "max"})

url = f"{datasette_url}digital-land.csv?{params}"
issue_type = pd.read_csv(url)

In [4]:
# For every collection that produced results, download the issues recorded
# against each of its resources and attach the issue-type description/severity.
# issues_dfs[i] corresponds to collection_dfs[i].
issues_dfs=[]

# The optional line-number filter is the same for every request.
query=""
if line_number_input:
    query = f" and line_number = '{line_number_input}'"

# Pair each frame with the collection it came from. Empty collections were
# dropped when collection_dfs was built, so its positions line up with
# output_collection_names -- indexing collection_input by position would query
# the wrong database as soon as an earlier collection had no results.
for collection_name, collection_df in zip(output_collection_names, collection_dfs):
    # A resource can appear on several rows of collection_df; fetch it once so
    # its issues are not duplicated in the concatenated frame.
    resources = collection_df['resource'].drop_duplicates().tolist()
    issues = []
    for resource in resources:
        params = urllib.parse.urlencode({
        "sql": f"""
        select field,issue_type,dataset,resource,value, line_number
        from issue
        where resource = '{resource}'
        {query}
        """,
        "_size": "max"
        })
        # Issues live in the per-collection database, named after the collection.
        url = f"{datasette_url}{collection_name}.csv?{params}"
        issues.append(pd.read_csv(url))
    df1 = pd.concat(issues, ignore_index=True)
    issues_with_type = df1.merge(issue_type, left_on='issue_type', right_on='issue_type')
    issues_dfs.append(issues_with_type)

In [5]:
# Join each collection's endpoint/resource rows with its issues and arrange the
# columns for display. output_dfs[i] corresponds to collection_dfs[i].
output_dfs=[]
report_columns = [
    'resource', 'organisation', 'dataset', 'entry_date', 'field',
    'line_number', 'issue_type', 'value', 'severity', 'description',
    'endpoint', 'status', 'endpoint_url',
]
for position in range(len(collection_dfs)):
    joined = collection_dfs[position].merge(issues_dfs[position], left_on='resource', right_on='resource')
    report = joined.reindex(columns=report_columns).reset_index(drop=True)
    # Apply the configured severity filter only when one was given.
    if severity_input:
        keep = report['severity'].isin(severity_input)
        report = report.loc[keep]
    output_dfs.append(report)

In [6]:
# Show up to 1000 rows and every column when a DataFrame is displayed.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = None

def filter_column_by_value(df, column, value):
    """Return the rows of ``df`` whose ``column`` entry is one of ``value``.

    ``value`` is a collection of accepted entries; it is passed straight to
    ``Series.isin``. The original row labels are kept.
    """
    keep = df[column].isin(value)
    return df.loc[keep]

def compute_output_df(dataset, error, warning, info, notice, search):
    """Build the issues table for ``dataset`` with the chosen filters applied.

    Args:
        dataset: Collection name; must be an entry of ``output_collection_names``.
        error, warning, info, notice: One flag per severity. When at least one
            is truthy, only rows whose ``severity`` is among the truthy ones
            are kept; when all are falsy no severity filter is applied.
        search: Text to look for. When non-empty, only rows in which at least
            one column (converted to string) contains the text are kept. The
            text is matched literally, not as a regular expression.

    Returns:
        The filtered DataFrame taken from ``output_dfs``.

    Raises:
        ValueError: If ``dataset`` is not in ``output_collection_names``.
    """
    index = output_collection_names.index(dataset)
    output_df = output_dfs[index]

    # Honour the flags that were passed in instead of re-reading the checkbox
    # widgets, so the function gives the same answer for the same arguments.
    severity_flags = {"error": error, "warning": warning, "info": info, "notice": notice}
    selected_data = [name for name, flag in severity_flags.items() if flag]
    if selected_data:
        output_df = filter_column_by_value(output_df, 'severity', selected_data)

    if search:
        # regex=False: free text typed by a user may contain characters such
        # as "(" or "[" that are invalid (or overly broad) as a pattern.
        mask = np.column_stack([
            output_df[col].astype('str').str.contains(search, regex=False, na=False)
            for col in output_df
        ])
        output_df = output_df.loc[mask.any(axis=1)]
    return output_df

def display_output_df(dataset, error, warning, info, notice, search):
    """Display the first 1000 rows of the filtered issues table.

    All arguments are forwarded unchanged to ``compute_output_df``.
    """
    filtered = compute_output_df(
        dataset=dataset,
        error=error,
        warning=warning,
        info=info,
        notice=notice,
        search=search,
    )
    preview = filtered.head(1000)
    display(preview)

def download_df(dataset, error, warning, info, notice, search):
    """Write the filtered issues table to ``<dataset>-issues.csv``.

    All arguments are forwarded unchanged to ``compute_output_df``; the file
    is written relative to the current working directory.
    """
    filtered = compute_output_df(
        dataset=dataset,
        error=error,
        warning=warning,
        info=info,
        notice=notice,
        search=search,
    )
    target_name = dataset + "-issues.csv"
    filtered.to_csv(target_name)

# --- Controls --------------------------------------------------------------
# One checkbox per severity; the checkbox description doubles as the severity name.
severity_options = ["error", "warning", "info", "notice"]
severity_checkboxes = [widgets.Checkbox(value=False, description=severity) for severity in severity_options]

collection_selector = widgets.RadioButtons(
    options=output_collection_names,
    description='Select dataset to display:',
    disabled=False
)
search_box = widgets.Text(placeholder="Search table", layout=widgets.Layout(width='200px'))
download_button = widgets.Button(
    description = "Download output table",
    layout=widgets.Layout(width='200px'),
)


def _handle_download_click(b):
    """Save the table using the current state of every control."""
    download_df(
        collection_selector.value,
        *(checkbox.value for checkbox in severity_checkboxes),
        search_box.value,
    )


download_button.on_click(_handle_download_click)

# --- Layout and live output ------------------------------------------------
severity_filter = widgets.VBox(severity_checkboxes, layout = widgets.Layout(flex_flow='row wrap'))
ui = widgets.VBox([collection_selector, search_box, download_button, severity_filter])

# Map each display_output_df argument to the widget that supplies it.
controls = {'dataset': collection_selector}
controls.update(zip(severity_options, severity_checkboxes))
controls["search"] = search_box
out = widgets.interactive_output(display_output_df, controls)
display(ui, out)

VBox(children=(RadioButtons(description='Select dataset to display:', options=('article-4-direction', 'article…

Output()