In [None]:
from datetime import datetime as dt
from IPython.display import display, HTML
import ipywidgets as widgets
import urllib
import pandas as pd

In [None]:
# --- Report parameters ------------------------------------------------------
# Collections to report on; each one is queried separately further down.
collection_input = [
    'conservation-area',
    'article-4-direction-area',
    'article-4-direction',
    'listed-building-outline',
    'tree',
    'tree-preservation-order',
    'tree-preservation-zone',
]
# Optional filters: an empty value disables the corresponding filter.
organisation_input = ''
severity_input = []  # list of severities e.g. ["error", "warning", "info", "notice"]
line_number_input = ''

# Base URL that every CSV query in this notebook is built on.
datasette_url = "https://datasette.planning.data.gov.uk/"

# Date the notebook is run on (not referenced by the queries in the cells visible here).
current_date = dt.now().date()

# WHERE clause keeping only sources whose entry_date begins with '2023'.
date_query = " where SUBSTRING(s.entry_date, 1, 4) = '2023'"


# For every requested collection, fetch the resource/endpoint/source rows that
# match the date (and optional organisation) filter. Collections that return
# no rows are reported and skipped, so collection_dfs and
# output_collection_names stay aligned with each other but NOT with
# collection_input.
collection_dfs = []
output_collection_names = []
for collection in collection_input:
    # The collection filter always applies: we are iterating the collections.
    query = f" and s.collection = '{collection}'"

    if organisation_input:
        query = query + f" and s.organisation = '{organisation_input}'"

    params = urllib.parse.urlencode({
        "sql": f"""
        select re.resource, re.endpoint, s.organisation, s.entry_date, e.endpoint_url
        from resource_endpoint re
        inner join endpoint e
        on re.endpoint = e.endpoint
        inner join source s
        on e.endpoint = s.endpoint
        {date_query}
        {query}

        """,
        "_size": "max"
    })

    url = f"{datasette_url}digital-land.csv?{params}"
    df = pd.read_csv(url)
    if df.empty:
        # Single print (the nested print also emitted a stray "None"); the
        # trailing escape switches bold off again for any later output.
        print("\033[1m No results found for ", collection, "\033[0m")
    else:
        collection_dfs.append(df)
        output_collection_names.append(collection)

In [None]:
# Lookup table of issue types with their description and severity; it is
# merged onto the per-collection issues later on.
issue_type_sql = """
    select description, issue_type, severity
    from issue_type
    """
params = urllib.parse.urlencode({"sql": issue_type_sql, "_size": "max"})

url = datasette_url + "digital-land.csv?" + params
issue_type = pd.read_csv(url)

In [None]:
# For each collection that returned results, download the issues logged
# against its resources and attach the issue-type description and severity.
issues_dfs = []

# Optional line-number filter; it is the same for every request, so build it once.
query = ""
if line_number_input:
    query = f" and line_number = '{line_number_input}'"

# collection_dfs only holds collections that returned rows, so each frame is
# paired with output_collection_names (appended in lockstep with it) instead
# of indexing collection_input, whose positions no longer line up once any
# collection has been skipped.
for collection_name, collection_df in zip(output_collection_names, collection_dfs):
    # Ask for each resource once: a repeated resource would repeat the request
    # and duplicate its issue rows when the frames are merged on 'resource'.
    resources = collection_df['resource'].drop_duplicates().tolist()
    issues = []
    for resource in resources:
        params = urllib.parse.urlencode({
        "sql": f"""
        select field,issue_type,dataset,resource,value, line_number
        from issue
        where resource = '{resource}'
        {query}
        """,
        "_size": "max"
        })
        # Issues live in the per-collection database named after the collection.
        url = f"{datasette_url}{collection_name}.csv?{params}"
        issues.append(pd.read_csv(url))
    df1 = pd.concat(issues, ignore_index=True)
    issues_with_type = df1.merge(issue_type, on='issue_type')
    issues_dfs.append(issues_with_type)

In [None]:
# Build one report table per collection: endpoint/source rows joined to their
# issues on 'resource', trimmed to the report columns, indexed by organisation
# and, when severities are configured, filtered down to those severities.
report_columns = ['organisation', 'dataset', 'endpoint_url', 'entry_date', 'field', 'line_number', 'issue_type', 'value', 'severity', 'description']

output_dfs = []
for idx in range(len(collection_dfs)):
    joined = collection_dfs[idx].merge(issues_dfs[idx], on='resource')
    collection_issues_df = joined.reindex(columns=report_columns).set_index('organisation')
    if severity_input:
        severity_mask = collection_issues_df['severity'].isin(severity_input)
        collection_issues_df = collection_issues_df.loc[severity_mask]
    output_dfs.append(collection_issues_df)

In [None]:
def f(dataset):
    """Render the issues table for the selected collection as HTML.

    Args:
        dataset: Collection name as listed in ``output_collection_names``.
            May be ``None`` when the selector has no options to choose from.

    Returns:
        None. The table is shown through ``display``.
    """
    # With no selectable collections the widget passes None; show a short
    # note instead of letting list.index raise ValueError.
    if dataset not in output_collection_names:
        print("No dataset available to display.")
        return
    index = output_collection_names.index(dataset)
    display(HTML(output_dfs[index].to_html()))

# Radio buttons listing the collections that returned results. Changing the
# selection calls f with the chosen name and shows its table in `out`.
selector = widgets.RadioButtons(
    description='Select dataset to display',
    options=output_collection_names,
    disabled=False,
)
out = widgets.interactive_output(f, dict(dataset=selector))
ui = widgets.VBox(children=[selector])
display(ui, out)