In [None]:
from datetime import datetime as dt
from IPython.display import display
import ipywidgets as widgets
import urllib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# --- Notebook inputs ---------------------------------------------------------
# Collections to report on; each one is queried separately below.
collection_input=['article-4-direction', 'article-4-direction-area', 'brownfield-land', 'conservation-area',  'listed-building-outline', 'tree', 'tree-preservation-order', 'tree-preservation-zone']
severity_input=[] # list of issue severities you want to get e.g ["error", "warning", "info", "notice"]
# Optional line number to restrict the issue lookup to; empty string disables it.
line_number_input=''

datasette_url = "https://datasette.planning.data.gov.uk/"

# Collect list of organisations
params = urllib.parse.urlencode({
        "sql": f"""
        select organisation, name, entity as organisation_entity
        from organisation
        """,
        "_size": "max"
        })
# Build the URL from datasette_url (rather than repeating the host) so every
# request in this notebook targets the same Datasette instance.
url = f"{datasette_url}digital-land.csv?{params}"
organisations_df = pd.read_csv(url)

# collection_dfs[i] holds the endpoint/resource rows for output_collection_names[i].
# Collections that return nothing are skipped, so these two lists stay aligned
# with each other but NOT with collection_input.
collection_dfs=[]
output_collection_names=[]
for collection in collection_input:
    collection_organisations_dfs=[]
    for organisation in organisations_df.itertuples():
        # One request per (collection, organisation) pair. The inner query ranks
        # the organisation's sources by entry_date (newest first) and the outer
        # query keeps only the top-ranked row. Only sources with an empty
        # end_date are considered (presumably "still active" - confirm).
        params = urllib.parse.urlencode({
            "sql": f"""
            select x.resource, x.endpoint, x.organisation, x.name, x.entry_date, x.endpoint_url, x.collection
            from (
                select re.resource, re.endpoint, s.organisation, o.name, s.collection, s.entry_date, e.endpoint_url,
                    row_number() over (partition by s.organisation order by s.entry_date desc) as row_number
                from resource_endpoint re
                inner join endpoint e
                on re.endpoint = e.endpoint
                inner join source s
                on e.endpoint = s.endpoint
                inner join resource_organisation ro
                on re.resource = ro.resource
                inner join organisation o
                on ro.organisation = o.organisation
                where s.collection = '{collection}'
                and s.organisation = '{organisation.organisation}'
                and s.end_date = ''
            ) x
            where x.row_number = 1
            """,
            "_size": "max"
        })

        url = f"{datasette_url}digital-land.csv?{params}"
        df = pd.read_csv(url)

        if not df.empty:
            collection_organisations_dfs.append(df)

    if not collection_organisations_dfs:
        # Reset the ANSI bold attribute at the end of the message so later
        # output in the same stream is not rendered bold as well.
        print(f"\033[1m No results found for  {collection}\033[0m")
    else:
        # Combine the per-organisation rows into one frame for this collection.
        collection_dfs.append(pd.concat(collection_organisations_dfs))
        output_collection_names.append(collection)

In [None]:
# Fetch the issue_type reference table (description and severity for each
# issue type) from the digital-land database.
issue_type_sql = """
    select description, issue_type, severity
    from issue_type
    """
params = urllib.parse.urlencode(dict(sql=issue_type_sql, _size="max"))

url = datasette_url + "digital-land.csv?" + params
issue_type = pd.read_csv(url)

In [None]:
# Optional extra WHERE clause restricting issues to one line number. It is the
# same for every request, so it is built once before the loops.
line_number_clause = ""
if line_number_input:
    line_number_clause = f" and line_number = '{line_number_input}'"

# issues_dfs[i] holds the issues (joined with their type description and
# severity) for collection_dfs[i].
issues_dfs=[]
# collection_dfs only contains collections that returned endpoints, so the
# database name must come from output_collection_names, which was built in
# lockstep with it. Indexing collection_input by position would query the
# wrong database as soon as an earlier collection had been skipped.
for collection_name, collection_df in zip(output_collection_names, collection_dfs):
    resource_issues = []
    for resource in collection_df['resource'].tolist():
        params = urllib.parse.urlencode({
        "sql": f"""
        select field,issue_type,dataset,resource,value, line_number
        from issue
        where resource = '{resource}'
        {line_number_clause}
        """,
        "_size": "max"
        })
        # Each collection's issues are read from the database named after it.
        url = f"{datasette_url}{collection_name}.csv?{params}"
        resource_issues.append(pd.read_csv(url))
    collection_issues = pd.concat(resource_issues, ignore_index=True)
    # Inner join: attaches description and severity, and drops any issue whose
    # issue_type is missing from the issue_type reference table.
    issues_with_type = collection_issues.merge(issue_type, on='issue_type')
    issues_dfs.append(issues_with_type)

In [None]:
# Column order of the final report tables.
output_columns = [
    'resource', 'organisation', 'name', 'dataset', 'entry_date', 'field',
    'line_number', 'issue_type', 'value', 'severity', 'description',
    'endpoint', 'endpoint_url',
]

# Join every collection's endpoint rows with its issues (matched on resource),
# arrange the columns, and optionally keep only the requested severities.
output_dfs = []
for position in range(len(collection_dfs)):
    merged = collection_dfs[position].merge(issues_dfs[position], on='resource')
    merged = merged.reindex(columns=output_columns).reset_index(drop=True)
    if severity_input:
        wanted_severity = merged['severity'].isin(severity_input)
        merged = merged.loc[wanted_severity]
    output_dfs.append(merged)

In [None]:
# Display settings for the tables rendered below: show up to 1000 rows and
# do not limit the number of columns.
for option_name, option_value in (
    ('display.max_rows', 1000),
    ('display.max_columns', None),
):
    pd.set_option(option_name, option_value)

def filter_column_by_value(df, column, value):
    """Return the rows of ``df`` whose ``column`` entry is one of ``value``.

    ``value`` is a collection of accepted entries (it is passed straight to
    ``Series.isin``). The original index labels are preserved.
    """
    keep_row = df[column].isin(value)
    return df.loc[keep_row]

def compute_output_df(dataset, error, warning, info, notice, search):
    """Return the issues table for ``dataset`` after applying the UI filters.

    Parameters
    ----------
    dataset : str
        Collection name; must be present in ``output_collection_names``
        (a ``ValueError`` is raised otherwise).
    error, warning, info, notice : bool
        Whether rows of that severity should be kept. When none of them is
        set, no severity filtering is applied.
    search : str
        Text to look for. A row is kept when any of its columns, converted
        to a string, contains this text. The text is matched literally
        (not as a regular expression) and case-sensitively. An empty string
        disables the search.

    Returns
    -------
    pandas.DataFrame
        The filtered rows of the dataset's entry in ``output_dfs``.
    """
    output_df = output_dfs[output_collection_names.index(dataset)]

    # Derive the selection from the arguments themselves rather than reading
    # the checkbox widgets, so the result depends only on what the caller passed.
    severity_flags = {"error": error, "warning": warning, "info": info, "notice": notice}
    selected_severities = [name for name, enabled in severity_flags.items() if enabled]
    if selected_severities:
        output_df = filter_column_by_value(output_df, 'severity', selected_severities)

    if search:
        # regex=False: the search box holds arbitrary user text. Characters
        # such as "(" or "[" would otherwise raise a regex error, and "." or
        # "?" (common in endpoint URLs) would match more than was typed.
        mask = np.column_stack([
            output_df[col].astype('str').str.contains(search, na=False, regex=False)
            for col in output_df
        ])
        output_df = output_df.loc[mask.any(axis=1)]
    return output_df

def display_output_df(dataset, error, warning, info, notice, search):
    """Render the filtered issues table for ``dataset``, limited to 1000 rows.

    All arguments are forwarded unchanged to ``compute_output_df``.
    """
    filtered = compute_output_df(dataset, error, warning, info, notice, search)
    preview = filtered.head(1000)
    display(preview)

def download_df(dataset, error, warning, info, notice, search):
    """Save the filtered issues table for ``dataset`` as ``<dataset>-issues.csv``.

    All arguments are forwarded unchanged to ``compute_output_df``. The file
    name is relative, so the CSV is written to the current working directory.
    """
    filtered = compute_output_df(dataset, error, warning, info, notice, search)
    csv_name = dataset + "-issues.csv"
    filtered.to_csv(csv_name)

# One checkbox per severity; the checkbox description doubles as the severity name.
severity_options = ["error", "warning", "info", "notice"]
severity_checkboxes = []
for severity in severity_options:
    severity_checkboxes.append(widgets.Checkbox(value=False, description=severity))

# Only collections that actually returned results can be selected.
collection_selector = widgets.RadioButtons(
    options=output_collection_names,
    description='Select dataset to display:',
    disabled=False
)
search_box = widgets.Text(placeholder="Search table", layout=widgets.Layout(width='200px'))
download_button = widgets.Button(
    description = "Download output table",
    layout=widgets.Layout(width='200px'),
)


def _on_download_clicked(button):
    """Export the table using whatever the controls are set to at click time."""
    download_df(
        collection_selector.value,
        *(checkbox.value for checkbox in severity_checkboxes),
        search_box.value,
    )


download_button.on_click(_on_download_clicked)

severity_filter = widgets.VBox(severity_checkboxes, layout = widgets.Layout(flex_flow='row wrap'))
ui = widgets.VBox([collection_selector, search_box, download_button, severity_filter])

# Map every display_output_df argument to the widget that supplies its value.
table_controls = {'dataset': collection_selector}
table_controls.update(zip(severity_options, severity_checkboxes))
table_controls["search"] = search_box
out = widgets.interactive_output(display_output_df, table_controls)
display(ui, out)

In [None]:
def display_output_df(dataset, error, warning, info, notice, search):
    """Draw a bar chart of the number of issues per field for ``dataset``.

    All arguments are forwarded unchanged to ``compute_output_df``; the chart
    is built from the ``field`` column of the filtered table. The total issue
    count is printed above the chart.

    NOTE(review): this reuses the name of the table-rendering function defined
    in an earlier cell and therefore replaces it in the notebook namespace.
    """
    output_df = compute_output_df(dataset, error, warning, info, notice, search)

    # Tally issues per field; dict insertion order keeps the fields in the
    # order they first appear in the table.
    field_count = {}
    for field in output_df['field']:
        field_count[field] = field_count.get(field, 0) + 1
    x_values = list(field_count.keys())
    y_values = list(field_count.values())

    print("Issue count for",dataset,"is",sum(y_values))

    # brownfield-land gets a wider canvas than the other datasets.
    if dataset == "brownfield-land":
        figsize = (20, 5)
    else:
        figsize = (5, 5)

    fig, ax1 = plt.subplots(1, 1, figsize=figsize)

    # Write each bar's count just above where the bar will be drawn.
    for position, count in enumerate(y_values):
        ax1.text(position, count, str(count), ha='center', va='bottom')

    ax1.bar(x_values, y_values)
    ax1.set_xlabel('issue field')
    ax1.set_ylabel('count')
    ax1.set_title('Issues Type')
    ax1.set_xticks(x_values)
    ax1.tick_params(axis='x', rotation=35)
    
# Chart controls: dataset selector plus the severity checkboxes. The search box
# is not shown here, but its current value is still passed to the chart function.
ui = widgets.VBox([collection_selector, severity_filter])
chart_controls = {
    'dataset': collection_selector,
    "error": severity_checkboxes[0],
    "warning": severity_checkboxes[1],
    "info": severity_checkboxes[2],
    "notice": severity_checkboxes[3],
    "search": search_box,
}
out = widgets.interactive_output(display_output_df, chart_controls)

display(ui, out)