In [2]:
import pandas as pd
import urllib.parse
import ipywidgets as widgets

# Description

In [83]:
# Base URL of the Datasette instance queried by get_organisations().
datasette_url = "https://datasette.planning.data.gov.uk/"

# Base URL under which each dataset is published as "<dataset>.csv".
dataset_files_url = "https://files.planning.data.gov.uk/dataset/"

# Dataset name -> URL of its CSV export. The URLs are derived from the names so
# that a key and the file it points at cannot drift apart.
collection_csv_dict = {
    dataset: f"{dataset_files_url}{dataset}.csv"
    for dataset in (
        "article-4-direction",
        "article-4-direction-area",
        "conservation-area",
        "conservation-area-document",
        "tree-preservation-order",
        "tree",
        "tree-preservation-zone",
    )
}

# Dropdown label -> [first dataset, second dataset]. Both names must be keys of
# collection_csv_dict; get_spatial_doc_matched_df joins the second dataset's
# column named after the first dataset to the first dataset's `reference`.
# (No `global` statement is needed: this is already module scope.)
collection_options = {
    "Article 4 Direction and Article 4 Direction Area": ["article-4-direction", "article-4-direction-area"],
    "Conservation Area and Documents": ["conservation-area", "conservation-area-document"],
    "Tree Preservation Order and Tree": ["tree-preservation-order", "tree"],
    "Tree Preservation Order and Tree Preservation Zones": ["tree-preservation-order", "tree-preservation-zone"],
}

# Dropdown over the label -> dataset-pair mapping in collection_options; its
# value is the dataset-name list of the selected label.
collection_dropdown = widgets.Dropdown(
    description="Select dataset combination:",
    options=collection_options,
)

def get_organisations():
    """Fetch every row and column of the ``organisation`` table as a DataFrame.

    Sends a ``select *`` query (with ``_size=max``) to the ``digital-land.csv``
    endpoint under the module-level ``datasette_url`` and parses the CSV
    response with ``pd.read_csv``.

    Side effects:
        Stores the result in the global ``org_df``.

    Returns:
        pandas.DataFrame: the parsed query result (the same object as the
        global ``org_df``).
    """
    global org_df
    query = {
        "sql": """
        select
          *
        from
          organisation o
        """,
        "_size": "max",
    }
    query_string = urllib.parse.urlencode(query)
    org_df = pd.read_csv(f"{datasette_url}digital-land.csv?{query_string}")
    return org_df

def get_spatial_doc_matched_df(dataset_combination):
    """Match the entities of two related datasets on reference and organisation.

    Args:
        dataset_combination: sequence whose first two items are dataset names
            that are keys of ``collection_csv_dict``. The second dataset's CSV
            must contain a column named after the first dataset; that column
            is joined to the first dataset's ``reference``.

    Returns:
        pandas.DataFrame with the columns ``entity_x``/``dataset_x`` (first
        dataset), ``entity_y``/``dataset_y`` (second dataset), ``name``
        (organisation name), ``reference`` and the link column. The datasets
        are outer-joined, so a row without a counterpart has NaN in the other
        dataset's columns.

    Side effects:
        Downloads both dataset CSVs and the organisation table on every call,
        and stores the result in the global ``merged_df``.
    """
    global merged_df
    first_dataset = dataset_combination[0]
    second_dataset = dataset_combination[1]

    # Read the join keys as strings. Left to type inference, the two key
    # columns can end up with different (or, on large files, mixed) dtypes, in
    # which case the merge either raises or silently fails to match equal
    # identifiers such as 123 and "123". Missing values still load as NaN.
    df_one = pd.read_csv(collection_csv_dict[first_dataset], dtype={'reference': str})
    df_two = pd.read_csv(collection_csv_dict[second_dataset], dtype={first_dataset: str})

    df_one = df_one[['entity', 'dataset', 'organisation-entity', 'reference']]
    df_two = df_two[['entity', 'dataset', 'organisation-entity', first_dataset]]

    # `entity` and `dataset` exist on both sides, so pandas suffixes them with
    # _x (first dataset) and _y (second dataset).
    matched = pd.merge(
        df_one,
        df_two,
        how='outer',
        left_on=['reference', 'organisation-entity'],
        right_on=[first_dataset, 'organisation-entity'],
    )

    # Attach the organisation name; the organisation table's `entity` is the
    # value referenced by `organisation-entity`.
    org_names = get_organisations()[['name', 'entity']]
    matched = pd.merge(matched, org_names, how='left', left_on='organisation-entity', right_on='entity')

    merged_df = matched[['entity_x', 'entity_y', 'dataset_x', 'dataset_y', 'name', 'reference', first_dataset]]
    return merged_df

# Render a dropdown built from collection_options; on each selection interact
# calls get_spatial_doc_matched_df with the chosen dataset pair and shows the
# returned table.
widgets.interact(
    get_spatial_doc_matched_df,
    dataset_combination=collection_options,
)
# NOTE(review): collection_dropdown is a separate widget from the dropdown
# that interact displays, so this value does not follow the user's selection.
# Despite its name it holds a dataset combination, not an organisation.
initial_organisation = collection_dropdown.value

interactive(children=(Dropdown(description='dataset_combination', options={'Article 4 Direction and Article 4 …

In [74]:
# Offer to save the latest matched table. `merged_df` is the global set by
# get_spatial_doc_matched_df, so that function must have run first.
download = input("Do you want to download the table? (yes/no): ")

# Ignore case and stray whitespace, and accept "y", so that an affirmative
# answer such as "yes " is not silently ignored.
if download.strip().lower() in {"yes", "y"}:
    merged_df.to_csv("merged_df.csv", index=False)
    print("Query result downloaded as 'merged_df.csv'")

Do you want to download the table? (yes/no):  yes


Query result downloaded as 'merged_df.csv'


In [95]:
def get_null_entities(dataset_combination):
    """Return the matched rows that have a missing value in at least one column.

    Args:
        dataset_combination: passed unchanged to
            ``get_spatial_doc_matched_df``.

    Returns:
        pandas.DataFrame: the rows of the matched table in which any column
        is null, re-indexed from 0.

    Side effects:
        Stores the result in the global ``null_entities``. Because it calls
        ``get_spatial_doc_matched_df``, the global ``merged_df`` is refreshed
        as well.
    """
    global null_entities
    matched = get_spatial_doc_matched_df(dataset_combination)
    # A row qualifies when any of its columns is null, not only the entity columns.
    has_missing = matched.isna().any(axis="columns")
    null_entities = matched.loc[has_missing].reset_index(drop=True)
    return null_entities
    

# Render a dropdown built from collection_options; on each selection interact
# calls get_null_entities with the chosen dataset pair and shows the returned
# table.
widgets.interact(
    get_null_entities,
    dataset_combination=collection_options,
)
# NOTE(review): collection_dropdown is a separate widget from the dropdown
# that interact displays, so this value does not follow the user's selection.
# Despite its name it holds a dataset combination, not an organisation.
initial_organisation = collection_dropdown.value

interactive(children=(Dropdown(description='dataset_combination', options={'Article 4 Direction and Article 4 …

In [97]:
# Offer to save the latest table of rows with missing values. `null_entities`
# is the global set by get_null_entities, so that function must have run first.
download = input("Do you want to download the table? (yes/no): ")

# Ignore case and stray whitespace, and accept "y", so that an affirmative
# answer such as "yes " is not silently ignored.
if download.strip().lower() in {"yes", "y"}:
    null_entities.to_csv("null_entities.csv", index=False)
    print("Query result downloaded as 'null_entities.csv'")

Do you want to download the table? (yes/no):  yes


Query result downloaded as 'null_entities.csv'
