In [1]:
from datetime import datetime as dt
from IPython.display import display
import urllib
import numpy as np
import math
import pandas as pd
import ipywidgets as widgets
import requests
import urllib.parse

In [2]:
# Base address of the Datasette instance that is queried for endpoint logs.
datasette_url = "https://datasette.planning.data.gov.uk/"

# Most recent result of update_dataframe(); read by the download cell below.
result_df = pd.DataFrame()

def update_dataframe(organisation):
    """Fetch one row per (endpoint, status) pair from the digital-land database.

    The SQL joins ``log``, ``source``, ``organisation``, ``endpoint`` and
    ``source_pipeline``, skips the "brownfield-land" collection, and groups by
    endpoint and status so every status an endpoint has logged appears once
    together with the latest log entry date for that status.

    Parameters
    ----------
    organisation : str or None
        Organisation identifier (for example "local-authority-eng:NET").
        A falsy value selects every organisation.

    Returns
    -------
    pandas.DataFrame
        The CSV response parsed by pandas.  The same frame is also stored in
        the module-level ``result_df``.
    """
    global result_df

    # A falsy selection falls back to a pattern that matches any organisation.
    org_filter = (
        f" s.organisation = '{organisation}'"
        if organisation
        else " s.organisation LIKE '%'"
    )

    sql = f"""
        select
          e.endpoint_url,
          l.status,
          s.collection,
          group_concat(DISTINCT sp.pipeline) as pipelines,
          s.organisation,
          max(l.entry_date) maxentrydate,
          max(e.entry_date) entrydate,
          e.end_date,
          o.name
        from
          log l
          inner join source s on l.endpoint = s.endpoint
          inner join organisation o on s.organisation=o.organisation
          inner join endpoint e on l.endpoint = e.endpoint
          inner join source_pipeline sp on s.source = sp.source
        where
           {org_filter} and not collection="brownfield-land"
        group by
          l.endpoint,
          l.status
        order by
          l.endpoint,
          s.collection,
          maxentrydate desc
        """

    # "_size": "max" is sent alongside the SQL as a second query parameter.
    query_string = urllib.parse.urlencode({"sql": sql, "_size": "max"})

    result_df = pd.read_csv(f"{datasette_url}digital-land.csv?{query_string}")
    return result_df

# Display label -> organisation identifier for every selectable LPA.
# "All" maps to None, which update_dataframe treats as "every organisation".
organisation_options = {
    "All": None,
    "Newcastle": "local-authority-eng:NET",
    "Medway": "local-authority-eng:MDW",
    "Lambeth": "local-authority-eng:LBH",
    "Gloucester": "local-authority-eng:GLO",
    "Doncaster": "local-authority-eng:DNC",
    "Buckinghamshire": "local-authority-eng:BUC",
    "Epsom and Ewell": "local-authority-eng:EPS",
    "Canterbury": "local-authority-eng:CAT",
}

# Dropdown whose current value is passed to update_dataframe as `organisation`.
organisation_dropdown = widgets.Dropdown(
    description="Select LPA:",
    options=organisation_options,
)

# Re-run the query and re-render the table each time the selection changes.
widgets.interact(
    update_dataframe,
    organisation=organisation_dropdown,
)
initial_organisation = organisation_dropdown.value

interactive(children=(Dropdown(description='Select LPA:', options={'All': None, 'Newcastle': 'local-authority-…

<function __main__.update_dataframe(organisation)>

In [3]:
# Ask whether the table produced by the most recent update_dataframe() call
# (held in result_df) should be written to a CSV file.
download = input("Do you want to download the table with all endpoints? (yes/no): ")

# Strip surrounding whitespace before comparing so an answer such as "yes "
# is not silently treated as a refusal.
if download.strip().lower() == "yes":
    result_df.to_csv("endpoints_with_all_status.csv", index=False)
    print("Query result downloaded as 'endpoints_with_all_status.csv'")

Do you want to download the table with all endpoints? (yes/no):  


In [8]:
# Most recent result of update_dataframe_latest_status(); read by the download cell.
new_df = pd.DataFrame()

def update_dataframe_latest_status(organisation):
    """Collapse the per-status endpoint table to one row per endpoint URL.

    The rows returned by ``update_dataframe`` are relied upon to keep all rows
    of one endpoint adjacent, with the row to keep (the first one) on top; that
    ordering comes from the query's ``order by`` clause.  For each endpoint
    only that first row is kept, extended with three columns:

    ``last_status``
        Status of the row directly below, filled only when the kept row's own
        status is 404, 500 or missing and the row below belongs to the same
        ``endpoint_url``.  ``None`` otherwise.
    ``last_updated_date``
        ``maxentrydate`` (parsed to a timestamp) of that row below, filled
        under the same condition.
    ``date_last_status_200``
        First 19 characters of the ``maxentrydate`` string of the endpoint's
        status-200 row.  Filled only when ``last_status`` is set and is not
        200, and the endpoint has a status-200 row at all.

    Parameters
    ----------
    organisation : str or None
        Passed straight through to ``update_dataframe``.

    Returns
    -------
    pandas.DataFrame
        The collapsed table, which is also stored in the module-level
        ``new_df``.
    """
    global new_df
    all_endpoints = update_dataframe(organisation)

    # Work on a copy with a 0..n-1 index so "the row below" is position + 1,
    # whatever index the incoming frame carries.
    latest = all_endpoints.copy().reset_index(drop=True)
    latest['maxentrydate'] = pd.to_datetime(latest['maxentrydate'])
    latest['last_status'] = None
    latest['last_updated_date'] = None
    latest['date_last_status_200'] = None

    urls = latest['endpoint_url']
    statuses = latest['status']
    entry_dates = latest['maxentrydate']

    # The final row has no row below it, hence len - 1.
    for pos in range(len(latest) - 1):
        status = statuses.iat[pos]
        is_failing = pd.isna(status) or status in (404, 500)
        if is_failing and urls.iat[pos] == urls.iat[pos + 1]:
            latest.at[pos, 'last_status'] = statuses.iat[pos + 1]
            latest.at[pos, 'last_updated_date'] = entry_dates.iat[pos + 1]

    latest = latest.drop_duplicates(subset='endpoint_url', keep='first').reset_index(drop=True)

    # Look up each endpoint's status-200 date once instead of re-filtering the
    # whole table for every row.  Dates come from the untouched input frame,
    # where they are still the raw values returned by the query.
    ok_rows = all_endpoints[all_endpoints['status'] == 200]
    first_ok_date = {}
    for url, entry_date in zip(ok_rows['endpoint_url'], ok_rows['maxentrydate']):
        first_ok_date.setdefault(url, entry_date)

    for pos in range(len(latest)):
        previous_status = latest.at[pos, 'last_status']
        if pd.isna(previous_status) or int(previous_status) == 200:
            continue
        ok_date = first_ok_date.get(latest.at[pos, 'endpoint_url'])
        # An endpoint that never returned 200 has no date to report; leave the
        # cell empty instead of failing on an empty lookup.
        if ok_date is not None and pd.notna(ok_date):
            latest.at[pos, 'date_last_status_200'] = str(ok_date)[:19]

    new_df = latest
    return new_df

# Rebuild the latest-status table whenever the LPA dropdown selection changes.
widgets.interact(
    update_dataframe_latest_status,
    organisation=organisation_dropdown,
)
initial_organisation = organisation_dropdown.value

interactive(children=(Dropdown(description='Select LPA:', index=3, options={'All': None, 'Newcastle': 'local-a…

In [5]:
# Ask whether the table produced by the most recent
# update_dataframe_latest_status() call (held in new_df) should be written to
# a CSV file.
download = input("Do you want to download the table with latest endpoints? (yes/no): ")

# Strip surrounding whitespace before comparing so an answer such as "yes "
# is not silently treated as a refusal.
if download.strip().lower() == "yes":
    new_df.to_csv("endpoints_with_latest_status.csv", index=False)
    print("Query result downloaded as 'endpoints_with_latest_status.csv'")

Do you want to download the table with latest endpoints? (yes/no):  


In [9]:
# Most recent result of update_dataframe_erroring_endpoints(); read by the download cell.
filtered_df = pd.DataFrame()

def update_dataframe_erroring_endpoints(organisation):
    """Return the latest-status rows whose status is anything other than 200.

    Parameters
    ----------
    organisation : str or None
        Passed straight through to ``update_dataframe_latest_status``.

    Returns
    -------
    pandas.DataFrame
        Rows with ``status != 200`` (a missing status counts as "not 200"),
        renumbered from zero.  The frame is also stored in the module-level
        ``filtered_df``.
    """
    global filtered_df
    filtered_df = update_dataframe_latest_status(organisation)
    not_ok = filtered_df['status'].ne(200)
    filtered_df = filtered_df.loc[not_ok].reset_index(drop=True)
    return filtered_df

# Rebuild the erroring-endpoints table whenever the LPA dropdown selection changes.
widgets.interact(
    update_dataframe_erroring_endpoints,
    organisation=organisation_dropdown,
)
initial_organisation = organisation_dropdown.value

interactive(children=(Dropdown(description='Select LPA:', index=3, options={'All': None, 'Newcastle': 'local-a…

In [7]:
# Ask whether the table produced by the most recent
# update_dataframe_erroring_endpoints() call (held in filtered_df) should be
# written to a CSV file.
download = input("Do you want to download the table with erroring endpoints being collected till date? (yes/no): ")

# Strip surrounding whitespace before comparing so an answer such as "yes "
# is not silently treated as a refusal.
if download.strip().lower() == "yes":
    filtered_df.to_csv("endpoints_not_200.csv", index=False)
    print("Query result downloaded as 'endpoints_not_200.csv'")

Do you want to download the table with erroring endpoints being collected till date? (yes/no):  
