# List of endpoints (404s) that have been failing for the last 5 days

In [2]:
import pandas as pd
import numpy as np
import urllib
from datetime import date
import urllib.parse
import matplotlib 

# Base URL of the Datasette instance queried by this notebook.
datasette_url = "https://datasette.planning.data.gov.uk/"

# Query: brownfield-land endpoints whose log rows have status '404' on at
# least five distinct entry_date values since DATE('now', '-5 day').
# `log` is joined to `source` on endpoint to pick up collection/organisation,
# and the result is grouped per endpoint and ordered by organisation.
#
# The SQL is a plain string (not an f-string): it has no placeholders, and an
# f-prefix would make any future literal `{`/`}` in the query an interpolation.
#
# NOTE(review): COUNT(DISTINCT l.entry_date) counts distinct entry_date
# *values*; the sample output shows full timestamps, so this equals
# "distinct days" only if each endpoint is logged once per day - confirm.
params = urllib.parse.urlencode({
    "sql": """
     SELECT l.endpoint, l.status, l.exception, s.collection, s.organisation, l.entry_date
  FROM log l
  inner join source s
    on l.endpoint = s.endpoint where
   l.entry_date >= DATE('now', '-5 day')
   and l.status = '404'
   and s.collection = 'brownfield-land'
   GROUP BY l.endpoint
HAVING COUNT(DISTINCT l.entry_date) >= 5
order by s.organisation

    """,
    "_size": "max"
})

# Fetch the query result as CSV straight into a DataFrame.
url = f"{datasette_url}digital-land.csv?{params}"
df = pd.read_csv(url)

# Bold heading via ANSI escapes; \033[0m switches bold off again so the
# formatting does not bleed into whatever is written to the stream next.
print("\033[1m List of endpoints failing consecutively since more than 5 days\033[0m")
df

[1m List of endpoints failing consecutively since more than 5 days


Unnamed: 0,endpoint,status,exception,collection,organisation,entry_date
0,5d30aee8c82e775dd4be67dd417bf782b33de8522edc1e...,404,,brownfield-land,development-corporation:Q20648596,2023-07-15T00:23:09Z
1,421cc4d3b8a060560139bbcede1787693ad9fbb503f0a9...,404,,brownfield-land,local-authority-eng:ARU,2023-07-15T00:23:09Z
2,4d52c3bba39d8946fa880a6c81b32004af4234027926b8...,404,,brownfield-land,local-authority-eng:ARU,2023-07-15T00:23:09Z
3,efcce94049ff26654afcbcfd93741be8dca6160802d934...,404,,brownfield-land,local-authority-eng:ASH,2023-07-15T00:23:09Z
4,aab812d311605fb522ffde4e5118ef82372b32a0174efa...,404,,brownfield-land,local-authority-eng:BAE,2023-07-15T00:23:09Z
...,...,...,...,...,...,...
233,035538e79df78d7564f052a0aa3c3fe2d4b12044fc49f9...,404,,brownfield-land,national-park-authority:Q72617158,2023-07-15T00:23:09Z
234,14d7b1b539cbc40fd49b196b819b5b9ca05c64b5320482...,404,,brownfield-land,national-park-authority:Q72617158,2023-07-15T00:23:09Z
235,36fd71f4382c8051355d53e4c323fc43ad92ea2e421637...,404,,brownfield-land,national-park-authority:Q72617669,2023-07-15T00:23:09Z
236,81bf05fd3a01607072e23896ec4e61979e2f572828147f...,404,,brownfield-land,national-park-authority:Q72617669,2023-07-15T00:23:09Z


# Organisations with failed endpoints, and whether each organisation has other, more recent endpoints

In [4]:
# Organisations of the failed endpoints, and whether each organisation has
# other, more recent endpoints.
#
# Inner query: organisations owning a brownfield-land endpoint with status
# '404' on at least five distinct log entry_date values since
# DATE('now', '-5 day').
# Outer query: for those organisations, brownfield-land sources with an empty
# end_date joined to log rows from the same window, grouped per organisation
# and kept when the group has at least five distinct log entry_date values.
# MAX(s.entry_date) is reported as latest_entry_date.
#
# NOTE(review): endpoint, source, status and l.entry_date are neither grouped
# nor aggregated, so each organisation row shows values from one row picked by
# SQLite - confirm this is the row intended.
#
# The SQL is a plain string (not an f-string) because it has no placeholders.
params = urllib.parse.urlencode({
    "sql": """
    SELECT s.collection, s.endpoint, s.end_date, s.organisation, s.source, MAX(s.entry_date) AS latest_entry_date, l.status, l.entry_date
FROM source s
INNER JOIN log l ON s.endpoint = l.endpoint
WHERE s.organisation IN (
    SELECT s2.organisation
    FROM log l2
    INNER JOIN source s2 ON l2.endpoint = s2.endpoint
    WHERE l2.entry_date >= DATE('now', '-5 day')
    AND l2.status = '404'
    AND s2.collection = 'brownfield-land'
    GROUP BY l2.endpoint
    HAVING COUNT(DISTINCT l2.entry_date) >= 5
)
AND s.collection = 'brownfield-land'
and s.end_date = '' and l.entry_date >= DATE('now', '-5 day')
GROUP BY s.organisation
    HAVING COUNT(DISTINCT l.entry_date) >= 5

    """,
    "_size": "max"
})

# Fetch the query result as CSV straight into a DataFrame.
# Note: this rebinds `df`, so the previous query's result is replaced here.
url = f"{datasette_url}digital-land.csv?{params}"
df = pd.read_csv(url)

# Spacer line kept from the original output; bold is switched off again
# (\033[0m) so the escape does not affect later output.
print("\033[1m \033[0m")
df


[1m 


Unnamed: 0,collection,endpoint,end_date,organisation,source,latest_entry_date,status,entry_date
0,brownfield-land,a16e45dbefe2d67a6d27c086768b6c3610d4e057bb1962...,,development-corporation:Q20648596,a656a67fa2f327442c81886805a9b630,2019-12-01T00:00:00Z,200.0,2023-07-15T00:23:09Z
1,brownfield-land,ea98ea4d156ee47ff09af98d96d09951395b58e66d8b5f...,,local-authority-eng:ADU,dfda97292488d1926395113c7b6180c1,2023-07-06T11:11:52Z,200.0,2023-07-15T00:23:09Z
2,brownfield-land,133c2c73aa8288c55eddb9f547f10b002860503b1b435d...,,local-authority-eng:ARU,f0d836667ce15a34eade0a5ae1d37a32,2023-07-06T11:11:52Z,200.0,2023-07-15T00:23:09Z
3,brownfield-land,1c756f62c7d4335432c4dbe586300d51985bb7f03141f8...,,local-authority-eng:ASH,d87923ebbebef588e0e3653edef7f0dd,2023-07-06T11:11:52Z,200.0,2023-07-15T00:23:09Z
4,brownfield-land,23a0c6d0c737d5469a81ba4e83dee129b9898250af1ede...,,local-authority-eng:BAE,80f44f99897b7ae1d43f7a2579599554,2023-07-06T11:11:52Z,200.0,2023-07-15T00:23:09Z
...,...,...,...,...,...,...,...,...
130,brownfield-land,c6ad64da5e42a0feb55eba71fa1809473c4c4417cc4d61...,,local-authority-eng:WYR,a82ee94cab8ecc473af2320db250db88,2021-09-08T00:00:00Z,404.0,2023-07-15T00:23:09Z
131,brownfield-land,39f98ebf18256e8e5a5c049b591c52d121b0a1bf0a7fbf...,,national-park-authority:Q27159704,527dc744e3632101ca9fbde1585d37d7,2022-03-09T00:00:00Z,200.0,2023-07-15T00:23:09Z
132,brownfield-land,4c5b5e5b7853a69b9f5c26d8847df17213d8e2ca3f6656...,,national-park-authority:Q4972284,6d5e82bc95138cc2e2c4d24d63758912,2020-12-17T00:00:00Z,200.0,2023-07-15T00:23:09Z
133,brownfield-land,36e2aca78ebc25d31cf598e884b0be28039f19f4714b29...,,national-park-authority:Q72617158,e2fe67c417e66eab18be0d03ffcff1cb,2021-12-30T00:00:00Z,200.0,2023-07-15T00:23:09Z


In [None]:
# Optionally save the current `df` to 'query_result.csv' in the working directory.
try:
    download = input("Do you want to download the result? (yes/no): ")
except (EOFError, NotImplementedError):
    # No interactive stdin (e.g. a headless "Run All"): treat as "no" so the
    # notebook keeps running instead of stopping on this cell.
    # IPython's StdinNotImplementedError is a NotImplementedError subclass.
    download = "no"

# Ignore surrounding whitespace and case; accept "y" as shorthand for "yes".
if download.strip().lower() in {"yes", "y"}:
    # Save the DataFrame as a CSV file (row index omitted)
    df.to_csv("query_result.csv", index=False)
    print("Query result downloaded as 'query_result.csv'")

In [None]:
# Keep only the rows whose logged status is 404, then display them.
is_not_found = df["status"].eq(404)
filtered_df = df.loc[is_not_found]
filtered_df

# Observation: for local-authority-eng:CHS the older endpoints are passing,
# while the newly added endpoint is failing.

In [None]:
# Optionally save `filtered_df` (the 404 rows) to 'endpoint_failing_result.csv'
# in the working directory.
try:
    download = input("Do you want to download the result? (yes/no): ")
except (EOFError, NotImplementedError):
    # No interactive stdin (e.g. a headless "Run All"): treat as "no" so the
    # notebook keeps running instead of stopping on this cell.
    # IPython's StdinNotImplementedError is a NotImplementedError subclass.
    download = "no"

# Ignore surrounding whitespace and case; accept "y" as shorthand for "yes".
if download.strip().lower() in {"yes", "y"}:
    # Save the DataFrame as a CSV file (row index omitted)
    filtered_df.to_csv("endpoint_failing_result.csv", index=False)
    print("Query result downloaded as 'endpoint_failing_result.csv'")