This report collects the outputs of all the other reports in this repository in one place.

In [1]:
from datetime import datetime as dt
from IPython.display import display
import urllib
import numpy as np
import math
import pandas as pd
import ipywidgets as widgets
import requests
import urllib.parse

In [2]:
# Base URL of the Datasette instance queried by update_dataframe. The trailing
# slash matters: the database path is appended to it directly.
datasette_url = "https://datasette.planning.data.gov.uk/"

def update_dataframe(organisation):
    """Fetch per-endpoint log statuses from Datasette as a DataFrame.

    Builds a SQL query over the ``log``, ``source``, ``organisation``,
    ``endpoint`` and ``source_pipeline`` tables, sends it to the
    ``digital-land`` CSV endpoint under ``datasette_url`` and reads the
    response with ``pd.read_csv``. Rows are grouped by endpoint and status,
    and the ``brownfield-land`` collection is excluded.

    Parameters
    ----------
    organisation : str or None
        Organisation code to filter on (e.g. ``'local-authority-eng:CAT'``).
        A falsy value selects every row whose organisation matches
        ``LIKE '%'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``endpoint_url``, ``status``, ``collection``, ``pipelines``,
        ``organisation``, ``name``, ``maxentrydate``, ``entrydate`` and
        ``end_date``, ordered by endpoint, collection and newest
        ``maxentrydate`` first.

    Side effects
    ------------
    The same DataFrame is also stored in the module-level ``result_df``.
    Performs an HTTP request; any error raised by ``pd.read_csv`` propagates.
    """
    global result_df
    if organisation:
        # Double embedded single quotes so the value cannot terminate the SQL
        # string literal it is placed in.
        escaped = str(organisation).replace("'", "''")
        query = f" s.organisation = '{escaped}'"
    else:
        query = " s.organisation LIKE '%'"
    # The collection filter uses a single-quoted literal: in SQLite a
    # double-quoted token is an identifier first and a string only as a
    # legacy fallback.
    params = urllib.parse.urlencode({
        "sql": f"""
        select
          e.endpoint_url,
          l.status,
          s.collection,
          group_concat(DISTINCT sp.pipeline) as pipelines,
          s.organisation,
          o.name,
          max(l.entry_date) maxentrydate,
          max(e.entry_date) entrydate,
          e.end_date
        from
          log l
          inner join source s on l.endpoint = s.endpoint
          inner join organisation o on s.organisation=o.organisation
          inner join endpoint e on l.endpoint = e.endpoint
          inner join source_pipeline sp on s.source = sp.source
        where
           {query} and not collection='brownfield-land'
        group by
          l.endpoint,
          l.status
        order by
          l.endpoint,
          s.collection,
          maxentrydate desc
        """,
        "_size": "max"
    })

    url = f"{datasette_url}digital-land.csv?{params}"
    df = pd.read_csv(url)
    result_df = df
    return df

def update_dataframe_latest_status(organisation):
    """Reduce the endpoint log data to one row per endpoint URL.

    Calls ``update_dataframe(organisation)`` and works on a copy in which
    ``maxentrydate`` is parsed to datetimes. Three columns are added:

    * ``last_status`` / ``last_updated_date``: for a row whose status is not
      200, the status and ``maxentrydate`` of the immediately following row,
      when that row has the same ``endpoint_url``.
    * ``date_last_status_200``: for a kept row whose ``last_status`` was set
      and is not 200, the first 19 characters of the raw ``maxentrydate`` of
      a status-200 row for the same URL, if one exists.

    Only the first row per ``endpoint_url`` is kept. Which row counts as
    "first" and "following" depends on the ordering produced by
    ``update_dataframe``.

    Parameters
    ----------
    organisation : str or None
        Passed straight through to ``update_dataframe``.

    Returns
    -------
    pandas.DataFrame
        The reduced frame with a fresh 0..n-1 index.

    Side effects
    ------------
    The returned frame is also stored in the module-level ``new_df``.
    """
    global new_df
    all_endpoints = update_dataframe(organisation)
    new_df = all_endpoints.copy()
    new_df['maxentrydate'] = pd.to_datetime(new_df['maxentrydate'])
    new_df['last_status'] = None
    new_df['last_updated_date'] = None
    new_df['date_last_status_200'] = None

    # ``index + 1`` addresses the next row because the frame still has the
    # default 0..n-1 index that pd.read_csv produced in update_dataframe.
    last_position = len(new_df) - 1
    for index, row in new_df.iterrows():
        if index < last_position and (row['status'] != 200 or pd.isna(row['status'])):
            if row['endpoint_url'] == new_df.at[index + 1, 'endpoint_url']:
                new_df.at[index, 'last_status'] = new_df.at[index + 1, 'status']
                new_df.at[index, 'last_updated_date'] = new_df.at[index + 1, 'maxentrydate']

    # Keep the first row per URL and renumber so the loop below can use .at.
    new_df = (
        new_df
        .drop_duplicates(subset='endpoint_url', keep='first')
        .reset_index(drop=True)
    )

    for index, row in new_df.iterrows():
        # A None last_status means the first loop never filled it in. The
        # original's extra "or ... is None" test could never be true here.
        if row['last_status'] is not None and row['last_status'] != 200:
            filtered_df = all_endpoints[
                (all_endpoints['endpoint_url'] == row['endpoint_url'])
                & (all_endpoints['status'] == 200)
            ]
            if not filtered_df.empty:
                # all_endpoints still holds the raw date strings; [:19] trims
                # them to the first 19 characters.
                new_df.at[index, 'date_last_status_200'] = filtered_df['maxentrydate'].values[0][:19]
    return new_df

# Organisations included in this report, identified by their organisation code.
organisation_list = [
    'local-authority-eng:NET',
    'local-authority-eng:MDW',
    'local-authority-eng:LBH',
    'local-authority-eng:GLO',
    'local-authority-eng:DNC',
    'local-authority-eng:BUC',
    'local-authority-eng:EPS',
    'local-authority-eng:CAT',
]

# Collections reported on, as a Python list and as a SQL-style tuple literal.
collection_list = [
    'article-4-direction',
    'conservation-area',
    'listed-building-authority',
    'tree',
]
collections = "('article-4-direction', 'conservation-area', 'listed-building-authority', 'tree')"

# Fetch each organisation's reduced endpoint table and keep only the rows
# belonging to the collections above, keyed by organisation code.
all_orgs_recent_endpoints = {}
for organisation in organisation_list:
    recent_endpoints_df = update_dataframe_latest_status(organisation)
    in_selected_collections = recent_endpoints_df['collection'].isin(collection_list)
    recent_endpoints_df = recent_endpoints_df.loc[in_selected_collections]
    all_orgs_recent_endpoints[organisation] = recent_endpoints_df


In [9]:
# Build a summary table: one row per organisation, one column per collection,
# each cell holding the status of that organisation's first listed endpoint
# for the collection.
rows_list = []
for organisation in organisation_list:
    # Keep only the first row for each collection, in the frame's row order.
    df = all_orgs_recent_endpoints[organisation].drop_duplicates(subset='collection', keep='first')
    # An organisation with no endpoints in the selected collections yields an
    # empty frame; fall back to its code instead of failing on .values[0].
    name = organisation if df.empty else df['name'].values[0]
    statuses = dict(zip(df['collection'], df['status']))
    rows_list.append({'organisation': name, **statuses})

# Collections an organisation has no endpoint for are left as NaN.
output_df = pd.DataFrame(rows_list, columns=['organisation', *collection_list])
output_df.head()

Unnamed: 0,endpoint_url,status,collection,pipelines,organisation,name,maxentrydate,entrydate,end_date,last_status,last_updated_date,date_last_status_200
0,https://mapping.canterbury.gov.uk/arcgis/rest/...,200.0,article-4-direction,article-4-direction-area,local-authority-eng:CAT,Canterbury City Council,2023-10-12 00:13:48+00:00,2021-11-11T14:14:25Z,,,,
5,https://mapping.canterbury.gov.uk/arcgis/rest/...,200.0,conservation-area,conservation-area,local-authority-eng:CAT,Canterbury City Council,2023-10-12 00:05:51+00:00,2021-11-16T13:13:02Z,,,,
