This report serves as a place to collect the outputs from all the other reports in this repository, containing information on the latest endpoint status for each organisation and collection.

In [11]:
from datetime import datetime as dt
from IPython.display import display
import urllib
import numpy as np
import math
import pandas as pd
import ipywidgets as widgets
import requests
import urllib.parse

In [12]:
# Base URL (with trailing slash) of the Datasette instance queried below.
datasette_url = "https://datasette.planning.data.gov.uk/"

def update_dataframe(organisation):
    """Fetch per-endpoint log statuses from Datasette as a DataFrame.

    Runs a SQL query against the ``digital-land`` database and reads the CSV
    response. The result has one row per (endpoint, status) pair, ordered by
    endpoint, collection and most recent log entry date first. Rows for the
    ``brownfield-land`` collection are excluded.

    Args:
        organisation: Organisation code to filter on (for example
            ``'local-authority-eng:NET'``). A falsy value selects every
            organisation.

    Returns:
        A DataFrame with the columns ``endpoint_url``, ``status``,
        ``collection``, ``pipelines``, ``organisation``, ``name``,
        ``maxentrydate``, ``entrydate`` and ``end_date``.

    Side effects:
        Stores the same DataFrame in the module-level global ``result_df``.
    """
    global result_df
    if organisation:
        # Double any single quote so the value cannot terminate the SQL
        # string literal early (broken query or SQL injection).
        escaped = str(organisation).replace("'", "''")
        query = f" s.organisation = '{escaped}'"
    else:
        query = " s.organisation LIKE '%'"
    params = urllib.parse.urlencode({
        "sql": f"""
        select
          e.endpoint_url,
          l.status,
          s.collection,
          group_concat(DISTINCT sp.pipeline) as pipelines,
          s.organisation,
          o.name,
          max(l.entry_date) maxentrydate,
          max(e.entry_date) entrydate,
          e.end_date
        from
          log l
          inner join source s on l.endpoint = s.endpoint
          inner join organisation o on s.organisation=o.organisation
          inner join endpoint e on l.endpoint = e.endpoint
          inner join source_pipeline sp on s.source = sp.source
        where
           {query} and not collection="brownfield-land"
        group by
          l.endpoint,
          l.status
        order by
          l.endpoint,
          s.collection,
          maxentrydate desc
        """,
        # Ask Datasette for its maximum page size instead of the default.
        "_size": "max"
    })

    url = f"{datasette_url}digital-land.csv?{params}"
    df = pd.read_csv(url)
    result_df = df
    return df

def update_dataframe_latest_status(organisation):
    """Reduce the endpoint log data to one row per endpoint URL.

    Starts from the (endpoint, status) rows returned by ``update_dataframe``
    and keeps the first row for each ``endpoint_url``. Three columns are
    added:

    * ``last_status`` / ``last_updated_date``: when the kept row's status is
      not 200 (or is missing) and the next row belongs to the same endpoint
      URL, the status and ``maxentrydate`` of that next row.
    * ``date_last_status_200``: when ``last_status`` was filled in and is not
      200, the ``maxentrydate`` (first 19 characters) of the first row for
      that endpoint URL with status 200, if there is one.

    Args:
        organisation: Organisation code passed through to
            ``update_dataframe``; a falsy value selects every organisation.

    Returns:
        The reduced DataFrame, with ``maxentrydate`` parsed as datetimes.

    Side effects:
        Stores the same DataFrame in the module-level global ``new_df``.
    """
    global new_df
    all_endpoints = update_dataframe(organisation)
    new_df = all_endpoints.copy()
    new_df['maxentrydate'] = pd.to_datetime(new_df['maxentrydate'])
    new_df['last_status'] = None
    new_df['last_updated_date'] = None
    new_df['date_last_status_200'] = None

    # Relies on the default 0..n-1 index from read_csv so that ``index + 1``
    # addresses the following row.
    last_position = len(new_df) - 1
    for index, row in new_df.iterrows():
        if index >= last_position:
            continue
        status = row['status']
        # Check for a missing value first so the comparison is never
        # evaluated on a null status.
        if not (pd.isna(status) or status != 200):
            continue
        if row['endpoint_url'] == new_df.at[index + 1, 'endpoint_url']:
            new_df.at[index, 'last_status'] = new_df.at[index + 1, 'status']
            new_df.at[index, 'last_updated_date'] = new_df.at[index + 1, 'maxentrydate']

    new_df.drop_duplicates(subset='endpoint_url', keep='first', inplace=True)
    new_df.reset_index(drop=True, inplace=True)

    # The status == 200 filter does not depend on the row, so compute it once.
    # ``all_endpoints`` still holds maxentrydate as the raw CSV string, which
    # is why it can be sliced to 19 characters below.
    ok_endpoints = all_endpoints[all_endpoints['status'] == 200]
    for index, row in new_df.iterrows():
        last_status = row['last_status']
        # None means no previous status was recorded in the first pass.
        if last_status is None or last_status == 200:
            continue
        matches = ok_endpoints[ok_endpoints['endpoint_url'] == row['endpoint_url']]
        if not matches.empty:
            new_df.at[index, 'date_last_status_200'] = matches['maxentrydate'].values[0][:19]
    return new_df

# Organisation codes included in this report.
organisation_list = [
    'local-authority-eng:NET', 'local-authority-eng:MDW',
    'local-authority-eng:LBH', 'local-authority-eng:GLO',
    'local-authority-eng:DNC', 'local-authority-eng:BUC',
    'local-authority-eng:EPS', 'local-authority-eng:CAT',
]
# Collections that are kept for each organisation.
collection_list = [
    'article-4-direction', 'article-4-area', 'conservation-area',
    'conservation-area-document', 'listed-building-outline',
    'tree-preservation-order', 'tree-preservation-zone', 'tree',
]
# Parenthesised, quoted rendering of collection_list, e.g. "('a', 'b')".
# Derived from the list so the two cannot drift apart; the hand-written
# version contained a stray "ConnectionResetError" token.
# NOTE(review): presumably intended for a SQL IN clause - not used in the
# code visible here.
collections = "(" + ", ".join(f"'{collection}'" for collection in collection_list) + ")"

# Latest endpoint status per organisation, restricted to the tracked collections.
all_orgs_recent_endpoints = {}
for organisation in organisation_list:
    recent_endpoints_df = update_dataframe_latest_status(organisation)
    recent_endpoints_df = recent_endpoints_df[recent_endpoints_df['collection'].isin(collection_list)]
    all_orgs_recent_endpoints[organisation] = recent_endpoints_df


In [22]:
# Do not truncate the contents of wide columns when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)

def compute_cell_colour(status):
    """Return the CSS background-colour rule for a status cell.

    A status of 200 maps to green and 404 to red; every other value,
    including a missing (NaN) status, maps to orange.
    """
    colour = 'orange'
    if status == 200:
        colour = 'green'
    elif status == 404:
        colour = 'red'
    return 'background-color: ' + colour

# Build one summary row per organisation: its display name plus the status of
# one endpoint for each collection it provides.
rows_list = []
for organisation in organisation_list:
    df = all_orgs_recent_endpoints[organisation]
    # Revisit this drop duplicates - figure out how we should be handling multiple endpoints for same collection
    # Take the most recent entrydate/maxentrydate?
    df = df.drop_duplicates(subset='collection', keep='first')
    # An organisation with no endpoints in the tracked collections yields an
    # empty frame; fall back to its code instead of raising IndexError.
    name = df['name'].values[0] if not df.empty else organisation
    statuses = {row['collection']: row['status'] for _, row in df.iterrows()}
    new_row = {'organisation': name, **statuses}
    print(new_row)
    rows_list.append(new_row)

# Collections an organisation does not provide are left as NaN in the table.
output_df = pd.DataFrame(rows_list, columns=['organisation', *collection_list])
# Last expression of the cell: the styled table is the cell's displayed output.
output_df.style.applymap(compute_cell_colour, subset=collection_list)

{'organisation': 'Newcastle City Council', 'conservation-area': 200, 'article-4-direction': 200, 'tree-preservation-order': 200}
{'organisation': 'Medway Council', 'tree-preservation-order': 200.0, 'article-4-direction': 200.0}
{'organisation': 'London Borough of Lambeth', 'tree-preservation-order': 404.0, 'article-4-direction': 404.0, 'conservation-area': 200.0}
{'organisation': 'Gloucester City Council', 'conservation-area': 200, 'article-4-direction': 200}
{'organisation': 'Doncaster Metropolitan Borough Council', 'conservation-area': 200.0, 'article-4-direction': 200.0, 'tree-preservation-order': 200.0}
{'organisation': 'Buckinghamshire Council', 'listed-building-outline': 200.0, 'article-4-direction': nan, 'tree-preservation-zone': 200.0, 'conservation-area': 200.0}
{'organisation': 'Epsom and Ewell Borough Council', 'conservation-area': nan}
{'organisation': 'Canterbury City Council', 'article-4-direction': 200.0, 'tree-preservation-order': 200.0, 'conservation-area': 200.0}


Unnamed: 0,organisation,article-4-direction,article-4-area,conservation-area,conservation-area-document,listed-building-outline,tree-preservation-order,tree-preservation-zone,tree
0,Newcastle City Council,200.0,,200.0,,,200.0,,
1,Medway Council,200.0,,,,,200.0,,
2,London Borough of Lambeth,404.0,,200.0,,,404.0,,
3,Gloucester City Council,200.0,,200.0,,,,,
4,Doncaster Metropolitan Borough Council,200.0,,200.0,,,200.0,,
5,Buckinghamshire Council,,,200.0,,200.0,,200.0,
6,Epsom and Ewell Borough Council,,,,,,,,
7,Canterbury City Council,200.0,,200.0,,,200.0,,
