# Brownfield land entities located outside the UK boundary
**Author**:  Greg Slater <br>
**Date**:  24th September 2024 <br>
**Dataset Scope**: `brownfield-land` <br>
**Report Type**: Ad-hoc analysis <br>

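## Purpose

Identify `brownfield-land` entities whose point falls outside the UK boundary, count them per organisation, and generate retire lines for those that are old (end-dated or listed in the old-entities file).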

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import urllib
# from sqlite_query_functions import DatasetSqlite
from datetime import datetime
import shapely

# Show up to 100 rows when a frame is rendered in a cell output.
pd.set_option("display.max_rows", 100)

# Run date as YYYY-MM-DD, available for stamping output filenames.
td = f"{datetime.today():%Y-%m-%d}"

# Folder holding this analysis' data files; created if it does not exist yet.
data_dir = "../../data/deleted_entities/"
os.makedirs(data_dir, exist_ok=True)
# fn = os.path.join(data_dir, f"FILENAME_{td}.csv")


In [23]:
def get_pdp_dataset(dataset, geometry_field = "geometry", crs_out=4326, underscore_cols=True):
    """Download a planning.data.gov.uk dataset CSV and return it as a GeoDataFrame.

    Rows with a null `geometry_field` are dropped. The remaining WKT strings are
    parsed into shapely geometries, tagged as EPSG:4326 and reprojected to `crs_out`.

    Parameters
    ----------
    dataset : str
        Dataset name, interpolated into the download URL.
    geometry_field : str
        Name of the WKT column to use as the active geometry (e.g. "geometry" or "point").
    crs_out : int
        EPSG code of the coordinate reference system of the returned frame.
    underscore_cols : bool
        When True (the default), hyphens in column names are replaced with underscores.
        When False, the column names are left as they are in the CSV.

    Returns
    -------
    geopandas.GeoDataFrame
        Only the rows that have a geometry, in the `crs_out` projection.
    """
    # Imported explicitly: a bare `import shapely` is not guaranteed to expose
    # the `wkt` submodule on its own.
    import shapely.wkt

    df = pd.read_csv(f"https://files.planning.data.gov.uk/dataset/{dataset}.csv",
                     dtype = {"organisation-entity": pd.Int64Dtype()})

    if underscore_cols:
        df.columns = [col.replace("-", "_") for col in df.columns]
        # Keep the requested geometry column name in step with the renamed columns.
        geometry_field = geometry_field.replace("-", "_")

    df_valid_geom = df[df[geometry_field].notnull()].copy()

    # Parse the WKT strings and build the GeoDataFrame, tagging the data as EPSG:4326.
    df_valid_geom[geometry_field] = df_valid_geom[geometry_field].apply(shapely.wkt.loads)
    gdf = gpd.GeoDataFrame(df_valid_geom, geometry = geometry_field, crs = "EPSG:4326")

    # Reproject to the requested CRS (e.g. EPSG:27700 for more interpretable units).
    return gdf.to_crs(epsg=crs_out)

In [24]:
def get_all_organisations():
    """Fetch the organisation table from the planning.data.gov.uk datasette as a DataFrame.

    Returns
    -------
    pandas.DataFrame
        Columns `organisation_entity` (nullable Int64), `org_name`, `organisation`,
        `org_type` and `end_date`, as selected by the SQL query below.
    """
    # Imported explicitly: a bare `import urllib` does not guarantee that the
    # `parse` submodule has been loaded.
    from urllib.parse import urlencode

    params = urlencode({
        "sql": """
        select entity as organisation_entity, name as org_name, organisation, dataset as org_type, end_date
        from organisation
        """,
        "_size": "max"
        })
    url = f"https://datasette.planning.data.gov.uk/digital-land.csv?{params}"
    return pd.read_csv(url, dtype={"organisation_entity" : pd.Int64Dtype()})

## Data Import

In [25]:
# Organisation lookup, merged onto results later to attach organisation names and codes.
lookup_org = get_all_organisations()

In [26]:
# Brownfield-land entities with the "point" column as geometry, reprojected to EPSG:27700.
bfl_gdf = get_pdp_dataset("brownfield-land", geometry_field="point", crs_out=27700)

In [4]:
# Boundary layer used as the UK extent in the spatial join; the path is relative to the working directory.
# NOTE(review): the join assumes this layer shares bfl_gdf's CRS (EPSG:27700) — confirm.
uk_gdf = gpd.read_file("Countries_December_2023_Boundaries_UK_BFC_-7514124880420163797.gpkg")

In [51]:
# Entities to be flagged as old; must contain an "entity" column.
# NOTE(review): presumably the entities not on the latest resource — confirm how this CSV was produced.
old_ents_df = pd.read_csv(os.path.join(data_dir, "test - old ents - 2025-01-23.csv"))

In [114]:
# An entity counts as "old" when it is listed in the old-entities file or has an end date.
# The combined mask is already boolean, so it is assigned directly.
bfl_gdf["old_entity"] = (
    bfl_gdf["entity"].isin(old_ents_df["entity"])
    | bfl_gdf["end_date"].notnull()
)

In [110]:
# How many brownfield-land entities are flagged old vs. not.
bfl_gdf["old_entity"].value_counts()

old_entity
False    24833
True      9064
Name: count, dtype: int64

## Analysis

In [32]:
# Keep the BFL points that intersect the UK extent.
bfl_uk = bfl_gdf.sjoin(uk_gdf, how="inner", predicate="intersects")

# Points whose entity is absent from the joined table lie beyond the UK extent.
outside_uk = ~bfl_gdf["entity"].isin(bfl_uk["entity"])
bfl_beyond = bfl_gdf.loc[outside_uk]

# Row counts before and after the join.
print(len(bfl_gdf), len(bfl_uk), sep="\n")

33897
33628


In [116]:
# Old vs. not-old split among the points that fall beyond the UK extent.
bfl_beyond["old_entity"].value_counts()

old_entity
False    150
True     119
Name: count, dtype: int64

In [94]:
# Number of beyond-UK points per organisation and old/not-old flag, with organisation names attached.
bfl_beyond_org_count = (
    bfl_beyond
    .groupby(["organisation_entity", "old_entity"], as_index=False)
    .agg("size")
    .merge(
        lookup_org[["organisation_entity", "org_name", "organisation"]],
        on="organisation_entity",
        how="inner",
    )
)

# bfl_beyond_org_count.sort_values("size", ascending=False).to_csv("bfl_beyond_uk_by_org.csv", index = False)
# Largest 20 counts among the entities that are not flagged old.
(
    bfl_beyond_org_count
    .loc[lambda counts: counts["old_entity"].eq(False)]
    .sort_values("size", ascending=False)
    .head(20)
)

Unnamed: 0,organisation_entity,old_entity,size,org_name,organisation
6,72,False,63,Cannock Chase District Council,local-authority:CAN
28,186,False,14,Borough Council of King's Lynn and West Norfolk,local-authority:KIN
53,315,False,10,Stafford Borough Council,local-authority:STA
12,112,False,7,Dorset Council,local-authority:DST
61,337,False,4,Tendring District Council,local-authority:TEN
44,254,False,3,Borough of Poole,local-authority:POL
20,134,False,3,East Staffordshire Borough Council,local-authority:EST
71,368,False,3,Wiltshire Council,local-authority:WIL
23,152,False,3,Great Yarmouth Borough Council,local-authority:GRY
17,131,False,3,East Riding of Yorkshire Council,local-authority:ERY


In [92]:
# Inspect the beyond-UK points belonging to organisation entity 311.
# (Optionally narrow further with: bfl_beyond["old_entity"] == False)
bfl_beyond.loc[
    bfl_beyond["organisation_entity"].eq(311),
    ["entity", "reference", "name", "point", "site_address", "end_date", "old_entity"],
]

Unnamed: 0,entity,reference,name,point,site_address,end_date,old_entity
22266,1724313,FW12,FW12,POINT (709459.125 433194.081),,,True
22267,1724314,LHU04,LHU04,POINT (714035.768 437450.659),,,True
22268,1724315,NLH12,NLH12,POINT (711917.151 436455.862),,,True
22269,1724316,LSM04,LSM04,POINT (709934.921 431957.559),,,True
22270,1724317,BBE12,BBE12,POINT (707443.052 436498.288),,,True


In [103]:
# Old entities beyond the UK extent are the candidates to retire; attach organisation names.
bfl_to_retire = (
    bfl_beyond
    .loc[bfl_beyond["old_entity"].eq(True)]
    .merge(
        lookup_org[["organisation_entity", "org_name", "organisation"]],
        on="organisation_entity",
        how="inner",
    )
)

bfl_to_retire.head()

Unnamed: 0,dataset,end_date,entity,entry_date,geojson,geometry,name,organisation_entity,point,prefix,...,planning_permission_history,planning_permission_status,planning_permission_type,site,site_address,site_categories,site_plan_url,old_entity,org_name,organisation_y
0,brownfield-land,,1700625,2021-12-13,,,BFL/GEO/017,221,POINT (124688.001 139379.987),brownfield-land,...,,permissioned,full planning permission,,"Fairleigh, Crowborough Road Georgeham Braunton...",,https://www.northdevon.gov.uk/media/378082/bfl...,True,North Devon District Council,local-authority:NDE
1,brownfield-land,,1700669,2017-12-19,,,28,39,POINT (308301.992 430351.955),brownfield-land,...,http://planning.blackburn.gov.uk/northgate/doc...,permissioned,permission in principle,,"Roe Lee Farm (Poultry Farm), off Royal Oak Ave...",,http://www.blackburn.gov.uk/Brownfield%20land%...,True,Blackburn with Darwen Borough Council,local-authority:BBD
2,brownfield-land,,1701265,2017-12-20,,,BLR04 (2012 SLAA page 160),149,POINT (562765.074 17368.350),brownfield-land,...,https://plan.gravesham.gov.uk/online-applicati...,not permissioned,,,Dover Road ELS Site,,http://selfservice.gravesham.gov.uk:8081/webdo...,True,Gravesham Borough Council,local-authority:GRA
3,brownfield-land,,1701534,2019-10-15,,,16/24628/FUL,315,POINT (677144.034 331234.650),brownfield-land,...,http://www.staffordbc.gov.uk/planning-public-a...,permissioned,full planning permission,,"128 North Walls, Stafford",,https://www.staffordbc.gov.uk/brownfield-land-...,True,Stafford Borough Council,local-authority:STA
4,brownfield-land,,1701563,2020-09-04,,,18/28772/OUT,315,POINT (677201.627 331300.398),brownfield-land,...,http://www.staffordbc.gov.uk/planning-public-a...,permissioned,other,,Former Stafford College Annex,,https://www.staffordbc.gov.uk/brownfield-land-...,True,Stafford Borough Council,local-authority:STA


In [127]:
# Build one comma-separated retire line per entity. Both variants share the
# "<entity>,410,,,retire <organisation>" prefix; the reason text depends on
# whether the entity has an end date.
retire_prefix = bfl_to_retire["entity"].astype(str) + ",410,,,retire " + bfl_to_retire["organisation_y"]

np.where(
    bfl_to_retire["end_date"].notnull(),
    retire_prefix + " entity which is end-dated and is in the sea,,,",
    retire_prefix + " entity which is not on latest resource and is in the sea,,,",
)

array(['1700625,410,,,retire local-authority:NDE entity which is not on latest resource and is in the sea,,,',
       '1700669,410,,,retire local-authority:BBD entity which is not on latest resource and is in the sea,,,',
       '1701265,410,,,retire local-authority:GRA entity which is not on latest resource and is in the sea,,,',
       '1701534,410,,,retire local-authority:STA entity which is not on latest resource and is in the sea,,,',
       '1701563,410,,,retire local-authority:STA entity which is not on latest resource and is in the sea,,,',
       '1719046,410,,,retire local-authority:STA entity which is not on latest resource and is in the sea,,,',
       '1719048,410,,,retire local-authority:STA entity which is not on latest resource and is in the sea,,,',
       '1719049,410,,,retire local-authority:STA entity which is not on latest resource and is in the sea,,,',
       '1719053,410,,,retire local-authority:STA entity which is not on latest resource and is in the sea,,,',
 