In [3]:
import pandas as pd
import geopandas as gpd
import shapely.wkt
import urllib
import os
import numpy as np

In [4]:
# if running on Colab, uncomment and run this line below too:
# !pip install mapclassify

In [5]:
# Output directory, relative to the current working directory.
# exist_ok=True makes this cell safe to re-run when the folder already exists.
output_dir = "output/"
os.makedirs(output_dir, exist_ok=True)

In [57]:
# Functions

def get_all_organisations():
    """Download the organisation lookup from the planning data datasette.

    Runs a SQL query against the `organisation` table exposed at
    https://datasette.planning.data.gov.uk/digital-land.csv and reads the
    CSV response into a DataFrame. The row named "Waveney District Council"
    is excluded by the query.

    Returns
    -------
    pandas.DataFrame
        All columns are read as strings. Columns: organisation_entity,
        org_name, organisation, org_type, end_date, LPACD,
        local_authority_district, statistical_geography.
        `statistical_geography` is the local authority district code for
        rows whose dataset is "local-authority", otherwise the local
        planning authority code.
    """
    # `import urllib` alone does not guarantee that the `parse` submodule is
    # loaded; import it explicitly so the function does not rely on another
    # library having imported it first.
    from urllib.parse import urlencode

    # Single quotes are the standard SQL string-literal syntax; SQLite only
    # accepts double-quoted strings as a compatibility fallback.
    sql = """
        select entity as organisation_entity, name as org_name, organisation, dataset as org_type, end_date,
        local_planning_authority as LPACD, local_authority_district,
        case when dataset = 'local-authority' then local_authority_district else local_planning_authority end as statistical_geography
        from organisation
        where name != 'Waveney District Council'
        """

    # NOTE(review): "_size=max" asks datasette for its largest page size;
    # confirm the table fits within that limit, otherwise rows are missed.
    params = urlencode({"sql": sql, "_size": "max"})
    url = f"https://datasette.planning.data.gov.uk/digital-land.csv?{params}"
    return pd.read_csv(url, dtype=str)


def get_pdp_dataset(dataset, geometry_field = "geometry", crs_out=4326, underscore_cols=True):
    """Download a planning.data.gov.uk dataset CSV and return it as a GeoDataFrame.

    Parameters
    ----------
    dataset : str
        Dataset name used to build the download URL
        (https://files.planning.data.gov.uk/dataset/<dataset>.csv).
    geometry_field : str, default "geometry"
        Name of the column holding WKT text to use as the active geometry.
        Rows where this column is null are dropped.
    crs_out : int, default 4326
        EPSG code of the CRS the result is re-projected to. The WKT is
        treated as EPSG:4326 on load; pass e.g. 27700 for metre-based units.
    underscore_cols : bool, default True
        When True, hyphens in column names are replaced by underscores.
        When False, the column names are left as downloaded.

    Returns
    -------
    geopandas.GeoDataFrame
        The dataset rows with a non-null `geometry_field`, with that column
        parsed into shapely geometries, in the `crs_out` CRS. All other
        columns are strings.
    """
    df = pd.read_csv(f"https://files.planning.data.gov.uk/dataset/{dataset}.csv", dtype = "str")
    if underscore_cols:
        df.columns = [col.replace("-", "_") for col in df.columns]

    # Rows without WKT cannot be parsed, so only keep those with a value.
    df_valid_geom = df[df[geometry_field].notnull()].copy()

    # Parse the WKT text and build the GeoDataFrame; source coordinates are
    # treated as EPSG:4326.
    df_valid_geom[geometry_field] = df_valid_geom[geometry_field].apply(shapely.wkt.loads)
    gdf = gpd.GeoDataFrame(df_valid_geom, geometry = geometry_field, crs = "EPSG:4326")

    # Re-project to the requested CRS (a no-op for the default 4326).
    return gdf.to_crs(epsg=crs_out)

## Data in

In [None]:
# Fetch the organisation lookup table and report how many rows came back
org_df = get_all_organisations()
print(org_df.shape[0])

In [8]:
# # read in manual count sheet
# con_count_df = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSGZIudsGx0ez4cU-4wSvymvXIFfpDb_qfbS3uW5RiuBkJrJQ9D8k0HBUPtgncRXA/pub?gid=485605871&single=true&output=csv")
# con_count_df.columns = [x.replace("-", "_") for x in con_count_df.columns]

# # join on organisation names and LPA codes
# con_count_lpa_df = con_count_df.merge(
#     org_df[["organisation_entity", "org_name", "LPACD"]],
#     how = "left",
#     on = "organisation_entity"
# )

# print(len(con_count_lpa_df))
# # con_count_lpa_df.head()

In [None]:
# Rank of each supplying organisation type; types not listed here map to NaN
type_rank = {"local-authority": 1, "government-organisation": 2}

# Conservation areas, using the "point" column as the geometry
ca_gdf = get_pdp_dataset("conservation-area", "point")
print(len(ca_gdf))

# Attach the supplying organisation's name and type. The row count is printed
# before and after so any fan-out introduced by the merge is visible.
ca_gdf = ca_gdf.merge(
    org_df[["organisation_entity", "org_name", "org_type"]],
    on="organisation_entity",
    how="left",
)
ca_gdf["org_type_rank"] = ca_gdf["org_type"].map(type_rank)
print(len(ca_gdf))

In [None]:
# LPA boundaries from the PDP site, with columns renamed for easier joining
lpa_gdf = get_pdp_dataset("local-planning-authority", "geometry").rename(
    columns={"name": "lpa_name", "reference": "LPACD"}
)
print(len(lpa_gdf))

# Keep only the columns needed, then bring in the end date held in the
# organisation lookup. Subsetting first leaves the dataset's own end-date
# column out of the merge.
lpa_gdf = lpa_gdf[["dataset", "entity", "geometry", "lpa_name", "LPACD"]].merge(
    org_df[["LPACD", "end_date"]],
    on="LPACD",
    how="left",
)

# LPAs whose joined end date is empty
lpa_live_gdf = lpa_gdf.loc[lpa_gdf["end_date"].isna()].copy()

# Row count after the merge, for comparison with the count printed above
print(len(lpa_gdf))
lpa_gdf.head()

## Analysis

### Spatial joining

In [None]:
# Left spatial join of live LPAs to conservation area points: every live LPA
# is kept, and each intersecting conservation area contributes a row carrying
# the name, type and rank of the organisation that supplied it.
lpa_ca_join = lpa_live_gdf[["LPACD", "lpa_name", "geometry"]].sjoin(
    ca_gdf[["entity", "organisation_entity", "org_name", "org_type", "org_type_rank", "point"]],
    how="left",
    predicate="intersects",
)

print(len(lpa_ca_join))
lpa_ca_join.head()


In [None]:
# Alternative to the spatial join: intersect the live LPA polygons with the
# conservation area points, keeping the attributes of both.
# NOTE: this reassigns `lpa_ca_join`, replacing the spatial-join result built
# earlier in the notebook. An intersection only contains LPAs that have at
# least one conservation area point.
lpa_ca_join = lpa_live_gdf[["LPACD", "lpa_name", "geometry"]].overlay(
    ca_gdf[["entity", "organisation_entity", "org_name", "org_type", "org_type_rank", "point"]],
    how = "intersection",
    # Polygon/point intersections are points. By default overlay keeps only
    # geometries of the left frame's type (polygons), which would discard
    # every result row here.
    keep_geom_type = False
)

print(len(lpa_ca_join))
lpa_ca_join.head()


In [None]:
# Debug check: show the Python type of the join result
type(lpa_ca_join)

In [None]:
# Label for the lowest organisation-type rank found in each LPA.
# LPAs with no ranked conservation area get "none" (see fillna below).
rank_quality = {
    1: "trustworthy",
    2: "some",
}

# Lowest org_type_rank among the conservation areas matched to each LPA.
# Aggregating on the LPA code alone avoids grouping on geometry objects and
# works whichever join produced `lpa_ca_join`.
lpa_rank_min = (
    lpa_ca_join[["LPACD", "org_type_rank"]]
    .groupby("LPACD", as_index=False)
    .agg(org_rank_min=("org_type_rank", "min"))
)

# Attach the result to the live LPA boundaries. The left merge keeps LPAs
# with no matching conservation area; their org_rank_min is NaN.
lpa_prov_map = (
    lpa_live_gdf[["LPACD", "lpa_name", "geometry"]]
    .drop_duplicates(subset="LPACD")
    .merge(lpa_rank_min, how="left", on="LPACD")
)

lpa_prov_map["prov_quality"] = (
    lpa_prov_map["org_rank_min"].map(rank_quality).fillna("none")
)

# Make sure the result is a GeoDataFrame in the same CRS as the boundaries.
lpa_prov_map = gpd.GeoDataFrame(lpa_prov_map, geometry = "geometry", crs = lpa_live_gdf.crs)

lpa_prov_map.head()

In [None]:
# Interactive map of the LPA boundaries, coloured by the prov_quality label
lpa_prov_map.explore(column="prov_quality")