## Description    

The purpose of this notebook is to produce some interactive html maps which can be shared with LPAs to help them explore and resolve duplication or overlap issues with conservation area data that's been supplied.

The initial focus is on interaction between LPA and Historic England (HE) polygons, to highlight to LPAs where there are close matches (in which case we'll redirect the HE polygon to the LPA one), or where there are differences - for example there are many LPA polygons within a single HE polygon (in which case we'd like a steer from the LPA on the resolution).

### How to use
1. Run all cells within the "Functions", "Data import", and "Report" sections   

2. In the "Map for LPA analysis" section, set the `org_name` variable to one of the LPAs listed in the issues-per-LPA table directly above that section, then run all of the cells in the section - this saves an HTML version of the Folium map in the notebook directory. 

In [1]:
# from download_data import download_dataset
# from data import get_entity_dataset, nrow
# from plot import plot_map, plot_issues_map
import pandas as pd
import geopandas as gpd
import os
import shapely.wkt
import logging

# import matplotlib.pyplot as plt
import urllib

import numpy as np
import folium

pd.set_option("display.max_rows", 100)


In [2]:
# if running on Colab, uncomment and run this line below too:
# !pip install mapclassify

In [24]:
# Global configuration.
# data_dir is the output folder used for the issue reports written by this notebook;
# it is created here if it does not already exist.
data_dir = "../data/geo_analysis/LPA-investigations/"
os.makedirs(name=data_dir, exist_ok=True)

### Functions

In [49]:
def nrow(df):
    """Print the number of records in ``df`` (thousands-separated) and return None."""
    record_count = len(df)
    print(f"No. of records in df: {record_count:,}")
    return None


def plot_issues_map(gdf:gpd.GeoDataFrame, entity_list, chloro_var, palette):
    """Draw an interactive choropleth map of the selected entities.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Entity geometries. Must contain an "entity" column plus the popup
        columns "organisation_name", "name", "entry_date" and "reference".
    entity_list : list-like
        Values of the "entity" column to include on the map.
    chloro_var : str
        Name of the column used to colour the polygons.
    palette : str or colormap
        Passed to ``explore`` as ``cmap``.

    Returns
    -------
    The object returned by ``GeoDataFrame.explore`` for the filtered rows.

    Raises
    ------
    TypeError
        If ``gdf`` is not a GeoDataFrame. Previously this was only logged and
        the function carried on, failing later with a less helpful error.
    """
    # isinstance (rather than an exact type comparison) also accepts subclasses
    if not isinstance(gdf, gpd.GeoDataFrame):
        logging.error('input is not a GeodataFrame')
        raise TypeError(f"gdf must be a GeoDataFrame, got {type(gdf).__name__}")

    selected = gdf[gdf["entity"].isin(entity_list)]

    base = selected.explore(
        column = chloro_var,  # colour polygons by the values in this column
        cmap = palette,
        tooltip = False,
        popup = ["organisation_name", "entity", "name", "entry_date", "reference"],
        tiles = "CartoDB positron",
        highlight = True,
        style_kwds = {
        "fillOpacity" : "0.1"
        }
    )

    return base

def get_all_organisations():
    """Fetch the organisation lookup from the planning.data.gov.uk datasette.

    Returns
    -------
    pd.DataFrame
        One row per organisation with the columns selected in the SQL below:
        organisation, name, organisation_entity, statistical_geography.
    """
    # Import the submodule explicitly: a bare `import urllib` does not load
    # `urllib.parse`; it only works when another library has imported it first.
    from urllib.parse import urlencode

    params = urlencode({
        "sql": """
        select organisation, name, entity as organisation_entity, statistical_geography
        from organisation
        """,
        "_size": "max"
        })
    url = f"https://datasette.planning.data.gov.uk/digital-land.csv?{params}"
    df = pd.read_csv(url)
    return df


def get_old_entity(collection_name):
    """Fetch the full ``old_entity`` table for a collection from datasette.

    Parameters
    ----------
    collection_name : str
        Name of the datasette database to query; it is inserted into the URL
        path as ``/{collection_name}.csv``.

    Returns
    -------
    pd.DataFrame
        All columns of the ``old_entity`` table.
    """
    # Import the submodule explicitly: a bare `import urllib` does not load
    # `urllib.parse`; it only works when another library has imported it first.
    from urllib.parse import urlencode

    params = urlencode({
        "sql": """
        select *
        from old_entity
        """,
        "_size": "max"
        })
    url = f"https://datasette.planning.data.gov.uk/{collection_name}.csv?{params}"
    df = pd.read_csv(url)
    return df

def get_issue_entities(issues_df):
    """Return the distinct entity ids appearing in ``entity_1`` or ``entity_2``.

    The two columns are stacked (``entity_1`` first) and de-duplicated, so the
    returned list keeps first-seen order.
    """
    stacked = pd.concat([issues_df[side] for side in ("entity_1", "entity_2")])
    distinct = stacked.drop_duplicates()
    return distinct.to_list()

### Data import

In [5]:
# get LAD to LPA lookup from github
# Columns are renamed by name rather than by position: read_csv returns the
# `usecols` columns in file order (the order of the usecols list is ignored), so
# assigning a positional list of names would silently mislabel the columns if the
# source file's column order ever changed.
lookup_lad_lpa = pd.read_csv(
    "https://github.com/digital-land/organisation-collection/raw/main/data/local-authority.csv",
    usecols = ["entity", "local-authority-district", "local-planning-authority"],
).rename(
    columns = {
        "entity": "organisation_entity",
        "local-authority-district": "LADCD",
        "local-planning-authority": "LPACD",
    }
)

nrow(lookup_lad_lpa)
lookup_lad_lpa.head()

No. of records in df: 376


Unnamed: 0,organisation_entity,LADCD,LPACD
0,26,E07000223,E60000281
1,27,E07000026,E60000019
2,28,E07000032,E60000077
3,29,E07000224,E60000282
4,30,E07000105,E60000253


In [6]:
# Organisation lookup from datasette; the query returns the columns
# organisation, name, organisation_entity, statistical_geography, so only
# `name` needs relabelling.
lookup_org = get_all_organisations().rename(columns={"name": "organisation_name"})

# the organisation type is the part of the organisation id before the first ":"
lookup_org["organisation_type"] = lookup_org["organisation"].map(lambda org: org.partition(":")[0])

# attach the LAD / LPA codes (left join keeps organisations that have no code)
lookup_org = pd.merge(lookup_org, lookup_lad_lpa, how="left", on="organisation_entity")

nrow(lookup_org)
lookup_org.head()

No. of records in df: 437


Unnamed: 0,organisation,organisation_name,organisation_entity,statistical_geography,organisation_type,LADCD,LPACD
0,passenger-transport-executive:Q25171369,West Midlands Passenger Transport Executive,408,,passenger-transport-executive,,
1,passenger-transport-executive:Q6820591,Merseytravel,409,,passenger-transport-executive,,
2,passenger-transport-executive:Q682520,Transport for London,410,,passenger-transport-executive,,
3,passenger-transport-executive:Q7569004,South Yorkshire Passenger Transport Executive,411,,passenger-transport-executive,,
4,passenger-transport-executive:Q7834921,Transport for Greater Manchester,412,,passenger-transport-executive,,


In [7]:
# count the organisations that did not pick up an LPA code from the lookup
nrow(lookup_org.loc[lookup_org["LPACD"].isna()])
# lookup_org[lookup_org["LPACD"].isnull()].groupby("organisation_type").size()

No. of records in df: 104


organisation_type
development-corporation          14
government-organisation          20
local-authority                  43
national-park-authority          10
nonprofit                         1
passenger-transport-executive     9
public-authority                  1
regional-park-authority           1
waste-authority                   5
dtype: int64

In [8]:
# LPA boundary data from planning.data.gov

# Rename by column name, not by position: read_csv returns the `usecols` columns
# in file order, so a positional list of names only works while the file happens
# to be ordered geometry, name, reference.
LPA_boundary_df = pd.read_csv(
    "https://files.planning.data.gov.uk/dataset/local-planning-authority.csv",
    usecols = ["reference", "name", "geometry"],
).rename(columns = {"reference": "LPACD"})

# parse the WKT strings into shapely geometries
LPA_boundary_df["geometry"] = LPA_boundary_df["geometry"].apply(shapely.wkt.loads)

# Build the GeoDataFrame with the source CRS declared up front (EPSG:4326), then
# reproject to EPSG:27700 for more interpretable area units. Done without
# inplace=True so re-running the cell never mutates an earlier object.
LPA_boundary_gdf = gpd.GeoDataFrame(
    LPA_boundary_df, geometry="geometry", crs="EPSG:4326"
).to_crs(epsg=27700)

nrow(LPA_boundary_gdf)
# LPA_boundary_gdf.head()


No. of records in df: 337


Unnamed: 0,geometry,name,LPACD
0,"MULTIPOLYGON (((428366.003 554230.393, 428288....",County Durham LPA,E60000001
1,"MULTIPOLYGON (((436388.046 522354.244, 436372....",Darlington LPA,E60000002
2,"MULTIPOLYGON (((449073.036 536806.421, 448888....",Hartlepool LPA,E60000003
3,"MULTIPOLYGON (((451894.321 521145.352, 451858....",Middlesbrough LPA,E60000004
4,"MULTIPOLYGON (((429247.025 604972.344, 429241....",Northumberland LPA,E60000005


In [31]:
# Load the conservation-area entity dataset from planning.data.gov, attach the
# organisation details, and build a GeoDataFrame in EPSG:27700.

entity_df = pd.read_csv(
    "https://files.planning.data.gov.uk/dataset/conservation-area.csv",
    usecols=["entity", "name", "organisation-entity", "reference", "entry-date", "geometry"],
)

# use underscores in column names, e.g. "organisation-entity" -> "organisation_entity"
entity_df = entity_df.rename(columns=lambda col: col.replace("-", "_"))

# entity ids are held as strings: a later cell sorts each pair of ids to build a
# key for removing duplicate intersections
entity_df = entity_df.astype({"entity": str})

# bring in the organisation name, type and LPA code
entity_df = pd.merge(
    entity_df,
    lookup_org[["organisation_name", "organisation_type", "organisation_entity", "LPACD"]],
    how="left",
    on="organisation_entity",
)

# parse WKT into geometries and create the GeoDataFrame
entity_df["geometry"] = entity_df["geometry"].map(shapely.wkt.loads)
entity_gdf = gpd.GeoDataFrame(entity_df, geometry="geometry")

# declare the source CRS (EPSG:4326), then transform to EPSG:27700 for more
# interpretable area units
entity_gdf.set_crs(epsg=4326, inplace=True)
entity_gdf.to_crs(epsg=27700, inplace=True)

# polygon area in the units of the projected CRS
entity_gdf["area"] = entity_gdf.geometry.area

# organisation entity 16 is flagged as Historic England; every other
# organisation is labelled as a Local Planning Authority
is_historic_england = entity_gdf["organisation_entity"].eq(16)
entity_gdf["org_HE_LPA"] = np.where(is_historic_england, "Historic England", "Local Planning Authority")

nrow(entity_gdf)
entity_gdf.head()

No. of records in df: 8,923


Unnamed: 0,entity,entry_date,geometry,name,organisation_entity,reference,organisation_name,organisation_type,LPACD,area,org_HE_LPA
0,44000001,2022-04-12,"MULTIPOLYGON (((516981.159 204270.242, 516973....",Napsbury,16,5080,Historic England,government-organisation,,495087.300218,Historic England
1,44000002,2022-04-12,"MULTIPOLYGON (((512390.333 209659.962, 512382....",Shafford Mill,16,5071,Historic England,government-organisation,,136187.979619,Historic England
2,44000003,2022-04-12,"MULTIPOLYGON (((511610.510 205098.079, 511611....",Potters Crouch,16,5074,Historic England,government-organisation,,34603.675292,Historic England
3,44000004,2022-04-12,"MULTIPOLYGON (((512515.275 200300.431, 512520....",Old Brickett Wood,16,5075,Historic England,government-organisation,,55128.469061,Historic England
4,44000005,2022-04-12,"MULTIPOLYGON (((520248.830 206717.191, 520410....",Sleapshyde,16,5078,Historic England,government-organisation,,44167.433073,Historic England


In [10]:
# which organisations supplied entities that we hold no LPA code for?
entity_df.loc[entity_df["LPACD"].isna()].groupby(by=["organisation_type", "organisation_name"]).size()

organisation_type        organisation_name                    
development-corporation  London Legacy Development Corporation       2
government-organisation  Historic England                         7032
local-authority          North Dorset District Council              37
                         Purbeck District Council                  126
national-park-authority  Peak District National Park Authority      21
dtype: int64

# Identifying geographical duplicates  
## Report

Aim of this is to quickly categorise the overlaps based on whether they fall into the following groups:

Entity overlaps with another: 

1. within the same organisation
    
2. from a different organisation   

    a. LPA entity overlaps with entity from another LPA
        
    b. LPA entity overlaps with entity from Historic England


<br>

Overlaps are also classified by how much of each entity is covered, in order to inform possible resolutions. Each entity-entity overlap is put in one of the following groups, which are given a corresponding priority to address:


* **> 90% combined match** (high priority): 90% or more of each entity's area overlaps with the other - this suggests the boundaries of each almost perfectly match  

* **> 90% single match** (medium priority): 90% or more of one entity's area overlaps with the other - this suggests the entities overlap but the boundaries don't closely match (one may be much larger than the other, for instance)
* **edge intersection** (low priority): the overlap makes up between 1% and 10% of the smaller entity's area
* **unclassified** (low priority): the two entities overlap somewhat - the overlapped area makes up more than 10% of the smaller entity's area, but less than 90% of each entity's area
* **tiny edge - ignore** (ignore): the overlap makes up less than 1% of the smaller entity's area - there are a large number of these and they are relatively normal when combining data from many sources.

<br>

Some other useful bits of information are flagged in order to make some suggestions about why the problem has happened. These should be checked when using the output csvs:
* does the entry date for each entity match? When it doesn't it suggests that the issue may have arisen from data being combined from successive endpoints.
* do either of the entities exist as an old entity in the `old-entity.csv` for the collection?
* do either of the entities have other issues associated with them (excluding tiny edge intersections, as it's common to have many of these). This is useful for spotting cases where one much larger polygon covers many smaller ones.

<br>

In [13]:
MATCH_LOWER_THRESH = 0.9  # defines the lower limit of the shared overlap between two entities to be called a match
EDGE_UPPER_THRESH = 0.1   # defines the upper limit of the shared overlap between two entities to be called an edge intersection
EDGE_LOWER_THRESH = 0.01   # defines the lower limit of the shared overlap between two entities to be called an edge intersection

# Old-entity records (entities that have already been re-mapped). These are
# loaded here because nothing else in the notebook defines `old_entity_df`, so
# the cell previously failed with a NameError on a fresh kernel.
# NOTE(review): assumes the datasette database for this dataset is named
# "conservation-area" - confirm.
old_entity_df = get_old_entity("conservation-area")
# entity ids in entity_gdf are strings, so compare like with like
old_entity_ids = old_entity_df["old_entity"].dropna().astype("int64").astype(str)


# full join of all geometries against themselves; keep_geom_type=False also keeps
# intersections that are not polygons (e.g. shared edges with zero area)
entity_join_all = gpd.overlay(
    entity_gdf,
    entity_gdf,
    how = "intersection", keep_geom_type=False
)


# remove self-intersections; .copy() so the columns added below are written to an
# independent frame rather than to a slice (avoids SettingWithCopyWarning)
entity_join_all = entity_join_all[entity_join_all["entity_1"] != entity_join_all["entity_2"]].copy()

# order-independent key for each pair: "<smaller id>-<larger id>" (string ordering)
entity_join_all["entity_join"] = np.where(
    entity_join_all["entity_1"] < entity_join_all["entity_2"],
    entity_join_all["entity_1"] + "-" + entity_join_all["entity_2"],
    entity_join_all["entity_2"] + "-" + entity_join_all["entity_1"],
)

# Each pair appears twice (A-B and B-A). Sort so that, within a pair, the row with
# Historic England (organisation entity 16) as organisation 1 comes last; keeping
# the first row then guarantees Historic England always shows as organisation 2.
entity_join_all["name_for_sort"] = np.where(entity_join_all["organisation_entity_1"] == 16, "Z", "A")
entity_join_all = (
    entity_join_all
    .sort_values(["entity_join", "name_for_sort"], ascending=True)
    .drop_duplicates(subset="entity_join")
)

# nrow(entity_join_all)

# flag the types of intersections between organisations
# is org the same
entity_join_all["int_org_match"] = (
    entity_join_all["organisation_entity_1"] == entity_join_all["organisation_entity_2"]
)

# the types of org-org matches
entity_join_all["int_org_types"] = np.select(
    [
        (entity_join_all["organisation_entity_1"] == 16) & (entity_join_all["organisation_entity_2"] == 16),
        (entity_join_all["organisation_entity_1"] != 16) & (entity_join_all["organisation_entity_2"] != 16),
        ((entity_join_all["organisation_entity_1"] != 16) & (entity_join_all["organisation_entity_2"] == 16)) |
        ((entity_join_all["organisation_entity_1"] == 16) & (entity_join_all["organisation_entity_2"] != 16))
    ],
    ["HE - HE", "LPA - LPA", "HE - other"],
    default = "-"
)

# does the entity entry date match?
entity_join_all["date_match"] = entity_join_all["entry_date_1"] == entity_join_all["entry_date_2"]

# has one of the intersected entities already been re-mapped?
entity_join_all["entity_old"] = (
    entity_join_all["entity_1"].isin(old_entity_ids) |
    entity_join_all["entity_2"].isin(old_entity_ids)
)


# calculate overlap %'s

entity_join_all["area_intersection"] = entity_join_all["geometry"].area

# share of entity 1 (p) and of entity 2 (s) covered by the intersection
entity_join_all["p_pct_intersect"] = entity_join_all["area_intersection"] / entity_join_all["area_1"]
# intersection over union of the two entities
entity_join_all["pct_intersection"] = entity_join_all["area_intersection"] / (entity_join_all["area_1"] + entity_join_all["area_2"] - entity_join_all["area_intersection"])
entity_join_all["s_pct_intersect"] = entity_join_all["area_intersection"] / entity_join_all["area_2"]

# intersection area as % of smallest primary or secondary area
entity_join_all["pct_min_intersection"] = entity_join_all["area_intersection"] / entity_join_all[["area_1", "area_2"]].min(axis = 1)


# np.select takes the first matching condition, so the order below matters:
# a combined match wins over everything, and the edge checks are applied before
# the single-match check.
entity_join_all["intersection_type"] = np.select(
    [
        (entity_join_all["p_pct_intersect"] >= MATCH_LOWER_THRESH) & (entity_join_all["s_pct_intersect"] >= MATCH_LOWER_THRESH),
        (entity_join_all["pct_min_intersection"] <= EDGE_UPPER_THRESH) & (entity_join_all["pct_min_intersection"] >= EDGE_LOWER_THRESH),
        (entity_join_all["pct_min_intersection"] < EDGE_LOWER_THRESH),
        ((entity_join_all["p_pct_intersect"] >= MATCH_LOWER_THRESH) | (entity_join_all["s_pct_intersect"] >= MATCH_LOWER_THRESH)),

    ],
    [
        "> 90% combined match", "edge intersection", "tiny edge - ignore", "> 90% single match"
    ],
    default = "unclassified"
)

nrow(entity_join_all)
entity_join_all.head()

No. of records in df: 2,766


Unnamed: 0,entity_1,entry_date_1,name_1,organisation_entity_1,reference_1,organisation_name_1,organisation_type_1,LPACD_1,area_1,entity_2,...,int_org_match,int_org_types,date_match,entity_old,area_intersection,p_pct_intersect,pct_intersection,s_pct_intersect,pct_min_intersection,intersection_type
7,44000009,2022-04-12,Childwickbury,16,5063,Historic England,government-organisation,,1885513.0,44000007,...,True,HE - HE,True,False,2.170036,1e-06,4.225103e-07,6.675918e-07,1e-06,tiny edge - ignore
19,44000770,2022-04-12,Leominster Town,16,2499,Historic England,government-organisation,,255623.2,44000017,...,True,HE - HE,True,False,2.033437,8e-06,4.665495e-06,1.128278e-05,1.1e-05,tiny edge - ignore
47,44000043,2022-04-12,Butterworth Hall,16,7716,Historic England,government-organisation,,29687.92,44000042,...,True,HE - HE,True,False,0.0,0.0,0.0,0.0,0.0,tiny edge - ignore
57,44003132,2022-04-12,Worcester and Birmingham Canal,16,449,Historic England,government-organisation,,253931.3,44000050,...,True,HE - HE,True,False,4.129991,1.6e-05,7.315925e-06,1.329709e-05,1.6e-05,tiny edge - ignore
100,44005296,2022-04-12,Lewes,16,3207,Historic England,government-organisation,,1360607.0,44000105,...,True,HE - HE,True,False,125.147014,9.2e-05,8.53257e-05,0.001178229,0.001178,tiny edge - ignore


In [14]:
# FLAGGING ISSUE DETAILS
# Derives issue_type, action, priority and hint for every entity-entity overlap.

same_org = entity_join_all["int_org_match"]
org_pairing = entity_join_all["int_org_types"]
overlap_kind = entity_join_all["intersection_type"]

# Org overlap types
entity_join_all["issue_type"] = np.select(
    [
        ~same_org & (org_pairing == "HE - other"),
        ~same_org & (org_pairing == "LPA - LPA"),
        same_org & (org_pairing == "HE - HE"),
        same_org & (org_pairing == "LPA - LPA"),
    ],
    [
        "Between organisations - Historic England to LPA",
        "Between organisations - LPA to a different LPA",
        "Within organisation - Historic England",
        "Within organisation - LPA",
    ],
    default="-",
)

# Action: the first matching rule wins, so tiny edges and HE-HE overlaps are
# ignored before the remap rule is considered
entity_join_all["action"] = np.select(
    [
        overlap_kind == "tiny edge - ignore",
        same_org & (org_pairing == "HE - HE"),
        ~same_org & (org_pairing == "HE - other") & (overlap_kind == "> 90% combined match"),
    ],
    ["ignore", "ignore", "remap"],
    default="investigate",
)

# Priority and hint
issue_priority_mapping = {
    "> 90% combined match" : "high",
    "> 90% single match" : "medium",
    "edge intersection" : "low",
    "unclassified" : "low",
    "tiny edge - ignore" : ""
}

issue_hint_mapping = {
    True : "raise with LPA",
    False : "check endpoints"
}

not_ignored = entity_join_all["action"] != "ignore"

# priority follows the intersection type, blanked out for ignored issues
entity_join_all["priority"] = overlap_kind.map(issue_priority_mapping).where(not_ignored, "")

# a hint is only given for non-ignored overlaps within a single LPA; it depends on
# whether the two entities share an entry date
within_lpa_issue = same_org & (org_pairing == "LPA - LPA") & not_ignored
entity_join_all["hint"] = entity_join_all["date_match"].map(issue_hint_mapping).where(within_lpa_issue, "")


In [220]:
# Flag issues whose entities are involved in more than one issue
# (tiny edge intersections are left out of the count)
not_tiny = entity_join_all["intersection_type"] != "tiny edge - ignore"
no_tinies = entity_join_all[not_tiny]

# stack both entity columns; any id appearing more than once has multiple issues
all_ents = pd.concat([no_tinies["entity_1"], no_tinies["entity_2"]], ignore_index=True)
multi_issue_ents = all_ents[all_ents.duplicated(keep=False)]

in_multiple = (
    entity_join_all["entity_1"].isin(multi_issue_ents)
    | entity_join_all["entity_2"].isin(multi_issue_ents)
)
entity_join_all["multiple_issues"] = not_tiny & in_multiple

entity_join_all.groupby("multiple_issues").size()


multiple_issues
False    2474
True      292
dtype: int64

In [16]:
# check the flagging or intersections between different org types is correct
# entity_join_all.groupby(["int_org_match", "int_org_types", "organisation_entity_1", "organisation_entity_2"]).size()

In [17]:
# issues that need some follow-up, i.e. everything not marked "ignore"
addressable_issues = entity_join_all.loc[entity_join_all["action"].ne("ignore")]

print(f"there are {len(addressable_issues)} addressable issues in total")
print("\n")
addressable_issues.groupby(
    by=['issue_type', 'intersection_type', 'action', 'priority']
).size()

there are 630 addressable issues in total




issue_type                                       intersection_type     action       priority
Between organisations - Historic England to LPA  > 90% combined match  remap        high        302
                                                 > 90% single match    investigate  medium      233
                                                 edge intersection     investigate  low          17
                                                 unclassified          investigate  low          19
Between organisations - LPA to a different LPA   edge intersection     investigate  low           4
                                                 unclassified          investigate  low           2
Within organisation - LPA                        > 90% combined match  investigate  high         10
                                                 > 90% single match    investigate  medium       36
                                                 edge intersection     investigate  low           4
       

In [20]:
# breakdown of every issue (including ignored ones) by type, action, priority and hint

print(f"there are {len(entity_join_all)} issues in total")
print("\n")
entity_join_all.groupby(
    by=['issue_type', 'intersection_type', 'action', 'priority', 'hint']
).size().iloc[:40]


there are 2766 issues in total




issue_type                                       intersection_type     action       priority  hint           
Between organisations - Historic England to LPA  > 90% combined match  remap        high                         302
                                                 > 90% single match    investigate  medium                       233
                                                 edge intersection     investigate  low                           17
                                                 tiny edge - ignore    ignore                                    467
                                                 unclassified          investigate  low                           19
Between organisations - LPA to a different LPA   edge intersection     investigate  low                            4
                                                 tiny edge - ignore    ignore                                     43
                                                 unclassified          

In [26]:
# columns (and their order) used when writing report tables to csv

# the same six descriptive fields are reported for each side of the overlap
_per_entity_fields = ["entity", "entry_date", "name", "organisation_entity", "reference", "organisation_name"]

nicecols = (
    ["entity_join"]
    + [f"{field}_1" for field in _per_entity_fields]
    + [f"{field}_2" for field in _per_entity_fields]
    + ["pct_min_intersection", "p_pct_intersect", "s_pct_intersect"]
    + ["date_match", "entity_old"]
    + ["intersection_type", "issue_type", "action"]
    + ["priority", "hint", "multiple_issues"]
)

# entity_join_all[nicecols].to_csv(os.path.join(data_dir, "issues_all.csv"), index=False)

In [None]:
# organisation names of the funded LPAs
lpa_fund_list = [
    'Buckinghamshire Council',
    'Doncaster Metropolitan Borough Council',
    'Gloucester City Council',
    'London Borough of Camden',
    'London Borough of Lambeth',
    'London Borough of Southwark',
    'Medway Council',
    'Newcastle City Council',
    'Birmingham City Council',
    'Canterbury City Council',
    'Epsom and Ewell Borough Council',
    'London Borough of Barnet',
    'Gateshead Metropolitan Borough Council',
    'Great Yarmouth Borough Council',
    'Royal Borough of Kingston upon Thames',
    'St Albans City and District Council',
    'Tewkesbury Borough Council',
    'West Berkshire Council',
    'Dorset District Council',
    'Dover District Council',
    'Liverpool City Council',
    'London Borough of Redbridge',
    'London Borough of Waltham Forest',
    'North Lincolnshire Council',
    'North Somerset Council',
    'Salford City Council',
    'Wirral Borough Council',
]

# keep every issue where either side of the overlap belongs to a funded LPA
funded_on_either_side = (
    entity_join_all["organisation_name_1"].isin(lpa_fund_list)
    | entity_join_all["organisation_name_2"].isin(lpa_fund_list)
)
lpa_fund_issues = entity_join_all.loc[funded_on_either_side]

# export issues list for all funded lpas
funded_issues_path = os.path.join(data_dir, "issues_all-funded_LPAs.csv")
lpa_fund_issues[nicecols].to_csv(funded_issues_path, index=False)

## Checking issues for funded LPAs - external duplicates (LPA to Historic England)

In [200]:
# Non-ignored Historic England <-> LPA issues where organisation 1 is a funded LPA
he_to_lpa = entity_join_all["issue_type"].eq("Between organisations - Historic England to LPA")
needs_action = entity_join_all["action"].ne("ignore")
funded_lpa = entity_join_all["organisation_name_1"].isin(lpa_fund_list)

issues_2b_df = entity_join_all[he_to_lpa & needs_action & funded_lpa].copy()

# Extra field describing how the LPA polygon (p, organisation 1) and the
# Historic England polygon (s, organisation 2) overlap. The first matching
# condition wins.
lpa_share = issues_2b_df["p_pct_intersect"]
he_share = issues_2b_df["s_pct_intersect"]

issues_2b_df["org_overlap_type"] = np.select(
    [
        (lpa_share >= MATCH_LOWER_THRESH) & (he_share >= MATCH_LOWER_THRESH),
        (lpa_share <= EDGE_UPPER_THRESH) & (he_share <= EDGE_UPPER_THRESH),
        lpa_share >= MATCH_LOWER_THRESH,
        he_share >= MATCH_LOWER_THRESH,
    ],
    [
        "LPA and HE polygons closely match",
        "LPA and HE edges overlap",
        "LPA polygon covered by larger Historic England polygon",
        "LPA polygon covers smaller Historic England polygon",
    ],
    default="Ambiguous overlap of LPA and Historic England polygons",
)

# issues_2b_df[nicecols + ["org_overlap_type"]].to_csv(os.path.join(data_dir, "issues_type_2b-between_org-HE_to_LPA-funded_LPAs.csv"))

# summarise
issues_2b_df.groupby(
    by=["issue_type", "intersection_type", "action", "priority", "org_overlap_type"]
).size()

issue_type                                       intersection_type     action       priority  org_overlap_type                                      
Between organisations - Historic England to LPA  > 90% combined match  remap        high      LPA and HE polygons closely match                         193
                                                 > 90% single match    investigate  medium    LPA polygon covered by larger Historic England polygon     18
                                                                                              LPA polygon covers smaller Historic England polygon         5
                                                 edge intersection     investigate  low       LPA and HE edges overlap                                    3
                                                 unclassified          investigate  low       Ambiguous overlap of LPA and Historic England polygons      4
dtype: int64

In [203]:
# number of issues per LPA, largest first
(
    issues_2b_df
    .groupby(by=["organisation_entity_1", "organisation_name_1"])
    .size()
    .sort_values(ascending=False)
)

organisation_entity_1  organisation_name_1                   
75                     Canterbury City Council                   98
109                    Doncaster Metropolitan Borough Council    45
212                    Medway Council                            24
152                    Great Yarmouth Borough Council            21
145                    Gloucester City Council                   14
192                    London Borough of Lambeth                  8
188                    Royal Borough of Kingston upon Thames      4
261                    London Borough of Redbridge                4
329                    London Borough of Southwark                2
67                     Buckinghamshire Council                    1
111                    Dover District Council                     1
366                    London Borough of Waltham Forest           1
dtype: int64

### Map for LPA analysis

In [204]:
# Select the LPA to map: set org_name to one of the organisation names in the
# issues-per-LPA summary
org_name = "Great Yarmouth Borough Council"

is_he_to_lpa_issue = issues_2b_df["issue_type"].eq("Between organisations - Historic England to LPA")
is_selected_lpa = issues_2b_df["organisation_name_1"].eq(org_name)
# optional extra filter: issues_2b_df["priority"].isin(["medium", "high"])

issues_yarm = issues_2b_df[is_he_to_lpa_issue & is_selected_lpa]

nrow(issues_yarm)
issues_yarm.groupby(
    by=["issue_type", "intersection_type", "action", "priority", "org_overlap_type"]
).size()

No. of records in df: 21


issue_type                                       intersection_type     action       priority  org_overlap_type                                      
Between organisations - Historic England to LPA  > 90% combined match  remap        high      LPA and HE polygons closely match                         12
                                                 > 90% single match    investigate  medium    LPA polygon covered by larger Historic England polygon     9
dtype: int64

In [187]:
# Get geometries for entities with issues and split into LPA and Historic England
# (reprojected to EPSG:4326 for display on the web map)
issue_geoms = entity_gdf[entity_gdf["entity"].isin(get_issue_entities(issues_yarm))].to_crs(4326)

# Select the LPA's polygons by the chosen `org_name` rather than a hard-coded
# organisation entity (previously 152, i.e. the default LPA only), so the map
# works for whichever LPA has been selected.
lpa_geoms = issue_geoms[issue_geoms["organisation_name"] == org_name]
# organisation entity 16 is Historic England
he_geoms = issue_geoms[issue_geoms["organisation_entity"] == 16]

# join the overlap type field onto the LPA geoms to use in the map
lpa_geoms = lpa_geoms.merge(
    issues_2b_df[["entity_1", "org_overlap_type"]].drop_duplicates(),
    how = "left",
    left_on = "entity",
    right_on = "entity_1"
)

# lpa_geoms

In [213]:


# Define colours: one per overlap type present for the selected LPA.
# Five colours are supplied because there are five possible overlap types; with
# only three, zip() silently dropped the extra types and the style function then
# failed with a KeyError. The first three colours are unchanged.
overlap_palette = ["#fc8d62", "#8da0cb", "#e78ac3", "#66c2a5", "#a6d854"]
color_dict = dict(
    zip(
        lpa_geoms["org_overlap_type"].drop_duplicates().values,
        overlap_palette)
)


def _overlap_colour(feature):
    """Colour for an LPA feature's overlap type; grey when the type has no colour (e.g. missing)."""
    return color_dict.get(feature["properties"]["org_overlap_type"], "#999999")


# map - the start location is only a fallback; the view is fitted to the selected
# LPA's polygons below so the map works for any `org_name`
m = folium.Map(
    location=[52.60707, 1.728],
    tiles="CartoDB positron",
    zoom_start=11,
)

if not lpa_geoms.empty:
    # total_bounds is (minx, miny, maxx, maxy); the geometries are in EPSG:4326,
    # so x is longitude and y is latitude, while folium expects [lat, lon] pairs
    min_lon, min_lat, max_lon, max_lat = (float(v) for v in lpa_geoms.total_bounds)
    m.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])

popup = folium.GeoJsonPopup(fields=["name", "organisation_name", "org_overlap_type"])

# styling for LPA (layer is labelled with the selected organisation's name)
folium.GeoJson(
    lpa_geoms.to_json(),
    popup=popup,
    name = org_name,
    style_function=lambda feature: {
        "fillColor": _overlap_colour(feature),
        "color": _overlap_colour(feature),
        "weight": 2,
        "fillOpacity": 0.5,
    },
    highlight_function=lambda feature: {
        "fillColor": "#ffff00"
    }
).add_to(m)

popup2 = folium.GeoJsonPopup(fields=["name", "organisation_name"])

# styling for HE (dashed green outline; layer hidden until switched on)
folium.GeoJson(
    he_geoms.to_json(),
    popup=popup2,
    name = "Historic England",
    show = False,
    style_function=lambda feature: {
        "fillColor": None,
        "color": "green",
        "dashArray": "5, 5",
        "weight": 2,
        "fillOpacity": 0.1,
    },
    highlight_function=lambda feature: {
        "fillColor": "green",
        "fillOpacity": 0.5
    }
).add_to(m)


folium.LayerControl().add_to(m)

# saved in the notebook directory, named after the selected organisation
org_formatted = org_name.replace(" ", "_")
m.save(f"issues_map-{org_formatted}.html")

## Checking issues for funded LPAs - internal duplicates

In [None]:
# export issues list for all funded lpas
# lpa_fund_issues[nicecols].to_csv("temp_issues_funded.csv")

In [None]:
# epsom ones to remove
# entity_gdf[(entity_gdf["organisation_entity"] == 129) & (entity_gdf["reference"].apply(lambda x: len(x)) < 5)].to_csv("temp_epsom_to_remove.csv")

In [140]:
# read in current lambeth endpoint and outer join to existing entities

# make sure the output folder exists before writing to it (to_csv does not create
# missing directories)
funded_checks_dir = "../data/geo_analysis/funded_lpa_checks/"
os.makedirs(funded_checks_dir, exist_ok=True)

lambeth_endpoint_gdf = gpd.read_file("https://gis.lambeth.gov.uk/arcgis/rest/services/LambethConservationAreas/MapServer/0/query?where=1%3D1&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&havingClause=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&returnExtentOnly=false&datumTransformation=&parameterValues=&rangeValues=&quantizationParameters=&featureEncoding=esriDefault&f=geojson")

# keep only the reference column; .copy() so the flag below is added to an
# independent frame rather than to a slice (avoids SettingWithCopyWarning)
lambeth_endpoint_gdf = lambeth_endpoint_gdf[["CA_REF_NO"]].copy()
lambeth_endpoint_gdf["record_in_endpoint"] = True

# nrow(lambeth_endpoint_gdf)
# lambeth_endpoint_gdf.head()

# Outer join of the Lambeth entities (organisation entity 192) to the endpoint
# records: rows missing `record_in_endpoint` exist only on the platform, rows
# missing `entity` exist only in the endpoint.
entity_df[entity_df["organisation_entity"] == 192][["entity", "reference", "name"]].merge(
    lambeth_endpoint_gdf,
    how =  "outer",
    left_on  = "reference", right_on = "CA_REF_NO"
).to_csv(os.path.join(funded_checks_dir, "temp_lambeth_entity_endpoint_check.csv"))

ERROR:fiona._env:PROJ: internal_proj_identify: /Users/gslater/miniconda3/envs/pdp_jupyter/share/proj/proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation.


In [141]:
# Compare the Epsom endpoint extract (local csv) with the Epsom conservation-area
# entities currently on planning.data.gov.uk (organisation entity 129).
epsom_endpoint_gdf = pd.read_csv("../data/geo_analysis/funded_lpa_checks/Epsom_conservation-area_WFS.csv")
# .copy() so the flag below is added to an independent frame rather than to a
# slice (avoids SettingWithCopyWarning)
epsom_endpoint_gdf = epsom_endpoint_gdf[["name", "reference"]].copy()
epsom_endpoint_gdf["record_in_endpoint"] = True

nrow(epsom_endpoint_gdf)
# epsom_endpoint_gdf.head()

epsom_pdp_gdf = gpd.read_file("https://www.planning.data.gov.uk/entity.geojson?organisation_entity=129&dataset=conservation-area&limit=100")
epsom_pdp_gdf["record_in_pdp"] = True

nrow(epsom_pdp_gdf)
# epsom_pdp_gdf.head()

# Outer join on reference: rows missing `record_in_pdp` exist only in the
# endpoint, rows missing `record_in_endpoint` exist only on the platform.
epsom_pdp_gdf[["entity", "reference", "name", "record_in_pdp"]].merge(
    epsom_endpoint_gdf,
    how =  "outer",
    on  = "reference"
).to_csv("../data/geo_analysis/funded_lpa_checks/temp_epsom_entity_endpoint_check.csv")

No. of records in df: 21


ERROR:fiona._env:PROJ: internal_proj_identify: /Users/gslater/miniconda3/envs/pdp_jupyter/share/proj/proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation.


No. of records in df: 41
