# Title
**Author**:  Greg Slater <br>
**Date**:  24th September 2024 <br>
**Dataset Scope**: `dataset` <br>
**Report Type**: Ad-hoc analysis <br>

## Purpose


In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import urllib
# from sqlite_query_functions import DatasetSqlite
from datetime import datetime

pd.set_option("display.max_rows", 100)

td = datetime.today().strftime('%Y-%m-%d')
data_dir = "../../data/BFL_old_orgs/"
os.makedirs(data_dir, exist_ok=True)
# fn = os.path.join(data_dir, f"FILENAME_{td}.csv")


## Data Import

In [190]:
bfl = pd.read_csv("https://files.planning.data.gov.uk/dataset/brownfield-land.csv")
print(len(bfl))

35125


In [90]:
bfl_BBC = bfl[bfl["organisation-entity"] == 47].copy()
bfl_BCP = bfl[bfl["organisation-entity"] == 54].copy()

print(len(bfl_BBC))
print(len(bfl_BCP))

50
344


In [142]:
def get_all_organisations():
    params = urllib.parse.urlencode({
        "sql": f"""
        select entity as organisation_entity, name as org_name, organisation, dataset as org_type, end_date
        from organisation
        """,
        "_size": "max"
        })
    url = f"https://datasette.planning.data.gov.uk/digital-land.csv?{params}"
    df = pd.read_csv(url)
    return df

org_df = get_all_organisations()

org_df.set_index("organisation_entity", inplace=True)
org_dict = org_df[["org_name"]].to_dict("index")

In [154]:
def check_matches(org_x, org_y, match_fields):

    dfx = bfl[bfl["organisation-entity"] == org_x].copy()
    dfy = bfl[bfl["organisation-entity"] == org_y].copy()

    namex = org_dict[org_x]["org_name"]
    namey = org_dict[org_y]["org_name"]
    print(f"no. of entities in left table ({namex}) = {len(dfx)}")
    print(f"no. of entities in right table  ({namey})= {len(dfy)}")

    cross_ref_df = pd.merge(
        dfx,
        dfy,
        how = "outer",
        on = match_fields,
        indicator=True,
    )[match_fields + ["_merge"]]

    # d["ref_matches"] = d["cross_ref_df"][d["cross_ref_df"]["_merge"] == "both"]["reference"]
    
    return cross_ref_df

cr_test = check_matches(47, 54, ["reference"])
cr_test_matches = cr_test[cr_test["_merge"] == "both"]["reference"]

cr_test.value_counts("_merge")

no. of entities in left table (Bournemouth Borough Council) = 50
no. of entities in right table  (Bournemouth, Christchurch and Poole Council)= 344


_merge
right_only    296
both           48
left_only       2
Name: count, dtype: int64

In [105]:
def compare_match_fields(org_x, org_y, ref_matches, fields):

    dfx = bfl[
        (bfl["organisation-entity"] == org_x) &
        (bfl["reference"].isin(ref_matches))].copy()
    
    dfy = bfl[
        (bfl["organisation-entity"] == org_y) &
        (bfl["reference"].isin(ref_matches))].copy()

    # subset each table to only records which match on reference
    matches_df1 = dfx[dfx["reference"].isin(ref_matches)].copy()
    matches_df1.set_index("reference", inplace=True)
    matches_df1.sort_index(inplace=True)

    matches_df2 = dfy[dfy["reference"].isin(ref_matches)].copy()
    matches_df2.set_index("reference", inplace=True)
    matches_df2.sort_index(inplace=True)

    # full comparison
    full_comp = matches_df1 == matches_df2

    # calculate match % for each field and make into neat wide table
    results_df_nar = (full_comp[fields].sum(axis=0) / len(full_comp)).to_frame(name = "x_y_pct_match")
    results_df_wide = results_df_nar.reset_index().pivot_table(columns = "index", values = "x_y_pct_match")
    return results_df_wide

t = compare_match_fields(47, 54, cr_test_matches, ["name", "point", "site", "site-address"])
t

index,name,point,site,site-address
x_y_pct_match,1.0,0.0,0.0,0.708333


## Analysis

### Bournemouth, Christchurch and Poole Council 

In [164]:
ox = 47
oy = 54

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Bournemouth Borough Council) = 50
no. of entities in right table  (Bournemouth, Christchurch and Poole Council)= 344

Checking entity match on `reference` field
_merge
right_only    296
both           48
left_only       2
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index,name,point,site,site-address
x_y_pct_match,1.0,0.0,0.0,0.708333


In [162]:
ox = 78
oy = 54

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Christchurch Borough Council) = 41
no. of entities in right table  (Bournemouth, Christchurch and Poole Council)= 344

Checking entity match on `reference` field
_merge
right_only    305
both           39
left_only       2
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index,name,point,site,site-address
x_y_pct_match,1.0,0.0,0.0,1.0


In [166]:
ox = 254
oy = 54

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Borough of Poole) = 122
no. of entities in right table  (Bournemouth, Christchurch and Poole Council)= 344

Checking entity match on `reference` field
_merge
right_only    255
both           89
left_only      33
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index,name,point,site,site-address
x_y_pct_match,1.0,0.0,0.0,0.741573


### Buckinghamshire Council

In [170]:
ox = 32
oy = 67

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Aylesbury Vale District Council) = 17
no. of entities in right table  (Buckinghamshire Council)= 139

Checking entity match on `reference` field
_merge
right_only    138
left_only      16
both            1
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index,name,point,site,site-address
x_y_pct_match,1.0,1.0,0.0,1.0


In [171]:
ox = 82
oy = 67

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Chiltern District Council) = 51
no. of entities in right table  (Buckinghamshire Council)= 139

Checking entity match on `reference` field
_merge
right_only    115
left_only      27
both           24
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index,name,point,site,site-address
x_y_pct_match,1.0,0.0,0.0,0.125


In [173]:
ox = 280
oy = 67

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (South Bucks District Council) = 37
no. of entities in right table  (Buckinghamshire Council)= 139

Checking entity match on `reference` field
_merge
right_only    120
both           19
left_only      18
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index,name,point,site,site-address
x_y_pct_match,1.0,0.0,0.0,0.263158


In [175]:
ox = 393
oy = 67

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Wycombe District Council) = 47
no. of entities in right table  (Buckinghamshire Council)= 139

Checking entity match on `reference` field
_merge
right_only    139
left_only      47
both            0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


### Dorset

In [178]:
ox = 121
oy = 112

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (East Dorset District Council) = 29
no. of entities in right table  (Dorset Council)= 83

Checking entity match on `reference` field
_merge
right_only    83
left_only     29
both           0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


In [179]:
ox = 222
oy = 112

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (North Dorset District Council) = 16
no. of entities in right table  (Dorset Council)= 83

Checking entity match on `reference` field
_merge
right_only    83
left_only     16
both           0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


In [180]:
ox = 258
oy = 112

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Purbeck District Council) = 12
no. of entities in right table  (Dorset Council)= 83

Checking entity match on `reference` field
_merge
right_only    83
left_only     12
both           0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


In [181]:
ox = 360
oy = 112

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (West Dorset District Council) = 47
no. of entities in right table  (Dorset Council)= 83

Checking entity match on `reference` field
_merge
right_only    83
left_only     47
both           0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


In [183]:
ox = 365
oy = 112

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Weymouth and Portland Borough Council) = 50
no. of entities in right table  (Dorset Council)= 83

Checking entity match on `reference` field
_merge
right_only    83
left_only     50
both           0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


### East Suffolk

In [184]:
ox = 326
oy = 132

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Suffolk Coastal District Council) = 18
no. of entities in right table  (East Suffolk Council)= 141

Checking entity match on `reference` field
_merge
right_only    141
left_only      18
both            0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


In [186]:
ox = 356
oy = 132

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Waveney District Council) = 33
no. of entities in right table  (East Suffolk Council)= 141

Checking entity match on `reference` field
_merge
right_only    141
left_only      33
both            0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


In [187]:
ox = 140
oy = 386

cross_ref = check_matches(ox, oy, ["reference"])
cr_matches = cross_ref[cross_ref["_merge"] == "both"]["reference"]

print()
print("Checking entity match on `reference` field")
print(cross_ref.value_counts("_merge"))

print()
print("comparing field match % for entities matched on `reference` field")
compare_match_fields(ox, oy, cr_matches, ["name", "point", "site", "site-address"])

no. of entities in left table (Forest Heath District Council) = 3
no. of entities in right table  (West Suffolk Council)= 28

Checking entity match on `reference` field
_merge
right_only    28
left_only      3
both           0
Name: count, dtype: int64

comparing field match % for entities matched on `reference` field


index
x_y_pct_match


In [177]:
cr_matches

Series([], Name: reference, dtype: object)