In [None]:
import urllib
import pandas as pd
import shapely.wkt
from shapely.geometry import shape
from shapely.errors import WKTReadingError
from shapely.geometry import Point
import json
from urllib.request import urlopen

In [None]:
def get_all_organisations():
    """Download the organisation list from the digital-land datasette.

    Returns:
        pandas.DataFrame: one row per organisation with the columns
        ``organisation``, ``name``, ``organisation_entity`` (the ``entity``
        column under an alias) and ``statistical_geography``.
    """
    organisation_sql = """
        select organisation, name, entity as organisation_entity, statistical_geography
        from organisation
        """
    query_string = urllib.parse.urlencode({"sql": organisation_sql, "_size": "max"})
    return pd.read_csv(
        f"https://datasette.planning.data.gov.uk/digital-land.csv?{query_string}"
    )

def get_la_district_json(lpa_ref):
    """Return the ``json`` field of the first entity whose reference is ``lpa_ref``.

    The reference is passed as a Datasette named SQL parameter
    (``:reference``) instead of being pasted into the SQL text, so a value
    containing a quote character cannot break or alter the query.

    Args:
        lpa_ref: Value matched against the ``reference`` column of ``entity``.

    Returns:
        The ``json`` value of the first matching row, or ``None`` when the
        query returns no rows.
    """
    params = urllib.parse.urlencode({
        "sql": """
        select json
        from entity
        where reference = :reference
        """,
        "reference": lpa_ref,
        "_size": "max"
        })
    url = f"https://datasette.planning.data.gov.uk/entity.csv?{params}"
    df = pd.read_csv(url)
    try:
        return df.loc[0, "json"]
    except KeyError:
        # An empty result set has no row labelled 0.
        return None

def get_brownfield_sites_for_organisation(organisation_entity_number):
    """Fetch the brownfield-land entities that belong to one organisation.

    The entity number is passed as a Datasette named SQL parameter
    (``:organisation_entity``) rather than interpolated into the SQL text.

    Args:
        organisation_entity_number: Value matched against the
            ``organisation_entity`` column of ``entity``.

    Returns:
        pandas.DataFrame with the columns ``json``, ``point``, ``reference``
        and ``organisation_entity``.
    """
    params = urllib.parse.urlencode({
        "sql": """
        select json, point, reference, organisation_entity
        from entity
        where organisation_entity = :organisation_entity
        """,
        "organisation_entity": organisation_entity_number,
        "_size": "max"
        })
    url = f"https://datasette.planning.data.gov.uk/brownfield-land.csv?{params}"
    return pd.read_csv(url)

In [None]:
def get_LPA_multipolygon(reference):
    """Return the ``geometry`` text of the first entity with this reference.

    The reference is passed as a Datasette named SQL parameter
    (``:reference``) rather than interpolated into the SQL text.

    Args:
        reference: Value matched against the ``reference`` column of ``entity``.

    Returns:
        The ``geometry`` value of the first matching row, or ``None`` when the
        query returns no rows or the geometry cell is empty.
    """
    params = urllib.parse.urlencode({
        "sql": """
        select geometry
        from entity
        where reference = :reference
        """,
        "reference": reference,
        "_size": "max"
        })
    url = f"https://datasette.planning.data.gov.uk/entity.csv?{params}"
    df = pd.read_csv(url)
    try:
        geometry = df.loc[0, "geometry"]
    except KeyError:
        # An empty result set has no row labelled 0.
        return None
    # An empty CSV cell is read as NaN; report it as "no geometry" so callers
    # that test ``is not None`` do not hand NaN to a geometry parser.
    if pd.isnull(geometry):
        return None
    return geometry

def get_site_point(collection_name, entity_number):
    """Return the ``point`` value of one entity from a collection's datasette.

    The entity number is passed as a Datasette named SQL parameter
    (``:entity``) rather than interpolated into the SQL text.

    Args:
        collection_name: Name of the datasette database; it becomes the
            ``<collection_name>.csv`` path segment of the request URL.
        entity_number: Value matched against the ``entity`` column.

    Returns:
        The ``point`` value of the first matching row.

    Raises:
        KeyError: If the query returns no rows.
    """
    params = urllib.parse.urlencode({
        "sql": """
        select point
        from entity
        where entity = :entity
        """,
        "entity": entity_number,
        "_size": "max"
        })
    url = f"https://datasette.planning.data.gov.uk/{collection_name}.csv?{params}"
    df = pd.read_csv(url)
    return df.loc[0, "point"]

def parse_wkt(value):
    """Parse ``value`` as WKT, falling back to GeoJSON when WKT parsing fails.

    Returns:
        A ``(geometry, issue)`` tuple:

        * ``(geometry, None)`` when ``value`` parses as WKT;
        * ``(geometry, "invalid type geojson")`` when WKT parsing raised
          ``WKTReadingError`` but ``value`` could be read as GeoJSON;
        * ``(None, "invalid WKT")`` when the GeoJSON fallback fails too.
    """
    try:
        return shapely.wkt.loads(value), None
    except WKTReadingError:
        # Not WKT; try the GeoJSON fallback below.
        pass

    try:
        geojson_shape = shape(json.loads(value))
        # The shape is round-tripped through its WKT text before being returned.
        return shapely.wkt.loads(geojson_shape.wkt), "invalid type geojson"
    except Exception:
        return None, "invalid WKT"


def make_point(point):
    """Build a ``Point`` from ``point`` when its ``geom_type`` is ``"Point"``.

    For any other geometry type a message is printed and ``None`` is returned.
    """
    if point.geom_type != "Point":
        print("Not a point")
        return None
    return Point(point)

In [None]:
def compute_true_location(pt):
    """Look up the admin district that postcodes.io reports for a point.

    Args:
        pt: Object exposing ``x`` (longitude) and ``y`` (latitude).

    Returns:
        * ``"Not in Great Britain"`` when the point lies outside the bounding
          box lon (-7.0, 2.5), lat (49.5, 56.0); no request is made.
        * The ``admin_district`` of the first postcodes.io result otherwise.
        * ``"None found"`` when the response has no usable result.
    """
    # The whole box test must be negated. Writing ``not a and b and c and d``
    # negates only the first comparison, so points east, north or south of
    # the box would slip through to the API.
    # NOTE(review): the 56.0 latitude limit excludes most of Scotland even
    # though the message says "Great Britain" - confirm this is intended.
    inside_bounding_box = (-7.0 < pt.x < 2.5) and (49.5 < pt.y < 56.0)
    if not inside_bounding_box:
        return "Not in Great Britain"

    url = f"https://api.postcodes.io/postcodes?lon={pt.x}&lat={pt.y}"
    # Context manager closes the HTTP response even if reading fails.
    with urlopen(url) as response:
        data = json.loads(response.read())

    try:
        return data["result"][0]["admin_district"]
    except (KeyError, IndexError, TypeError):
        # Missing key, empty result list, or a null ``result``.
        return "None found"

In [None]:
# Set to True to also list sites that have no coordinates at all.
include_null_coordinate_data = False
collection="brownfield_land"

# Columns of the report. The frame is built with these names even when no
# site is flagged, so a later sort on "Distance (Arbitrary Unit)" always finds
# its column.
report_columns = ["Site_Reference", "Organisation", "Organisation_Name", "Point", "Maps_Link", "Admin_District", "Distance (Arbitrary Unit)"]

df_lpa = get_all_organisations()

# Rows are collected in a list and turned into a DataFrame once at the end;
# concatenating a one-row frame per hit is quadratic in the number of hits.
outside_lpa_rows = []
for lpa in df_lpa.itertuples():
    # Only local authorities are checked; skipping the others first avoids
    # downloading site lists that would never be used.
    if "local-authority-eng" not in lpa.organisation:
        continue

    multipol = get_LPA_multipolygon(lpa.statistical_geography)
    if multipol is None or pd.isnull(multipol):
        # No boundary available for this authority.
        continue

    area, _issue = parse_wkt(multipol)
    if area is None:
        # Boundary could not be parsed, so there is nothing to test against.
        continue

    df_brownfield_sites = get_brownfield_sites_for_organisation(lpa.organisation_entity)
    for site in df_brownfield_sites.itertuples():
        if pd.notnull(site.point):
            pt = shapely.wkt.loads(site.point)
            if not pt.within(area):
                # compute_true_location performs the postcodes.io lookup.
                admin_district = compute_true_location(pt)
                distance = area.boundary.distance(pt)
                google_maps_link = f"https://maps.google.com/?q={pt.y},{pt.x}"
                outside_lpa_rows.append({
                    "Site_Reference": site.reference,
                    "Organisation": lpa.organisation,
                    "Organisation_Name": lpa.name,
                    "Point": site.point,
                    "Maps_Link": google_maps_link,
                    "Admin_District": admin_district,
                    "Distance (Arbitrary Unit)": distance,
                })
        elif include_null_coordinate_data:
            outside_lpa_rows.append({
                "Site_Reference": site.reference,
                "Organisation": lpa.organisation,
                "Organisation_Name": lpa.name,
                "Point": "No coordinate data",
            })

df_brownfield_sites_outside_lpa = pd.DataFrame(outside_lpa_rows, columns=report_columns)
            
# TODO:
# - Change the maps link so that it highlights both the LPA boundary and the site point.
# - Run checks on each flagged point to work out why it falls outside its LPA (e.g. a missing minus sign, swapped coordinates).
# - Grab the site address from the json and use it in compute_true_location?
        

In [None]:
# Order the flagged sites by distance from their LPA boundary, largest first,
# and renumber the rows from zero.
df_brownfield_sites_outside_lpa_sorted = (
    df_brownfield_sites_outside_lpa
    .sort_values(by=["Distance (Arbitrary Unit)"], ascending=False)
    .reset_index(drop=True)
)
# Disabled alternative: order rows by how many flagged sites each organisation has.
# df_brownfield_sites_outside_lpa_counts = df_brownfield_sites_outside_lpa.organisation.value_counts()
# df_brownfield_sites_outside_lpa_sorted = df_brownfield_sites_outside_lpa.set_index("organisation").loc[df_brownfield_sites_outside_lpa_counts.index].reset_index()

# Save the report next to the notebook, then show it.
df_brownfield_sites_outside_lpa_sorted.to_csv("brownfield_sites_outside_lpa.csv")
display(df_brownfield_sites_outside_lpa_sorted)