# Brownfield sites collection

Create an index of brownfield land register files collected from Local Planning Authorities.

In [1]:
# load collection headers into an index

import json
import glob
import re

# index of everything collected: "header" maps a key to its header record,
# every other category maps a header value to the list of keys carrying it
collection = {"header": {}}


def add(h, category, key):
    """Record key under the value which header h holds for category.

    The bucket for category is always created in collection, even when h
    has no such field; key is only appended when the field is present.
    """
    bucket = collection.setdefault(category, {})
    if category not in h:
        return
    bucket.setdefault(h[category], []).append(key)

# Header files are found at collection/headers/<date>/<key>.json; the pattern
# captures the date directory and the key. It is a raw string so that \d and
# \w reach the regex engine untouched, and the "." before "json" is escaped
# and anchored so that only a real ".json" suffix matches.
p = re.compile(r"^.*/([\d-]+)/(\w+)\.json$")

for path in glob.glob("collection/headers/*/*.json"):
    date, key = p.match(path).groups()

    # close each file as soon as it is parsed instead of leaking the handle,
    # and read it as UTF-8 regardless of the platform's default encoding
    with open(path, encoding="utf-8") as f:
        h = json.load(f)

    # drop the request and response header fields before indexing
    for item in ["request-headers", "response-headers"]:
        h.pop(item, None)

    collection["header"][key] = h

    add(h, "dataset", key)
    add(h, "organisation", key)
    add(h, "status", key)
    add(h, "exception", key)

    # add() creates the category bucket even when the field is missing, so
    # only call it for "body" when this header actually has one
    if "body" in h:
        add(h, "body", key)

In [14]:
# build index of organisations
import csv
import requests

# organisations indexed by CURIE, each holding the columns loaded for it
organisations = {}


def load_organisations(key, fields, url=None, prefix=None, timeout=30):
    """Merge selected columns of a CSV of organisations into organisations.

    key     -- name of the column identifying each row; also used to build
               the default url and prefix
    fields  -- names of the columns to copy for each organisation
    url     -- location of the CSV; defaults to the records of the register
               named key on register.gov.uk
    prefix  -- text placed before the row identifier to form the CURIE;
               defaults to key followed by a colon
    timeout -- seconds passed to requests.get as its timeout

    Existing entries are kept and updated, so several sources can contribute
    different columns to the same organisation.

    Raises requests.HTTPError when the server answers with an error status,
    and KeyError when a row lacks the key column or one of fields.
    """
    if url is None:
        url = 'https://%s.register.gov.uk/records.csv?page-index=1&page-size=5000' % (key)
    if prefix is None:
        prefix = key + ":"

    response = requests.get(url, timeout=timeout)
    # fail here on 4xx/5xx rather than parsing an error page as CSV
    response.raise_for_status()

    for row in csv.DictReader(response.content.decode('utf-8').splitlines()):
        curie = "%s%s" % (prefix, row[key])
        record = organisations.setdefault(curie, {})
        for field in fields:
            record[field] = row[field]

# organisations from the registers, using the default url and "<key>:" prefix
load_organisations(key='local-authority-eng', fields=['name', 'official-name', 'end-date'])
load_organisations(key='government-organisation', fields=['name', 'website', 'end-date'])

# add website and GSS codes for local authorities; with an empty prefix the
# value of the "local-authority" column is used as the CURIE unchanged
load_organisations(
    key="local-authority",
    fields=["website", "ons-code"],
    url="https://raw.githubusercontent.com/digital-land/alpha-data/master/local-authorities.csv",
    prefix="",
)

In [3]:
# number of collected headers for each status
for status in collection["status"]:
    print(status, len(collection["status"][status]))

print()

# status, followed by any exception text, of every header collected for each
# organisation
for organisation in organisations:
    print(
        organisation,
        [
            collection["header"][key].get("status", "") + collection["header"][key].get("exception", "")
            # organisations with no collected header get an empty list
            # instead of raising KeyError
            for key in collection["organisation"].get(organisation, [])
        ],
    )

200 474
404 162
403 4
500 2

development-corporation:1 ['200', '200']
development-corporation:2 ['200', '200']
local-authority-eng:ADU ['200', '200']
local-authority-eng:ALL ['200', '200']
local-authority-eng:AMB ['200', '200']
local-authority-eng:ARU ['200', '200']
local-authority-eng:ASF ['200', '200']
local-authority-eng:ASH ['404', '404']
local-authority-eng:AYL ['200', '200']
local-authority-eng:BAB ['200', '200']
local-authority-eng:BAE ['404', '404']
local-authority-eng:BAI ['404', '404']
local-authority-eng:BAN ['404', '404']
local-authority-eng:BAR ['200', '200']
local-authority-eng:BAS ['200', '200']
local-authority-eng:BBD ['404', '404']
local-authority-eng:BDF ['200', '200']
local-authority-eng:BDG ['200', '200']
local-authority-eng:BEN ['200', '200']
local-authority-eng:BEX ['200', '200']
local-authority-eng:BIR ['200', '200']
local-authority-eng:BLA ['404', '404']
local-authority-eng:BMH ['404', '404']
local-authority-eng:BNE ['200', '200']
local-authority-eng:BNH ['200',