# Block Counts

This script counts the number of properties in a terrace and uses it to derive a built form for the buildings

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import os
import json
import datetime as dt

In [39]:
# set variables from config file
config_path = os.path.abspath('..')

with open(config_path + '/config.json', 'r') as f:
    config = json.load(f)

polygon_path = config['DEFAULT']['polygon_path']
buildings_fname = config['DEFAULT']['buildings_fname']
built_form_fname = config['DEFAULT']['built_form_fname']

In [3]:
buildings = pd.read_csv(os.path.join(polygon_path,buildings_fname),header=0,delimiter=',')

### 0 distance neighbours

In [4]:
# To reduce the time taken work with a subset of the data. As nn_count represents the count of neighbours 
# who are not next door neighbours (i.e. distance between building geometries is > 0), filtering on nn_count < 4 
# will give a subset of properties with two next door neighbours
b_attached = buildings[(buildings['nn_count'] < 4)].copy()

For neighbours with a 0 distance find the min and max uprn, sort by these and drop duplicates. If two properties are next door neighbours they will appear in the data twice, once with each building as the primary uprn and the other building as the nearest neighbour. To find unique lists of properties in the same block, first need to remove these duplicates

In [5]:
# split into a files which will give 0 distance pairs of buildings
terraced_pd = b_attached[b_attached['d1']==0].copy()
terraced_d2 = b_attached[b_attached['d2']==0].copy()
terraced_d3 = b_attached[b_attached['d3']==0].copy()
terraced_d4 = b_attached[b_attached['d4']==0].copy()

# where a pair of buildings are next door neighbours the min uprn and max uprn will be the same for both rows
terraced_pd['uprn_min'] = b_attached.loc[:,['uprn','uprn1']].min(axis=1)
terraced_pd['uprn_max'] = b_attached.loc[:,['uprn','uprn1']].max(axis=1)
terraced2 = terraced_pd.drop_duplicates(subset=['uprn_min','uprn_max'])

terraced_d2['uprn_min'] = terraced_d2.loc[:,['uprn','uprn2']].min(axis=1)
terraced_d2['uprn_max'] = terraced_d2.loc[:,['uprn','uprn2']].max(axis=1)
terraced_d2_2 = terraced_d2.drop_duplicates(subset=['uprn_min','uprn_max'])

terraced_d3['uprn_min'] = terraced_d3.loc[:,['uprn','uprn3']].min(axis=1)
terraced_d3['uprn_max'] = terraced_d3.loc[:,['uprn','uprn3']].max(axis=1)
terraced_d3_2 = terraced_d3.drop_duplicates(subset=['uprn_min','uprn_max'])

terraced_d4['uprn_min'] = terraced_d4.loc[:,['uprn','uprn4']].min(axis=1)
terraced_d4['uprn_max'] = terraced_d4.loc[:,['uprn','uprn4']].max(axis=1)
terraced_d4_2 = terraced_d4.drop_duplicates(subset=['uprn_min','uprn_max'])

# repeat above steps for all neighbour pairs
terraced3 = pd.concat([terraced2,terraced_d2_2,terraced_d3_2,terraced_d4_2])
terraced3.dropna(subset=['uprn_min','uprn_max'],inplace=True)
terraced3['uprn_min'] = terraced3['uprn_min'].astype(int)
terraced3['uprn_max'] = terraced3['uprn_max'].astype(int)
terraced3 = terraced3.sort_values(by = 'uprn_min')
terraced4 = terraced3.drop_duplicates(subset=['uprn_min','uprn_max'])

In [6]:
# array of all unique pairs of 0 distance neighbours
uprn_min = list(terraced4['uprn_min'])
uprn_max = list(terraced4['uprn_max'])

uprn_min_max = np.array(terraced4[['uprn_min','uprn_max']])

## Combine pairs of 0 distance neighbours

### Exact matches

Function to combine lists of houses with a common uprn, note the lists have been sorted by the minimum uprn to bring lists with a common uprn next to each other

In [9]:
def combine_blocks(uprn_min_max):
    
    ''' 
    Loops through an array combining lists of integers with a common item into one list of unique items. 
    Parameters
      uprn_min_max: numpy array of arrays
    Returns a list and a count of how pairs of lists where combined
    '''
    
    start = dt.datetime.now()
    all_neighbours = list()
    j = len(uprn_min_max)
    yes_count = 0
    for i in range(len(uprn_min_max)):
        if i != j:
            try:
                list1 = list(uprn_min_max[i])
                list2 = list(uprn_min_max[i+1])
                if any(i in list1 for i in list2):
                    all_neighbours.append(list(set(list1+list2)))
                    j = i+1
                    yes_count += 1
                else:
                    all_neighbours.append(list1)
            except:
                list1 = list(uprn_min_max[i])
                all_neighbours.append(list1)
    end = dt.datetime.now()
#     print((end - start).total_seconds())
    print('total number of combination of blocks '+str(len(all_neighbours)))
    print('number of blocks combined was '+str(yes_count))
    return all_neighbours, yes_count

In [8]:
all_neighbours, yes_count = combine_blocks(uprn_min_max)

total number of combination of blocks247859
number of blocks combined was 135424


In [10]:
# example below shows the array now contains a list with three entries (all neighbours)
[y for y in all_neighbours if 10002523671 in y]

[[10002523665, 10002523677, 10002523671]]

In [11]:
# the above function is run until no more combinatons of neighbours can be found this way
while yes_count > 1:
    all_neighbours, yes_count = combine_blocks(all_neighbours)

total number of combination of blocks 186264
number of blocks combined was 61595
total number of combination of blocks 164751
number of blocks combined was 21513
total number of combination of blocks 158542
number of blocks combined was 6209
total number of combination of blocks 157026
number of blocks combined was 1516
total number of combination of blocks 156867
number of blocks combined was 159
total number of combination of blocks 156859
number of blocks combined was 8
total number of combination of blocks 156856
number of blocks combined was 3
total number of combination of blocks 156853
number of blocks combined was 3
total number of combination of blocks 156850
number of blocks combined was 3
total number of combination of blocks 156847
number of blocks combined was 3
total number of combination of blocks 156845
number of blocks combined was 2
total number of combination of blocks 156843
number of blocks combined was 2
total number of combination of blocks 156841
number of block

### Equal min and max 

In some cases the min of one list is the max of another list and vice versa as in the example below:

In [12]:
[y for y in all_neighbours if 100100489336 in y]

[[100100489334, 100100489336], [100100489336, 100100489338]]

In [13]:
# find list index for blocks with 100100489336
[i for i,e in enumerate(all_neighbours) if 100100489336 in e]

[102490, 102492]

In the example above the indices are two places away from each other. The data needs to be sorted in a way to bring these rows together

In [14]:
def list_of_list_to_df(list_of_lists):
    
    '''
    Converts a list of lists into a dataframe
    Parameters
      list_of_lists: a list of lists
    Returns a dataframe
    '''
    
    length = len(sorted(list_of_lists,key=len, reverse=True)[0])
    y=np.array([xi+[None]*(length-len(xi)) for xi in list_of_lists])
    return pd.DataFrame(y)

In [15]:
def get_uprn_min_max(df):
    
    '''
    Calculates the min and max values of each row and converts to values to an integer
    Parameters
      df: a dataframe
    Returns a dataframe
    '''
    
    df['uprn_min'] = df.min(axis=1)
    df['uprn_max'] = df.max(axis=1)
    df['uprn_min'] = df['uprn_min'].astype(int)
    df['uprn_max'] = df['uprn_max'].astype(int)
    return df

In [16]:
def shift_by_2(all_neighbours):
    
    '''
    Calculates the min and max values of each row and converts to values to an integer
    Parameters
      all_neighbours: a list
    Returns a list
    '''
    
    blocks = list_of_list_to_df(all_neighbours)
    blocks = get_uprn_min_max(blocks)

#     print(len(blocks))
#     blocks.drop_duplicates(inplace=True)
#     print(len(blocks))
    
    blocks['max_pre'] = blocks['uprn_max'].shift(2)
    blocks['min_max'] = blocks.apply(lambda row: row['uprn_min'] if row['max_pre']==row['uprn_min'] else row['uprn_max'],axis=1)
    blocks = blocks.sort_values(by = 'min_max')
    blocks.drop(columns=['uprn_min','uprn_max','min_max','max_pre'],inplace=True)

    # turn dataframe back into list for processing
    blocks_rows = list()
    for i in range(len(blocks)):
        blocks_rows.append(list(blocks.iloc[i]))

    sorted_neighbours = [[x for x in y if x is not None] for y in blocks_rows]
    return sorted_neighbours

In [17]:
sorted_neighbours = shift_by_2(all_neighbours)
all_neighbours, yes_count = combine_blocks(sorted_neighbours)

total number of combination of blocks 129383
number of blocks combined was 27453


In [18]:
while yes_count > 1:
    all_neighbours, yes_count = combine_blocks(all_neighbours)

total number of combination of blocks 126313
number of blocks combined was 3070
total number of combination of blocks 126036
number of blocks combined was 277
total number of combination of blocks 126017
number of blocks combined was 19
total number of combination of blocks 126017
number of blocks combined was 0


In [19]:
# Check test blocks above have been combined
[y for y in all_neighbours if 100100489336 in y]
# Not yet as other blocks were combined instead

[[100100489334, 100100489336], [100100489336, 100100489338]]

In [20]:
# find list index for blocks with 100100489336
[i for i,e in enumerate(all_neighbours) if 100100489336 in e]

[76084, 76086]

In [21]:
sorted_neighbours = shift_by_2(all_neighbours)
all_neighbours, yes_count = combine_blocks(sorted_neighbours)
while yes_count > 1:
    all_neighbours, yes_count = combine_blocks(all_neighbours)

total number of combination of blocks 114419
number of blocks combined was 11598
total number of combination of blocks 114145
number of blocks combined was 274
total number of combination of blocks 114139
number of blocks combined was 6
total number of combination of blocks 114139
number of blocks combined was 0


In [22]:
# Check test blocks above have been combined
[y for y in all_neighbours if 100100489336 in y]
# Not yet as other blocks were combined instead

[[100100489328, 100100489330, 100100489332, 100100489334, 100100489336],
 [100100489336, 100100489338]]

In [23]:
# find list index for blocks with 100100489336
[i for i,e in enumerate(all_neighbours) if 100100489336 in e]

[68480, 68482]

In [24]:
sorted_neighbours = shift_by_2(all_neighbours)
all_neighbours, yes_count = combine_blocks(sorted_neighbours)
while yes_count > 1:
    all_neighbours, yes_count = combine_blocks(all_neighbours)

total number of combination of blocks 107265
number of blocks combined was 6874
total number of combination of blocks 107124
number of blocks combined was 141
total number of combination of blocks 107123
number of blocks combined was 1


In [25]:
# Check test blocks above have been combined
[y for y in all_neighbours if 100100489336 in y]
# Yes

[[100100489328,
  100100489330,
  100100489332,
  100100489334,
  100100489336,
  100100489338]]

Repeat until all no more combinations are found

In [27]:
sorted_neighbours = shift_by_2(all_neighbours)
all_neighbours, yes_count = combine_blocks(sorted_neighbours)
iteration_count = 0
while yes_count > 1:
    iteration_count += 1
    print(iteration_count)
    all_neighbours, yes_count = combine_blocks(all_neighbours)

while iteration_count > 1:
    sorted_neighbours = shift_by_2(all_neighbours)
    all_neighbours, yes_count = combine_blocks(sorted_neighbours)
    iteration_count = 0
    while yes_count > 1:
        iteration_count += 1
        print(iteration_count)
        all_neighbours, yes_count = combine_blocks(all_neighbours)

total number of combination of blocks 102422
number of blocks combined was 4701
1
total number of combination of blocks 102344
number of blocks combined was 78
2
total number of combination of blocks 102344
number of blocks combined was 0
total number of combination of blocks 99110
number of blocks combined was 3234
1
total number of combination of blocks 99062
number of blocks combined was 48
2
total number of combination of blocks 99062
number of blocks combined was 0
total number of combination of blocks 96491
number of blocks combined was 2571
1
total number of combination of blocks 96457
number of blocks combined was 34
2
total number of combination of blocks 96456
number of blocks combined was 1
total number of combination of blocks 94417
number of blocks combined was 2039
1
total number of combination of blocks 94398
number of blocks combined was 19
2
total number of combination of blocks 94398
number of blocks combined was 0
total number of combination of blocks 92748
number of

### All other combinations

In [29]:
def combine_blocks_all(current_list):
    start = dt.datetime.now()
    yes_count = 0
    all_neighbours = list()
    length = len(current_list)
    k = []
    for i in range(length):
        block1 = current_list[i]
        combo = block1
        if i not in k:
            for j in range(i+1,length):
                block2 = current_list[j]
                if any(i in block1 for i in block2):
                    combo = list(set(block1+block2))
                    k.append(j)
                    yes_count += 1
                    break
            all_neighbours.append(combo)
    end = dt.datetime.now()
#     print((end - start).total_seconds())
    print('total number of combination of blocks '+str(len(all_neighbours)))
    print('number of blocks combined was '+str(yes_count))
    return all_neighbours, yes_count

In [30]:
all_neighbours, yes_count = combine_blocks_all(all_neighbours)

total number of combination of blocks 80483
number of blocks combined was 7339


In [31]:
while yes_count > 0:
    all_neighbours, yes_count = combine_blocks_all(all_neighbours)

total number of combination of blocks 78506
number of blocks combined was 2002
total number of combination of blocks 78007
number of blocks combined was 507
total number of combination of blocks 77864
number of blocks combined was 143
total number of combination of blocks 77828
number of blocks combined was 36
total number of combination of blocks 77824
number of blocks combined was 4
total number of combination of blocks 77824
number of blocks combined was 0


In [32]:
# convert to a dataframe
blocks = list_of_list_to_df(all_neighbours)

In [33]:
# count number of entries in each list
blocks['block_count'] = blocks.astype(bool).sum(axis=1)
blocks['uprn'] = blocks[0].astype(int)

In [34]:
# combine uprn columns to merge back to original data. As each combination is unique the number of properties in the 
# terrace needs to be repeated for each entry in the list
blocks_all_uprn = pd.DataFrame()
for col in range(blocks.shape[1]-2):
    tmp_df = blocks[[col,'block_count']]
    tmp_df.rename(columns = {col:'uprn'},inplace=True)
    tmp_df.dropna(subset=['uprn'],inplace=True)
    blocks_all_uprn = pd.concat([blocks_all_uprn,tmp_df])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [35]:
# join back to original file
blocks_all_uprn['uprn'] = blocks_all_uprn['uprn'].astype(int)
built_form = buildings.merge(blocks_all_uprn,how = 'left', on = 'uprn')

In [37]:
def built_form(block_count,nn_count):
    
    ''' 
    Defines a rule for deriving built form based on number of neighbours a property has and how many properties make 
    up the terrace/block
      block_count: count of properties in the terrace
      nn_count: count of nearest neighbours where the geometric distance is more than 0
    Returns a list and a count of how pairs of lists where combined
    '''
    
    # 3 or more properties in the terrace and one 0 distance neighbour
    if(block_count >= 3 and nn_count == 4):
        return 'end-terrace'
    # 3 or more properties in the terrace and two 0 distance neighbours
    elif(block_count >= 3 and nn_count <4):
        return 'mid-terrace'
    # one 0 distance neighbour
    elif(nn_count == 4):
        return 'semi-detached'
    # catch all for any other properties with a 0 distance neighbour
    elif(nn_count < 5):
        return 'attached'
    # no 0 distance neighbours therefore detached
    else:
        return 'detached'

In [38]:
built_form['built_form'] = built_form.apply(lambda row: built_form(row['block_count'],row['nn_count']),axis=1)

In [None]:
built_form.to_csv(os.path.join(polygon_path,built_form_fname),index=False)