In [1]:
""" Script to take planning applications and match to OSAddressBase addresses

Written by: Christine Langston, April 2024
"""
import pandas as pd
import time
import numpy as np
import copy
import re

In [2]:
#create a function to iterate through csv. use for OSAddressBase file
def read_csv(file_name, columns):
    for chunk in pd.read_csv(file_name, chunksize=10000, usecols=columns, 
                            dtype={'urpn':str, 'parent_urpn': str, 'class': str, 'latitude': float, 
                                                  'longitude': float, 'country': str}):
    
        #if chunk['country'] == 'E':
        yield chunk


In [3]:
#takes a merged dataset and separates matched addresses and not matched
def separate_matches(dataset, column_name, match_strategy):
    dataset_match = dataset.drop(dataset[pd.isna(dataset[column_name]) == True].index)
    dataset_no_match = dataset.drop(dataset[pd.isna(dataset[column_name]) == False].index)
    dataset_match['match_strategy'] = match_strategy
    return dataset_match, dataset_no_match

In [4]:
#function to merge and then split based on if an address was matched  
def my_merge(left, right, left_on, right_on): 
    merged = left.merge(right, how = 'left', left_on = left_on, right_on = right_on)
    merged_match = merged.drop(merged[pd.isna(merged['uprn']) == True].index) 
    merged_no_match = merged.drop(merged[pd.isna(merged['uprn']) == False].index) 
    return merged_match, merged_no_match

In [5]:
%%time
#### READ IN THE DATA IF EXPORTED 
resi_AB = pd.read_csv('data/resi_AB_cleaned_parsed_030524.csv', low_memory = False)


CPU times: user 3min 39s, sys: 4min 15s, total: 7min 55s
Wall time: 11min 26s


In [6]:
%%time
other_AB = pd.read_csv('data/other_AB_cleaned_parsed_030524.csv',low_memory = False)



CPU times: user 36 s, sys: 9.68 s, total: 45.7 s
Wall time: 51.9 s


In [7]:
#load in the data that hasn't been matched yet
london_data = pd.read_csv('data/London2_no_match_2603.csv',low_memory = False)

In [8]:
resi_AB.head()

Unnamed: 0,uprn,class,parent_uprn,latitude,longitude,country,legal_name,sub_building_name,building_name,building_number,...,street_description,dependent_locality,locality,town_name,administrative_area,post_town,postcode,postcode_locator,parsed_address1,parsed_address2
0,10012778289,RD02,,54.036005,-2.340833,E,,,,,...,OLD OLIVER LANE TO RAGGED HALL,,RATHMELL,SETTLE,NORTH YORKSHIRE,,,BD24 0LP,"SCOUTBER END FARM, OLD OLIVER LANE TO RAGGED HALL",OLD OLIVER LANE TO RAGGED HALL
1,10013819934,RG02,10013820000.0,54.780687,-1.510651,E,,,,,...,DOWSEY ROAD,,,SHERBURN,DURHAM,,,DH6 1JH,"GARAGE SITE 53, DOWSEY ROAD",DOWSEY ROAD
2,10014309637,RD04,200003700000.0,51.26651,0.497553,E,,,,,...,MILTON STREET,,,MAIDSTONE,KENT,,,ME16 8LD,"ROOM 1, 85, MILTON STREET",MILTON STREET
3,10033213520,RD01,10002820000.0,53.903252,-0.158754,E,,,,,...,HORNSEA BURTON ROAD,,,HORNSEA,EAST RIDING OF YORKSHIRE,,,HU18 1TL,"33 SOUTHFIELD, LONGBEACH LEISURE PARK, HORNSEA...",HORNSEA BURTON ROAD
4,10033545757,RD06,100023500000.0,51.514983,-0.17926,E,,,,,...,WESTBOURNE TERRACE,,,LONDON,CITY OF WESTMINSTER,,,W2 3UJ,"SECOND FLOOR, 58, WESTBOURNE TERRACE",WESTBOURNE TERRACE


Some data cleaning steps to clean up the Nan and numerical data coming from the CSV

In [9]:
### ------ DATA CLEANING  ----------
#edit the parent uprn from OSAddress base to fill out to be strings with 12 digits and leading zeros
#replace nan
resi_AB = resi_AB.replace([np.nan, -np.inf], 0)

#cast as integer
resi_AB['parent_uprn'] = resi_AB['parent_uprn'].astype('Int64')
resi_AB['uprn'] = resi_AB['uprn'].astype('Int64')


In [10]:
### ------ DATA CLEANING  ----------
#cast as string 
resi_AB['parent_uprn'] = resi_AB['parent_uprn'].astype(str)
resi_AB['uprn'] = resi_AB['uprn'].astype(str)

#fill in with left side padding zeros 
resi_AB['parent_uprn'] = resi_AB['parent_uprn'].apply(lambda x: '{0:0>12}'.format(x))
resi_AB['uprn'] = resi_AB['uprn'].apply(lambda x: '{0:0>12}'.format(x))


In [11]:
#do the same cleaning as above but for the OTHER addresses
other_AB = other_AB.replace([np.nan, -np.inf], 0)

#cast as integer
other_AB['parent_uprn'] = other_AB['parent_uprn'].astype('Int64')
other_AB['uprn'] = other_AB['uprn'].astype('Int64')

In [12]:
### ------ DATA CLEANING  ----------
#cast as string 
other_AB['parent_uprn'] = other_AB['parent_uprn'].astype(str)
other_AB['uprn'] = other_AB['uprn'].astype(str)

#fill in with left side padding zeros 
other_AB['parent_uprn'] = other_AB['parent_uprn'].apply(lambda x: '{0:0>12}'.format(x))
other_AB['uprn'] = other_AB['uprn'].apply(lambda x: '{0:0>12}'.format(x))


In [13]:
london_data['uprn_x']

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
        ..
3251   NaN
3252   NaN
3253   NaN
3254   NaN
3255   NaN
Name: uprn_x, Length: 3256, dtype: float64

In [18]:
### ------ DATA CLEANING  ----------

#BATCH 1 london_data has extra quotation marks, need to remove
#london_data['uprn'] = london_data['uprn'].apply(lambda x: x.strip("''") if not pd.isna(x) else x)


In [13]:
### ------ DATA CLEANING  ----------
london_data['uprn_x'] = london_data['uprn_x'].astype('Int64')



In [14]:
#cast the strings and make sure it looks good 
london_data['uprn_x'] = london_data['uprn_x'].astype('str') #apply(lambda x: str(x) if not pd.isna(x) else x)

london_data['uprn_x'] = london_data['uprn_x'].apply(lambda x: '{0:0>12}'.format(x) if not pd.isna(x) else x)

In [15]:
### ------ DATA CLEANING  ----------

london_data = london_data.replace('00000000<NA>',np.NaN)

Ignore the creation of the columns below if matching a batch for a second, third, etc. time

In [20]:
### ------ DATA CLEANING  ----------
#street address from site_name_LPA
london_data['parsed_street_LPA'] = london_data['site_name_LPA'].apply(lambda x: re.findall("[0-9]+.-?.[0-9]+?\s(.+)(Road|Lane|Avenue|Parade|Courtyard|Street|Gardens|Drive)", x) if not pd.isna(x) and '-' in x
                                                                  else (re.findall("[0-9]+\s(.+)(Road|Lane|Avenue|Parade|Courtyard|Street|Gardens|Drive)", x) if not pd.isna(x) else []))


In [21]:
london_data['parsed_street_LPA'] =  london_data['parsed_street_LPA'].apply(lambda x: x[0][0] + x[0][1]  if len(x) > 0 else None)

In [22]:
#cleaning - street address
london_data['parsed_street_GLA'] = london_data['site_name_GLA'].apply(lambda x: re.findall("[0-9]+.-?.[0-9]+?\s(.+)(Road|Lane|Avenue|Parade|Courtyard|Street|Gardens|Drive)", x) if not pd.isna(x) and '-' in x
                                                                  else (re.findall("[0-9]+\s(.+)(Road|Lane|Avenue|Parade|Courtyard|Street|Gardens|Drive)", x) if not pd.isna(x) else []))


In [23]:
london_data['parsed_street_GLA'] =  london_data['parsed_street_GLA'].apply(lambda x: x[0][0] + x[0][1]  if len(x) > 0 else None)

In [24]:
#if the original street was empty, then we want to use the LPA Or GLA parsed name as street_name 

london_data['street_name'] = np.where(london_data['street_name'].isnull(), np.where(london_data['parsed_street_LPA'].isnull(), london_data['parsed_street_GLA'], london_data['parsed_street_LPA']), london_data['street_name'] )

In [27]:
# DATA EXPLORATION - EXPORT CSV IF NEEDED 
#london_data.to_csv('batch2_cleaned.csv', index = False)

Continue here to clean data for all batches

In [16]:
# added may 7 14:50p
# create new column with the number, street description, and the site name 
london_data['concat_addr'] = np.where(london_data['site_name_clean'].isnull(), '', london_data['site_name_clean'] +  ', ' ) + london_data['site_number_clean'] + ', ' +  london_data['street_name']

In [17]:
#make london_data all into capitals
london_data['street_name'] = london_data['street_name'].str.upper()

In [18]:
#remove unwanted characters
london_data['postcode_clean'] = london_data['postcode_clean'].replace('x000D__x000D_\n', '')

In [19]:
london_data = london_data.replace({'_x000D__x000D_\n': ' ', '_x000d__x000d_\n': ' ' }, regex = True)

In [20]:
#remove white space
london_data['postcode_clean'] = london_data['postcode_clean'].apply(lambda x: str(x).strip())

In [21]:
london_data['site_name_GLA_no_pc'] = london_data.apply(lambda row: str(row['site_name_GLA']).replace(', ' + row['postcode_clean'], ''), axis = 1)
#substring of site_name_GLA without the postcode .... 

In [22]:
london_data['site_name_LPA_no_pc'] = london_data.apply(lambda row: str(row['site_name_LPA']).replace(row['postcode_clean'], ''), axis = 1)


In [23]:
### ------ DATA CLEANING  ----------

#Address matching data clean, make building number into a string 
resi_AB['building_number'] = resi_AB['building_number'].astype('Int64').astype('str') 

In [24]:
resi_AB['pao_start_number'] = resi_AB['pao_start_number'].astype('Int64').astype('str') 


In [25]:
other_AB['building_number'] = other_AB['building_number'].astype('Int64').astype('str') 

In [26]:
other_AB['pao_start_number'] = other_AB['pao_start_number'].astype('Int64').astype('str') 


In [29]:
#in resi, there is 209 A, but the actual one we want is in other
new = other_AB[(other_AB['postcode_locator'] == 'CR4 4NA') & (other_AB['administrative_area'] == 'MERTON')
        & (other_AB['street_description'] == 'WILLOW LANE')]

In [30]:
new.head()

Unnamed: 0,uprn,class,parent_uprn,latitude,longitude,country,legal_name,sub_building_name,building_name,building_number,...,street_description,dependent_locality,locality,town_name,administrative_area,post_town,postcode,postcode_locator,parsed_address1,parsed_address2
4054601,48117025,CI03,48103046,51.392986,-0.162502,E,0.0,UNIT 16,1-11,,...,WILLOW LANE,0,0,MITCHAM,MERTON,MITCHAM,CR4 4NA,CR4 4NA,"UNIT 15, 3, WILLOW LANE","UNIT 16, 1-11, WILLOW LANE"
4058678,10015620852,OI07,0,51.393754,-0.167324,E,0.0,0,0,,...,WILLOW LANE,0,0,MITCHAM,MERTON,0,0,CR4 4NA,"TANK 37M FROM REICHHOLD UK LTD, 54 WILLOW LANE...",WILLOW LANE
4062973,48117022,CI03,48103046,51.392986,-0.162502,E,0.0,0,0,,...,WILLOW LANE,0,0,MITCHAM,MERTON,0,0,CR4 4NA,"UNIT 12 AND 18, 3, WILLOW LANE",WILLOW LANE
4063019,48100113,PP,0,51.393518,-0.165442,E,0.0,0,0,,...,WILLOW LANE,0,0,MITCHAM,MERTON,0,0,CR4 4NA,"ABBEY INDUSTRIAL ESTATE, WILLOW LANE",WILLOW LANE
4063416,10015621263,OI07,0,51.393915,-0.167046,E,0.0,0,0,,...,WILLOW LANE,0,0,MITCHAM,MERTON,0,0,CR4 4NA,"TANK 42M FROM TOBY, 58 WILLOW LANE 60M FROM WI...",WILLOW LANE


In [93]:
new.to_csv('willow_lane_addresses.csv')

In [None]:
#need to create an option to join to 209, 209.0, MANOR ROAD - OTHER AB 

UPRN MATCHING: If reprocessing data that has already been matched, Ignore the Data matching on the UPRN / Parent UPRN

In [32]:
#### -------- DATA MERGING --------
#join the london data with the RESIDENTIAL AddressBase dataset on UPRN
merged = london_data.merge(resi_AB, how = 'left', left_on = 'uprn', right_on = 'parent_uprn')

#merged['UCL_ID'].nunique()

In [33]:
#separate merged into no match and match 
merged_match, merged_no_match = separate_matches(merged, 'parent_uprn', 'parent_uprn')

merged_no_match = merged_no_match.dropna(axis=1, how='all')

In [34]:
#merge 2 on uprn not parent_uprn 
merged_2 = merged_no_match.merge(resi_AB, how = 'left', left_on = 'uprn_x', right_on = 'uprn')

In [35]:
#separate the merge 2 into two datasets for match v not match 
merged_2_match, merged_2_no_match = separate_matches(merged_2, 'uprn', 'uprn')

merged_2_no_match = merged_2_no_match.dropna(axis=1, how='all')      

In [36]:
merged_2_match = merged_2_match.rename(columns={"uprn": "uprn_OSAB"})
merged_match = merged_match.rename(columns={"uprn_y": "uprn_OSAB"})

#merged_2_match.count()

In [37]:
all_matched = pd.concat([merged_2_match, merged_match])


ADDRESS MATCHING: For all matching versions: Address matching starts here

In [27]:
merged_2_no_match = london_data

-------- DATA MERGING --------  STRATEGY 1 ADDRESS MATCH

In [28]:
#-------- DATA MERGING -------- 
## ADDRESS STRATEGY ONE 
#match on the street number, street name, postcode 
left_columns = ['site_number_clean', 'street_name', 'postcode_clean']
right_columns = ['building_number', 'street_description', 'postcode_locator']

# this is a very strict conservative join 
merged_on_address = merged_2_no_match.merge(resi_AB, how = 'left', left_on = left_columns, right_on = right_columns)

In [29]:
merged_on_address_match,merged_on_address_no_match =  separate_matches(merged_on_address, 'uprn', 'address_1')


In [30]:
#how many unique ids were matched?
merged_on_address_match['ID'].nunique()

7

In [31]:
## Add the merged_on_address_match to the matched
merged_on_address_match = merged_on_address_match.rename(columns={"uprn": "uprn_OSAB"})
merged_on_address_match['match_strategy'] = 'address_1'


In [32]:
all_matched = merged_on_address_match

In [26]:
# Use this if UPRN was used for matching above
#all_matched = pd.concat([all_matched, merged_on_address_match])


In [33]:
merged_on_address_no_match = merged_on_address_no_match.dropna(axis=1, how='all')


-------- DATA MERGING --------  STRATEGY 2 ADDRESS MATCH

In [34]:
#### ADDRESS Strategy 2 - use the site name GLA no pc with the parsed address 1 
left_columns_2 = ['site_name_GLA_no_pc', 'postcode_clean']  #London Data 
right_columns_2 = ['parsed_address1', 'postcode_locator'] #AB

merged_on_address2 = merged_on_address_no_match.merge(resi_AB, how = 'left', left_on = left_columns_2, right_on = right_columns_2)

In [35]:
merged_on_address2_match = merged_on_address2.drop(merged_on_address2[pd.isna(merged_on_address2['uprn']) == True].index) 
merged_on_address2_no_match = merged_on_address2.drop(merged_on_address2[pd.isna(merged_on_address2['uprn']) == False].index) 


In [36]:
merged_on_address2_match['ID'].nunique()

0

In [37]:
merged_on_address2_match = merged_on_address2_match.rename(columns={"uprn": "uprn_OSAB"})
merged_on_address2_match = merged_on_address2_match.drop(columns = ['short_site_name_LPA]']) 
merged_on_address2_match['match_strategy'] = 'address_2'

In [38]:
all_matched = pd.concat([all_matched, merged_on_address2_match])

In [39]:
merged_on_address2_no_match = merged_on_address2_no_match.dropna(axis=1, how='all')


-------- DATA MERGING --------  STRATEGY 3 ADDRESS MATCH

In [41]:
# #-------- DATA MERGING --------  STRATEGY 3 ADDRESS MATCH 
# use the site name LPA without post code ... 
left_columns_3 = ['site_name_LPA_no_pc', 'postcode_clean']
right_columns_3 = ['parsed_address1', 'postcode_locator']

#merged_on_address2_no_match['lpa_name'] = merged_on_address2_no_match['lpa_name'].str.upper()
#merged_on_address3 = merged_on_address2_no_match.merge(resi_AB, how = 'left', left_on = left_columns_3, right_on = right_columns_3)

merged_on_address3_match, merged_on_address3_no_match = my_merge(merged_on_address2_no_match, resi_AB, left_columns_3, right_columns_3)

In [42]:
#for batch 2, gets 0 results
#merged_on_address3_no_match.count()

In [43]:
# add the matched into all matched
merged_on_address3_match = merged_on_address3_match.rename(columns={"uprn": "uprn_OSAB"})
#merged_on_address3_match = merged_on_address3_match.drop(columns = ['short_site_name_LPA]']) 
merged_on_address3_match['match_strategy'] = 'address_3'

frames = [all_matched, merged_on_address3_match]

all_matched = pd.concat(frames)

In [44]:
merged_on_address3_no_match = merged_on_address3_no_match.dropna(axis=1, how='all')

In [45]:
#outline strategy 4 
#### ADDRESS Strategy 4 - use the site name GLA no pc with the parsed address 2 
left_columns_4 = ['site_name_GLA_no_pc', 'postcode_clean']  #London Data 
right_columns_4 = ['parsed_address2', 'postcode_locator'] #AB

merged_on_address4 = merged_on_address3_no_match.merge(resi_AB, how = 'left', left_on = left_columns_4, right_on = right_columns_4)

# strategy 4B would be site_name_LPA_no_pc and parsed_address1 

In [46]:
merged_on_address4_match = merged_on_address4.drop(merged_on_address4[pd.isna(merged_on_address4['uprn']) == True].index) 
merged_on_address4_no_match = merged_on_address4.drop(merged_on_address4[pd.isna(merged_on_address4['uprn']) == False].index) 


In [47]:
merged_on_address4_match['ID'].nunique()

0

In [48]:
merged_on_address4_match = merged_on_address4_match.rename(columns={"uprn": "uprn_OSAB"})
#merged_on_address3_match = merged_on_address3_match.drop(columns = ['short_site_name_LPA]']) 
merged_on_address4_match['match_strategy'] = 'address_4'

frames = [all_matched, merged_on_address4_match]

all_matched = pd.concat(frames)
merged_on_address4_no_match = merged_on_address4_no_match.dropna(axis=1, how='all')

In [49]:
## Strategy 5 

left_columns_5 = ['concat_addr', 'lpa_name']
right_columns_5 = ['parsed_address1', 'administrative_area']

merged_on_address5_match, merged_on_address5_no_match = my_merge(merged_on_address4_no_match, resi_AB, left_columns_5, right_columns_5)

In [50]:
merged_on_address5_match['ID'].nunique()

52

In [51]:
merged_on_address5_match = merged_on_address5_match.rename(columns={"uprn": "uprn_OSAB"})
#merged_on_address3_match = merged_on_address3_match.drop(columns = ['short_site_name_LPA]']) 
merged_on_address5_match['match_strategy'] = 'address_5'

frames = [all_matched, merged_on_address5_match]

all_matched = pd.concat(frames)
merged_on_address5_no_match = merged_on_address5_no_match.dropna(axis=1, how='all')

In [52]:
## Strategy 5B
left_columns_5B = ['concat_addr', 'lpa_name']
right_columns_5B = ['parsed_address2', 'administrative_area']

merged_on_address5B_match, merged_on_address5B_no_match = my_merge(merged_on_address5_no_match, resi_AB, left_columns_5B, right_columns_5B)

In [53]:
merged_on_address5B_match['ID'].nunique()

79

In [54]:
merged_on_address5B_match = merged_on_address5B_match.rename(columns={"uprn": "uprn_OSAB"})
#merged_on_address3_match = merged_on_address3_match.drop(columns = ['short_site_name_LPA]']) 
merged_on_address5B_match['match_strategy'] = 'address_5B'

frames = [all_matched, merged_on_address5B_match]

all_matched = pd.concat(frames)
merged_on_address5B_no_match = merged_on_address5B_no_match.dropna(axis=1, how='all')

In [None]:
# pao_start_number + street_description

In [63]:
## Strategy 6
left_columns_6 = ['site_number_clean', 'street_name', 'postcode_clean']
right_columns_6 = ['pao_start_number', 'street_description', 'postcode_locator']

merged_on_address6_match, merged_on_address6_no_match = my_merge(merged_on_address5B_no_match, resi_AB, left_columns_6, right_columns_6)

In [64]:
merged_on_address6_match['ID'].nunique()

34

In [92]:
merged_on_address6_match = merged_on_address6_match.rename(columns={"uprn": "uprn_OSAB"})
#merged_on_address3_match = merged_on_address3_match.drop(columns = ['short_site_name_LPA]']) 
merged_on_address6_match['match_strategy'] = 'address_6'

frames = [all_matched, merged_on_address6_match]

all_matched = pd.concat(frames)
merged_on_address6_no_match = merged_on_address6_no_match.dropna(axis=1, how='all')

------ specific address analysis ----- 

In [66]:
merged_on_address6_match.head(20)

Unnamed: 0,ID,planning_application_number,lpa_name,application_type,application_type_full,description,number_of_units,site_number_clean,street_name,postcode_clean,...,dependent_locality,locality,town_name,administrative_area,post_town,postcode,postcode_locator,parsed_address1,parsed_address2,match_strategy
7,1547,P2017/2905/PRA,ISLINGTON,Prior Approval,Prior Approval (Class M - formerly IA),Notification for Prior Approval for the change...,1.0,194,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,LONDON,N4 3NX,N4 3NX,"FLAT 3, 194, SEVEN SISTERS ROAD","FLAT 3, 194A, SEVEN SISTERS ROAD",address_6
8,1547,P2017/2905/PRA,ISLINGTON,Prior Approval,Prior Approval (Class M - formerly IA),Notification for Prior Approval for the change...,1.0,194,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,LONDON,N4 3NX,N4 3NX,"FLAT 1, 194, SEVEN SISTERS ROAD","FLAT 1, 194A, SEVEN SISTERS ROAD",address_6
9,1547,P2017/2905/PRA,ISLINGTON,Prior Approval,Prior Approval (Class M - formerly IA),Notification for Prior Approval for the change...,1.0,194,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,LONDON,N4 3NX,N4 3NX,"FLAT 2, 194, SEVEN SISTERS ROAD","FLAT 2, 194A, SEVEN SISTERS ROAD",address_6
10,1547,P2016/2604/PRA,ISLINGTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval application for the proposed ch...,1.0,222,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,0,0,N4 3NX,"FLAT 5, 222, SEVEN SISTERS ROAD",SEVEN SISTERS ROAD,address_6
11,1547,P2016/2604/PRA,ISLINGTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval application for the proposed ch...,1.0,222,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,0,0,N4 3NX,"FLAT 3, 222, SEVEN SISTERS ROAD",SEVEN SISTERS ROAD,address_6
12,1547,P2016/2604/PRA,ISLINGTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval application for the proposed ch...,1.0,222,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,0,0,N4 3NX,"FLAT 1, 222, SEVEN SISTERS ROAD",SEVEN SISTERS ROAD,address_6
13,1547,P2016/2604/PRA,ISLINGTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval application for the proposed ch...,1.0,222,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,0,0,N4 3NX,"FLAT 2, 222, SEVEN SISTERS ROAD",SEVEN SISTERS ROAD,address_6
14,1547,P2016/2604/PRA,ISLINGTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval application for the proposed ch...,1.0,222,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,0,0,N4 3NX,"FLAT 4, 222, SEVEN SISTERS ROAD",SEVEN SISTERS ROAD,address_6
15,1547,P2016/2604/PRA,ISLINGTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval application for the proposed ch...,1.0,222,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,0,0,N4 3NX,"FLAT 5, 222, SEVEN SISTERS ROAD",SEVEN SISTERS ROAD,address_6
16,1547,P2016/2604/PRA,ISLINGTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval application for the proposed ch...,1.0,222,SEVEN SISTERS ROAD,N4 3NX,...,0,0,LONDON,ISLINGTON,0,0,N4 3NX,"FLAT 3, 222, SEVEN SISTERS ROAD",SEVEN SISTERS ROAD,address_6


In [62]:
other_AB[(other_AB['building_number'] == '3') & (other_AB['street_description'] == 'HIGH STREET') & (other_AB['postcode_locator'] == 'BR1 1LF')]

Unnamed: 0,uprn,class,parent_uprn,latitude,longitude,country,legal_name,sub_building_name,building_name,building_number,...,street_description,dependent_locality,locality,town_name,administrative_area,post_town,postcode,postcode_locator,parsed_address1,parsed_address2


In [178]:
### hmm why isnt itfinding the correct record?? 
#this record exists in other AB - okay... so might work once I match taht! 
other_AB[(other_AB['street_description'] == 'SEVEN SISTERS ROAD')  & (other_AB['postcode_locator'] == 'N4 3NX') 
                        & (other_AB['parsed_address1'] == '194, SEVEN SISTERS ROAD')]

Unnamed: 0,uprn,class,parent_uprn,latitude,longitude,country,legal_name,sub_building_name,building_name,building_number,...,street_description,dependent_locality,locality,town_name,administrative_area,post_town,postcode,postcode_locator,parsed_address1,parsed_address2
4055311,5300082025,CR,0,51.562168,-0.109969,E,0.0,0,0,194.0,...,SEVEN SISTERS ROAD,0,0,LONDON,ISLINGTON,LONDON,N4 3NX,N4 3NX,"194, SEVEN SISTERS ROAD","194, SEVEN SISTERS ROAD"
4136519,5300082026,PP,0,51.562168,-0.109969,E,0.0,0,0,,...,SEVEN SISTERS ROAD,0,0,LONDON,ISLINGTON,0,0,N4 3NX,"194, SEVEN SISTERS ROAD",SEVEN SISTERS ROAD


In [171]:
seven_sisters.to_csv('seven_sisters.csv', index=False) 

In [None]:
### Possible strategy 6 

#pao_start_number <> site_number_clean
#street_description <> street_name
#administrative_area <> lpa_name


#left_columns_5 = ['site_number_clean', 'street_name', 'lpa_name']
#right_columns_5 = ['pao_start_number','street_description', 'administrative_area']

In [125]:
##-------- DATA MERGING --------  COMMERCIAL PROPERTIES --- Reproduce the process with commercial properties 
#Merge on Parent UPRN 
non_resi_merged = merged_on_address3_no_match.merge(other_AB,how = 'left', left_on = 'uprn_x', right_on = 'parent_uprn')

non_resi_match, non_resi_no_match =  separate_matches(non_resi_merged, 'parent_uprn', 'parent_uprn')

non_resi_no_match = non_resi_no_match.dropna(axis=1, how='all')

In [126]:
#merge on UPRN
non_resi_merged_2 = non_resi_no_match.merge(other_AB, how = 'left', left_on = 'uprn_x', right_on = 'uprn')

non_resi_match_2, non_resi_no_match_2 =  separate_matches(non_resi_merged_2, 'parent_uprn', 'uprn')

non_resi_no_match_2 = non_resi_no_match_2.dropna(axis=1, how='all')

In [127]:
non_resi_match_2 = non_resi_match_2.rename(columns={"uprn": "uprn_OSAB"})
#non_resi_match_2['match_strategy'] = 'uprn'
non_resi_match_2.count()

non_resi_match = non_resi_match.rename(columns={"uprn": "uprn_OSAB"})
#non_resi_match['match_strategy'] = 'parent_uprn'

In [128]:
#union them together 
non_resi_all_matched = pd.concat([non_resi_match_2, non_resi_match])

---- Commercial address matching --- 

In [67]:
#now join on addresses strat 1 
left_columns = ['site_number_clean', 'street_name', 'postcode_clean']
right_columns = ['building_number', 'street_description', 'postcode_locator']

#non_resi_address_merge_match, non_resi_address_merge_no_match = my_merge(non_resi_no_match_2, other_AB, left_columns, right_columns)

In [93]:
#use this one if skipping UPRN 
non_resi_address_merge_match, non_resi_address_merge_no_match = my_merge(merged_on_address6_no_match, other_AB, left_columns, right_columns)


In [94]:
non_resi_address_merge_no_match = non_resi_address_merge_no_match.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match = non_resi_address_merge_match.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match['match_strategy'] = 'address_1'

non_resi_all_matched = non_resi_address_merge_match

In [95]:
non_resi_address_merge_match['ID'].nunique()

4

In [96]:
#USE THIS FIRST TIME THRU 
# non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match])

In [97]:
#join addresses strat 2
left_columns_2 = ['site_name_GLA_no_pc', 'postcode_clean'] 
right_columns_2 = ['parsed_address1', 'postcode_locator']
 
non_resi_address_merge_match2, non_resi_address_merge_no_match2 = my_merge(non_resi_address_merge_no_match, other_AB, left_columns_2,right_columns_2 )

In [98]:
non_resi_address_merge_match2['ID'].nunique()

0

In [99]:
non_resi_address_merge_no_match2 = non_resi_address_merge_no_match2.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match2 = non_resi_address_merge_match2.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match2['match_strategy'] = 'address_2'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match2])

In [100]:
# STRATEGY 3 ADDRESS MATCH 
left_columns_3 = ['site_name_LPA_no_pc', 'postcode_clean'] 
right_columns_3 = ['parsed_address1', 'postcode_locator']


non_resi_address_merge_match3, non_resi_address_merge_no_match3 = my_merge(non_resi_address_merge_no_match2, other_AB, left_columns_3, right_columns_3)

In [101]:
non_resi_address_merge_match3['ID'].nunique()

0

In [102]:
non_resi_address_merge_no_match3 = non_resi_address_merge_no_match3.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match3 = non_resi_address_merge_match3.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match3['match_strategy'] = 'address_3'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match3])

------- ADDRESS ----- Strategy 4 COMM 

In [103]:
# STRATEGY 4 ADDRESS MATCH 
left_columns_4 = ['site_name_GLA_no_pc', 'postcode_clean'] 
right_columns_4 = ['parsed_address2', 'postcode_locator']


non_resi_address_merge_match4, non_resi_address_merge_no_match4 = my_merge(non_resi_address_merge_no_match3, other_AB, left_columns_4, right_columns_4)

In [104]:
non_resi_address_merge_match4['ID'].nunique()

0

In [105]:
non_resi_address_merge_no_match4 = non_resi_address_merge_no_match4.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match4 = non_resi_address_merge_match4.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match4['match_strategy'] = 'address_4'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match4])

In [106]:
#strategy 5 ADDRESS MATCH 
left_columns_5 = ['concat_addr', 'lpa_name'] 
right_columns_5 = ['parsed_address1', 'administrative_area']


non_resi_address_merge_match5, non_resi_address_merge_no_match5 = my_merge(non_resi_address_merge_no_match4, other_AB, left_columns_5, right_columns_5)


In [107]:
non_resi_address_merge_no_match5 = non_resi_address_merge_no_match5.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match5 = non_resi_address_merge_match5.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match5['match_strategy'] = 'address_5'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match5])

In [108]:
#strategy 5B ADDRESS MATCH 
left_columns_5B = ['concat_addr', 'lpa_name'] 
right_columns_5B = ['parsed_address2', 'administrative_area']


non_resi_address_merge_match5B, non_resi_address_merge_no_match5B = my_merge(non_resi_address_merge_no_match5, other_AB, left_columns_5B, right_columns_5B)


In [109]:
non_resi_address_merge_match5B['ID'].nunique() #was 88

88

In [110]:
non_resi_address_merge_no_match5B = non_resi_address_merge_no_match5B.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match5B = non_resi_address_merge_match5B.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match5B['match_strategy'] = 'address_5B'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match5B])

strategy 6, same as for resi --- 

In [111]:
## Strategy 6
left_columns_6 = ['site_number_clean', 'street_name', 'postcode_clean']
right_columns_6 = ['pao_start_number', 'street_description', 'postcode_locator']

non_resi_address_merge_match6, non_resi_address_merge_no_match6 = my_merge(non_resi_address_merge_no_match5B, other_AB, left_columns_6, right_columns_6)

In [112]:
non_resi_address_merge_match6['ID'].nunique()

18

In [113]:
non_resi_address_merge_no_match6 = non_resi_address_merge_no_match6.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match6 = non_resi_address_merge_match6.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match6['match_strategy'] = 'address_6'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match6])

----- specific address analysis  ---- 

In [119]:
#old address had CR4 4NA as the postcode 
willow = other_AB[(other_AB['administrative_area'] == 'MERTON') & 
        (other_AB['street_description'] == 'WILLOW LANE')]

In [121]:
willow.to_csv('willow_lane_export.csv',index = False )

In [126]:
resi_AB[(resi_AB['administrative_area'] == 'BROMLEY') & 
        (resi_AB['street_description'] == 'HIGH STREET')].to_csv('bromley_high_street.csv', index = False)

#(other_AB['postcode_locator'] == 'BR1 1LF') & 

In [127]:
other_AB[(other_AB['administrative_area'] == 'BROMLEY') & 
        (other_AB['street_description'] == 'HIGH STREET')].to_csv('bromley_high_street_OTHER.csv', index = False)

#(other_AB['postcode_locator'] == 'BR1 1LF') & 

In [129]:
non_resi_address_merge_no_match5B.iloc[1]

ID                                                                          1544
planning_application_number                                             15/P0298
lpa_name                                                                  MERTON
application_type                                                  Prior Approval
application_type_full                      Prior Approval (Class O - formerly J)
description                    Prior approval in relation to the change of us...
number_of_units                                                             51.0
site_number_clean                                                       21 & 21A
street_name                                                          WILLOW LANE
postcode_clean                                                           CR4 4NA
site_name_clean                                      Clock House & Connect House
site_name_GLA                  Connect House, 21a And Clock House, 21, Willow...
site_name_LPA               

In [104]:
non_resi_address_merge_no_match5B.head(20)

Unnamed: 0,ID,planning_application_number,lpa_name,application_type,application_type_full,description,number_of_units,site_number_clean,street_name,postcode_clean,...,decision,status,application_date,decision_date,parsed_street_LPA,parsed_street_GLA,short_site_name_LPA],concat_addr,site_name_LPA_no_pc,site_name_GLA_no_pc
0,1544,15/P0298,MERTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval in relation to the change of us...,1.0,21 & 21A,WILLOW LANE,CR4 4NA,...,Prior Approval Granted,Completed,09/02/2015,29/03/2015,Willow Lane,,clock house 21 willow lane & connect house,"Clock House & Connect House, 21 & 21A, WILLOW ...","Clock House 21 Willow Lane & Connect House, 2...","Connect House, 21a And Clock House, 21, Willow..."
1,1544,15/P0298,MERTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval in relation to the change of us...,51.0,21 & 21A,WILLOW LANE,CR4 4NA,...,Prior Approval Granted,Completed,09/02/2015,29/03/2015,Willow Lane,,clock house 21 willow lane & connect house,"Clock House & Connect House, 21 & 21A, WILLOW ...","Clock House 21 Willow Lane & Connect House, 2...","Connect House, 21a And Clock House, 21, Willow..."
2,1544,15/P0298,MERTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval in relation to the change of us...,26.0,21 & 21A,WILLOW LANE,CR4 4NA,...,Prior Approval Granted,Completed,09/02/2015,29/03/2015,Willow Lane,,clock house 21 willow lane & connect house,"Clock House & Connect House, 21 & 21A, WILLOW ...","Clock House 21 Willow Lane & Connect House, 2...","Connect House, 21a And Clock House, 21, Willow..."
3,1544,15/P0404,MERTON,Prior Approval,Prior Approval (Class O - formerly J),Prior approval in relation to the change of us...,5.0,21 & 21A,WILLOW LANE,CR4 4NA,...,Prior Approval Granted,Completed,09/02/2015,01/04/2015,Willow Lane,,clock house 21 willow lane & connect house,"Clock House & Connect House, 21 & 21A, WILLOW ...","Clock House 21 Willow Lane & Connect House, 2...","Clock House And Connect House, 21-21a, Willow ..."
4,1544,13/P4059,MERTON,Prior Approval,Prior Approval: Change of use - offices to dwe...,PRIOR APPROVAL IN RELATION TO THE CHANGE OF US...,46.0,21 & 21A,WILLOW LANE,CR4 4NA,...,,Superseded,,16/02/2015,Willow Lane,,clock house 21 willow lane & connect house,"Clock House & Connect House, 21 & 21A, WILLOW ...","Clock House 21 Willow Lane & Connect House, 2...",
5,1544,14/P1849,MERTON,Prior Approval,Prior Approval: Change of use - offices to dwe...,PRIOR APPROVAL IN RELATION TO THE CHANGE OF US...,46.0,21 & 21A,WILLOW LANE,CR4 4NA,...,,Superseded,,16/07/2014,Willow Lane,,clock house 21 willow lane & connect house,"Clock House & Connect House, 21 & 21A, WILLOW ...","Clock House 21 Willow Lane & Connect House, 2...",
6,1544,14/P3278,MERTON,Prior Approval,Prior Approval: Change of use - offices to dwe...,PRIOR APPROVAL IN RELATION TO THE CHANGE OF US...,101.0,21 & 21A,WILLOW LANE,CR4 4NA,...,,Superseded,,23/10/2014,Willow Lane,,clock house 21 willow lane & connect house,"Clock House & Connect House, 21 & 21A, WILLOW ...","Clock House 21 Willow Lane & Connect House, 2...",
7,1551,18/P0981,MERTON,Prior Approval,Prior Approval (Class M - formerly IA),PRIOR APPROVAL FOR CHANGE OF USE FROM RETAIL (...,1.0,107b,CENTRAL ROAD,SM4 5SQ,...,Prior Approval Granted,Completed,13/03/2018,16/04/2018,,,107b central road morden sm4 5sq,"107b, CENTRAL ROAD",107b Central Road Morden,"107, Central Road"
8,1551,18/P0981,MERTON,Prior Approval,Prior Approval (Class M - formerly IA),PRIOR APPROVAL FOR CHANGE OF USE FROM RETAIL (...,1.0,107b,CENTRAL ROAD,SM4 5SQ,...,Prior Approval Granted,Completed,13/03/2018,16/04/2018,,,107b central road morden sm4 5sq,"107b, CENTRAL ROAD",107b Central Road Morden,"107, Central Road"
10,1560,19/00597/CUTA3,BROMLEY,Prior Approval,Prior Approval: Change of use - retail/service...,Change of use of the ground floor from betting...,0.0,16 - 18,HIGH STREET,BR1 1EA,...,GRPA,Approved,20/02/2019,16/04/2019,High Street,,16 - 18 high street bromley br1 1ea,"16 - 18, HIGH STREET",16 - 18 High Street Bromley,


In [71]:
## ------------------------------- Post Match  -------------------------------

In [114]:
#print match rate
resi_match_rate = 100 * all_matched['ID'].nunique() / london_data['ID'].nunique()

print('Resi Match rate: ', resi_match_rate) 

Resi Match rate:  9.646118721461187


In [115]:
other_match_rate = 100 * non_resi_all_matched['ID'].nunique() / london_data['ID'].nunique()

print('Other Match rate: ', other_match_rate) 

Other Match rate:  8.789954337899543


In [137]:
#before export, make all these columns blank instead of 0 
#'legal_name', 'sub_building_name', 'building_name','building_number','street_description', 'dependent_locality	locality
all_matched['street_name'] = all_matched['street_name'].replace(0,'')
all_matched['legal_name'] = all_matched['legal_name'].replace(0,'')
all_matched['sub_building_name'] = all_matched['sub_building_name'].replace(0,'')
all_matched['building_name'] = all_matched['building_name'].replace(0,'')
all_matched['building_number'] = all_matched['building_number'].replace(0,'')
all_matched['street_description'] = all_matched['street_description'].replace(0,'')
all_matched['dependent_locality'] = all_matched['dependent_locality'].replace(0,'')
all_matched['locality'] = all_matched['locality'].replace(0,'')
all_matched['post_town'] = all_matched['post_town'].replace(0,'')
all_matched['postcode'] = np.where(all_matched['postcode'] == 0, all_matched['postcode_clean'], all_matched['postcode'])


In [76]:
all_matched.head(20)

Unnamed: 0,UCL_ID,planning_application_number,lpa_name,application_type,application_type_full,description,number_of_units,site_number_clean,street_name,postcode_clean,...,town_name,administrative_area,post_town,postcode,match_strategy,Number_units_found,FPP_PA_mix?,number_street,number_street_x,number_street_y
42,54,21/01316/PREZA,Newham,Prior Approval,Prior Approval: Change of use - light industri...,Application to determine if prior approval is ...,,2A,Boundary Road,E13 9PR,...,LONDON,NEWHAM,LONDON,E13 9PR,uprn,,,,,
87,123,21/00771/PRECOU,Newham,Prior Approval,Prior Approval: Change of use - retail/service...,Prior approval for the change of the retail un...,3.0,140,Portway,E15 3QW,...,LONDON,NEWHAM,LONDON,E15 3QW,uprn,,,,,
93,132,19/AP/1141,Southwark,Prior Approval,Prior Approval: Change of use - storage to dwe...,Notification for prior approval for a change o...,4.0,4A,,SE1 4QG,...,LONDON,SOUTHWARK,LONDON,SE1 4QG,uprn,,,,,
99,142,22/02610/PRECOU,Newham,Prior Approval,"Prior Approval: Change of use from Commercial,...",Prior approval for the change of use of the ba...,,140,Portway,E15 3QW,...,LONDON,NEWHAM,LONDON,E15 3QW,uprn,,,,,
102,145,22/AP/3633,Southwark,Prior Approval,"Prior Approval: Change of use from Commercial,...",Prior approval notification for the change of ...,,3,ONEGA GATE,SE16 7PF,...,LONDON,SOUTHWARK,,SE16 7PF,uprn,,,,,
106,149,22/03970/PIAPA,Westminster,Prior Approval,"Prior Approval: Change of use from Commercial,...",Application for Prior Approval Under Class G o...,,54,Rochester Row,SW1P 1JU,...,LONDON,CITY OF WESTMINSTER,LONDON,SW1P 1JU,uprn,,,,,
115,163,20/2001/PNO,Barnet,Prior Approval,,Change of use of ground and lower ground floor...,1.0,3,Leicester Road,EN5 5EW,...,BARNET,BARNET,BARNET,EN5 5EW,uprn,,,,,
118,167,22/1285/GPD26,Richmond,Prior Approval,,CHANGE OF USE TO SINGLE DWELLING HOUSE,1.0,5,Bridle Lane,TW1 3EG,...,TWICKENHAM,RICHMOND UPON THAMES,TWICKENHAM,TW1 3EG,uprn,,,,,
130,181,21/03383/PRIOR,Bexley,Prior Approval,,Notification for Prior Approval for change of ...,1.0,13,Sidcup High Street,DA14 6EP,...,SIDCUP,BEXLEY,SIDCUP,DA14 6EP,uprn,,,,,
134,185,DM2020/00970,Sutton,Prior Approval,,Prior Approval for change of use from an offic...,1.0,177,Gander Green Lane,SM1 2EZ,...,SUTTON,SUTTON,SUTTON,SM1 2EZ,uprn,,,,,


In [140]:
#write out to csv 

all_matched.to_csv('London2_address_matched_2603.csv', index=False) 

non_resi_all_matched.to_csv('London2_non_resi_match_2603.csv', index=False)  

In [114]:
non_resi_address_merge_no_match5B.to_csv('London2_no_match_0805.csv', index = False)

In [86]:
non_resi_address_merge_no_match3[non_resi_address_merge_no_match3['ID'] == 1561]

Unnamed: 0,ID,planning_application_number,lpa_name,application_type,application_type_full,description,number_of_units,site_number_clean,street_name,postcode_clean,...,site_name_GLA,site_name_LPA,uprn_x,decision,status,application_date,decision_date,parsed_street_LPA,parsed_street_GLA,short_site_name_LPA]
24,1561,16/05471/RESPA,BROMLEY,Prior Approval,Prior Approval (Class O - formerly J),Change of use of third and fourth floor office...,4.0,143,High Street,BR1 1JH,...,"143, High Street, BR1 1JH",143 High Street Bromley BR1 1JH,,Grant Prior Approval,Lapsed,09/12/2016,23/01/2017,High Street,,143 high street bromley br1 1jh
25,1561,16/05471/RESPA,BROMLEY,Prior Approval,Prior Approval (Class O - formerly J),Change of use of third and fourth floor office...,4.0,143,High Street,BR1 1JH,...,"143, High Street, BR1 1JH",143 High Street Bromley BR1 1JH,,Grant Prior Approval,Lapsed,09/12/2016,23/01/2017,High Street,,143 high street bromley br1 1jh
26,1561,17/00490/RESPA,BROMLEY,Prior Approval,Prior Approval (Class O - formerly J),Change of use of second floor offices from Cla...,2.0,143,High Street,BR1 1JH,...,"143, High Street, BR1 1JH",143 High Street Bromley BR1 1JH,,Grant Prior Approval,Lapsed,07/02/2017,23/02/2017,High Street,,143 high street bromley br1 1jh
27,1561,17/00490/RESPA,BROMLEY,Prior Approval,Prior Approval (Class O - formerly J),Change of use of second floor offices from Cla...,2.0,143,High Street,BR1 1JH,...,"143, High Street, BR1 1JH",143 High Street Bromley BR1 1JH,,Grant Prior Approval,Lapsed,07/02/2017,23/02/2017,High Street,,143 high street bromley br1 1jh
