In [1]:
""" Script to take planning applications and match to OSAddressBase addresses

Written by: Christine Langston, April 2024
"""
import pandas as pd
import time
import numpy as np
import copy
import re

In [2]:
#create a function to iterate through csv. use for OSAddressBase file
def read_csv(file_name, columns):
    for chunk in pd.read_csv(file_name, chunksize=10000, usecols=columns, 
                            dtype={'urpn':str, 'parent_urpn': str, 'class': str, 'latitude': float, 
                                                  'longitude': float, 'country': str}):
    
        #if chunk['country'] == 'E':
        yield chunk


In [3]:
#takes a merged dataset and separates matched addresses and not matched
def separate_matches(dataset, column_name, match_strategy):
    dataset_match = dataset.drop(dataset[pd.isna(dataset[column_name]) == True].index)
    dataset_no_match = dataset.drop(dataset[pd.isna(dataset[column_name]) == False].index)
    dataset_match['match_strategy'] = match_strategy
    return dataset_match, dataset_no_match

In [4]:
#function to merge and then split based on if an address was matched  
def my_merge(left, right, left_on, right_on): 
    merged = left.merge(right, how = 'left', left_on = left_on, right_on = right_on)
    merged_match = merged.drop(merged[pd.isna(merged['uprn']) == True].index) 
    merged_no_match = merged.drop(merged[pd.isna(merged['uprn']) == False].index) 
    return merged_match, merged_no_match

In [5]:
#### READ IN THE DATA IF EXPORTED 
resi_AB = pd.read_csv('data/resi_AB_cleaned_parsed_110424.csv', low_memory = False)


In [None]:
other_AB = pd.read_csv('data/other_AB_cleaned_parsed_110424.csv',low_memory = False)



In [6]:
#load in the data that hasn't been matched yet
london_data = pd.read_csv('data/London2_no_match_2603.csv',low_memory = False)

In [5]:
# Read in the data
"""
fewer_columns  = ['uprn', 'parent_uprn', 'class', 'latitude', 'longitude', 'country' ]
all_columns = ['uprn', 'parent_uprn', 'class', 'latitude', 'longitude', 'country', 'legal_name','sub_building_name',
           'building_name','building_number','sao_start_number','sao_start_suffix','sao_end_number',
           'sao_end_suffix','sao_text','alt_language_sao_text','pao_start_number','pao_start_suffix',
           'pao_end_number','pao_end_suffix','pao_text','alt_language_pao_text','usrn','usrn_match_indicator',
           'area_name','level','official_flag','os_address_toid','os_address_toid_version','os_roadlink_toid',
           'os_roadlink_toid_version','os_topo_toid','os_topo_toid_version','voa_ct_record','voa_ndr_record',
           'street_description','alt_language_street_description','dependent_thoroughfare','thoroughfare',
           'double_dependent_locality','dependent_locality','locality', 'town_name', 
          'administrative_area','post_town','postcode','postcode_locator' ]
"""

address_matching_columns = ['uprn', 'parent_uprn', 'class', 'latitude', 'longitude', 'country',
                    'legal_name','sub_building_name', 'building_name','building_number','street_description',
                    'dependent_locality','locality', 'town_name', 'administrative_area','post_town','postcode', 'postcode_locator' ]

#address_base_file = "/Users/christine/Documents/_UCL_grad school/research/ab_plus_england_202308150944.csv"

#nana_process_file = "/Users/christine/OneDrive - University College London/WP1_Address list compilation/13_stage_3_from_NanaWei/PA_batch1_London_202402_processed.csv"

#nana_process_file = "/Users/christine/Documents/_UCL_grad school/research/Batch2_London_filtered_with_manual&Glenigan.csv"


#process_file_ben_columns = '/Users/christine/OneDrive - University College London/WP1_Address list compilation/13_stage_3_from_NanaWei/PA_batch1 with units and type_03192024.csv'



In [8]:
resi_AB.head()

Unnamed: 0,uprn,class,parent_uprn,latitude,longitude,country,legal_name,sub_building_name,building_name,building_number,...,pao_text,street_description,dependent_locality,locality,town_name,administrative_area,post_town,postcode,postcode_locator,parsed_address
0,10012778289,RD02,,54.036005,-2.340833,E,,,,,...,SCOUTBER END FARM,OLD OLIVER LANE TO RAGGED HALL,,RATHMELL,SETTLE,NORTH YORKSHIRE,,,BD24 0LP,"SCOUTBER END FARM, OLD OLIVER LANE TO RAGGED HALL"
1,10013819934,RG02,10013820000.0,54.780687,-1.510651,E,,,,,...,GARAGE SITE 53,DOWSEY ROAD,,,SHERBURN,DURHAM,,,DH6 1JH,"22.0, GARAGE SITE 53, DOWSEY ROAD"
2,10014309637,RD04,200003700000.0,51.26651,0.497553,E,,,,,...,,MILTON STREET,,,MAIDSTONE,KENT,,,ME16 8LD,"ROOM 1, 85.0, MILTON STREET"
3,10033213520,RD01,10002820000.0,53.903252,-0.158754,E,,,,,...,LONGBEACH LEISURE PARK,HORNSEA BURTON ROAD,,,HORNSEA,EAST RIDING OF YORKSHIRE,,,HU18 1TL,"33 SOUTHFIELD, LONGBEACH LEISURE PARK, HORNSEA..."
4,10033545757,RD06,100023500000.0,51.514983,-0.17926,E,,,,,...,,WESTBOURNE TERRACE,,,LONDON,CITY OF WESTMINSTER,,,W2 3UJ,"SECOND FLOOR, 58.0, WESTBOURNE TERRACE"


Some data cleaning steps to clean up the Nan and numerical data coming from the CSV

In [9]:
### ------ DATA CLEANING  ----------
#edit the parent uprn from OSAddress base to fill out to be strings with 12 digits and leading zeros
#replace nan
resi_AB = resi_AB.replace([np.nan, -np.inf], 0)

#cast as integer
resi_AB['parent_uprn'] = resi_AB['parent_uprn'].astype('Int64')
resi_AB['uprn'] = resi_AB['uprn'].astype('Int64')


In [10]:
### ------ DATA CLEANING  ----------
#cast as string 
resi_AB['parent_uprn'] = resi_AB['parent_uprn'].astype(str)
resi_AB['uprn'] = resi_AB['uprn'].astype(str)

#fill in with left side padding zeros 
resi_AB['parent_uprn'] = resi_AB['parent_uprn'].apply(lambda x: '{0:0>12}'.format(x))
resi_AB['uprn'] = resi_AB['uprn'].apply(lambda x: '{0:0>12}'.format(x))


In [18]:
### ------ DATA CLEANING  ----------

#BATCH 1 london_data has extra quotation marks, need to remove
#london_data['uprn'] = london_data['uprn'].apply(lambda x: x.strip("''") if not pd.isna(x) else x)


In [14]:
london_data['uprn_x']

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
        ..
3251   NaN
3252   NaN
3253   NaN
3254   NaN
3255   NaN
Name: uprn_x, Length: 3256, dtype: float64

In [16]:
### ------ DATA CLEANING  ----------
london_data['uprn'] = london_data['uprn'].astype('Int64')



In [17]:
#cast the strings and make sure it looks good 
london_data['uprn'] = london_data['uprn'].astype('str') #apply(lambda x: str(x) if not pd.isna(x) else x)

london_data['uprn'] = london_data['uprn'].apply(lambda x: '{0:0>12}'.format(x) if not pd.isna(x) else x)

In [15]:
### ------ DATA CLEANING  ----------

london_data = london_data.replace('00000000<NA>',np.NaN)

Ignore the creation of these columns if rematching data

In [20]:
### ------ DATA CLEANING  ----------
#street address from site_name_LPA
london_data['parsed_street_LPA'] = london_data['site_name_LPA'].apply(lambda x: re.findall("[0-9]+.-?.[0-9]+?\s(.+)(Road|Lane|Avenue|Parade|Courtyard|Street|Gardens|Drive)", x) if not pd.isna(x) and '-' in x
                                                                  else (re.findall("[0-9]+\s(.+)(Road|Lane|Avenue|Parade|Courtyard|Street|Gardens|Drive)", x) if not pd.isna(x) else []))


In [21]:
london_data['parsed_street_LPA'] =  london_data['parsed_street_LPA'].apply(lambda x: x[0][0] + x[0][1]  if len(x) > 0 else None)

In [22]:
#cleaning - street address
london_data['parsed_street_GLA'] = london_data['site_name_GLA'].apply(lambda x: re.findall("[0-9]+.-?.[0-9]+?\s(.+)(Road|Lane|Avenue|Parade|Courtyard|Street|Gardens|Drive)", x) if not pd.isna(x) and '-' in x
                                                                  else (re.findall("[0-9]+\s(.+)(Road|Lane|Avenue|Parade|Courtyard|Street|Gardens|Drive)", x) if not pd.isna(x) else []))


In [23]:
london_data['parsed_street_GLA'] =  london_data['parsed_street_GLA'].apply(lambda x: x[0][0] + x[0][1]  if len(x) > 0 else None)

In [24]:
#if the original street was empty, then we want to use the LPA Or GLA parsed name as street_name 

london_data['street_name'] = np.where(london_data['street_name'].isnull(), np.where(london_data['parsed_street_LPA'].isnull(), london_data['parsed_street_GLA'], london_data['parsed_street_LPA']), london_data['street_name'] )

In [26]:
#make london_data all into capitals? 
london_data['street_name'] = london_data['street_name'].str.upper()

In [27]:
# DATA EXPLORATION - EXPORT CSV IF NEEDED 
london_data.to_csv('batch2_cleaned.csv', index = False)

might need to do this one

In [17]:
### ------ DATA CLEANING  ----------

#Address matching data clean, make building number into a string 
resi_AB['building_number'] = resi_AB['building_number'].astype('Int64').astype('str') 

Ignore the Data matching on the UPRN / Parent for rerunning the matching algorithm

In [32]:
#### -------- DATA MERGING --------
#join the london data with the RESIDENTIAL AddressBase dataset on UPRN
merged = london_data.merge(resi_AB, how = 'left', left_on = 'uprn', right_on = 'parent_uprn')

#merged['UCL_ID'].nunique()

In [33]:
#separate merged into no match and match 
merged_match, merged_no_match = separate_matches(merged, 'parent_uprn', 'parent_uprn')

merged_no_match = merged_no_match.dropna(axis=1, how='all')

In [34]:
#merge 2 on uprn not parent_uprn 
merged_2 = merged_no_match.merge(resi_AB, how = 'left', left_on = 'uprn_x', right_on = 'uprn')

In [35]:
#separate the merge 2 into two datasets for match v not match 
merged_2_match, merged_2_no_match = separate_matches(merged_2, 'uprn', 'uprn')

merged_2_no_match = merged_2_no_match.dropna(axis=1, how='all')      

In [36]:
merged_2_match = merged_2_match.rename(columns={"uprn": "uprn_OSAB"})
merged_match = merged_match.rename(columns={"uprn_y": "uprn_OSAB"})

#merged_2_match.count()

In [37]:
all_matched = pd.concat([merged_2_match, merged_match])


In [18]:
merged_2_no_match = london_data

Address matching starts here

In [19]:
#-------- DATA MERGING -------- 
## ADDRESS STRATEGY ONE 
#match on the street number, street name, postcode 
left_columns = ['site_number_clean', 'street_name', 'postcode_clean']
right_columns = ['building_number', 'street_description', 'postcode_locator']

# this is a very strict conservative join 
merged_on_address = merged_2_no_match.merge(resi_AB, how = 'left', left_on = left_columns, right_on = right_columns)

In [20]:
merged_on_address.count()

ID                             3256
planning_application_number    3256
lpa_name                       3256
application_type               3256
application_type_full          3256
description                    3256
number_of_units                3195
site_number_clean              2178
street_name                    3199
postcode_clean                 2755
site_name_clean                1250
site_name_GLA                  2895
site_name_LPA                  2755
uprn_x                           65
decision                       2517
status                         3256
application_date               2389
decision_date                  3255
parsed_street_LPA              1263
parsed_street_GLA               110
short_site_name_LPA]           3200
uprn                              0
class                             0
parent_uprn                       0
latitude                          0
longitude                         0
country                           0
legal_name                  

In [95]:
#int64
#london_data[london_data['ID'] == 1561]
print(merged_on_address[merged_on_address['ID'] == 1561])

#non_resi_address_merge_no_match3[non_resi_address_merge_no_match3['ID'] == 1561]

      ID planning_application_number lpa_name application_type  \
26  1561              16/05471/RESPA  Bromley   Prior Approval   
27  1561              16/05471/RESPA  Bromley   Prior Approval   
28  1561              17/00490/RESPA  Bromley   Prior Approval   
29  1561              17/00490/RESPA  Bromley   Prior Approval   

                    application_type_full  \
26  Prior Approval (Class O - formerly J)   
27  Prior Approval (Class O - formerly J)   
28  Prior Approval (Class O - formerly J)   
29  Prior Approval (Class O - formerly J)   

                                          description  number_of_units  \
26  Change of use of third and fourth floor office...              4.0   
27  Change of use of third and fourth floor office...              4.0   
28  Change of use of second floor offices from Cla...              2.0   
29  Change of use of second floor offices from Cla...              2.0   

   site_number_clean  street_name postcode_clean  ... sub_building_name 

In [21]:
merged_on_address_match,merged_on_address_no_match =  separate_matches(merged_on_address, 'uprn', 'address_1')


In [22]:
## Add the merged_on_address_match to the matched
merged_on_address_match = merged_on_address_match.rename(columns={"uprn": "uprn_OSAB"})
#merged_on_address_match['match_strategy'] = 'address_1'


In [25]:
all_matched = merged_on_address_match

In [26]:
#all_matched = pd.concat([all_matched, merged_on_address_match])


In [112]:
# we would want to create a new column on this dataset that cleans up the site name
merged_on_address_no_match['short_site_name_LPA]' ] = np.where(merged_on_address_no_match['site_name_LPA'].isnull() == False ,
                                                        merged_on_address_no_match['site_name_LPA'].apply(lambda x: x.split(',')[0] if not pd.isna(x) else x ).str.lower(), 
                                                        merged_on_address_no_match['site_name_GLA'].apply(lambda x: x.split('\r\r\n')[0] if not pd.isna(x) else x ).str.lower()  )





In [27]:
merged_on_address_no_match = merged_on_address_no_match.dropna(axis=1, how='all')


In [29]:
##-------- DATA MERGING --------  STRATEGY 2 ADDRESS MATCH
# concat AB Building number + AB street description = site_name_LPA before comma
    #  and postcode  = postcode 
#create new column and oncatenate the buildign number and street

resi_AB['number_street'] = np.where(resi_AB['building_name'] != 0,  resi_AB['building_name'].apply(lambda x: re.sub('[A-Z]', '',x) if type(x) == str else x)
, resi_AB['building_number'])

In [30]:
resi_AB['number_street'] =  resi_AB['number_street'] + ' ' + resi_AB['street_description'].str.lower()

In [31]:
left_columns_2 = ['short_site_name_LPA]', 'postcode_clean'] 
right_columns_2 = ['number_street', 'postcode_locator']

merged_on_address2 = merged_on_address_no_match.merge(resi_AB, how = 'left', left_on = left_columns_2, right_on = right_columns_2)

In [32]:
merged_on_address2_match = merged_on_address2.drop(merged_on_address2[pd.isna(merged_on_address2['uprn']) == True].index) 
merged_on_address2_no_match = merged_on_address2.drop(merged_on_address2[pd.isna(merged_on_address2['uprn']) == False].index) 


In [33]:
merged_on_address2_match = merged_on_address2_match.rename(columns={"uprn": "uprn_OSAB"})
merged_on_address2_match = merged_on_address2_match.drop(columns = ['short_site_name_LPA]']) 
merged_on_address2_match['match_strategy'] = 'address_2'

In [34]:
all_matched = pd.concat([all_matched, merged_on_address2_match])

In [35]:
merged_on_address2_no_match = merged_on_address2_no_match.dropna(axis=1, how='all')


In [121]:
# #-------- DATA MERGING --------  STRATEGY 3 ADDRESS MATCH 
# concat AB Building number + AB street description = site_name_LPA before comma
    # And AB administrative_area = LPA_name
left_columns_3 = ['short_site_name_LPA]', 'lpa_name'] 
right_columns_3 = ['number_street', 'administrative_area']

merged_on_address2_no_match['lpa_name'] = merged_on_address2_no_match['lpa_name'].str.upper()
#merged_on_address3 = merged_on_address2_no_match.merge(resi_AB, how = 'left', left_on = left_columns_3, right_on = right_columns_3)

merged_on_address3_match, merged_on_address3_no_match = my_merge(merged_on_address2_no_match, resi_AB, left_columns_3, right_columns_3)

In [122]:
# add the matched into all matched
merged_on_address3_match = merged_on_address3_match.rename(columns={"uprn": "uprn_OSAB"})
merged_on_address3_match = merged_on_address3_match.drop(columns = ['short_site_name_LPA]']) 
merged_on_address3_match['match_strategy'] = 'address_3'

frames = [all_matched, merged_on_address3_match]

all_matched = pd.concat(frames)

In [123]:
merged_on_address3_no_match = merged_on_address3_no_match.dropna(axis=1, how='all')

In [112]:
#columns_to_drop = ['legal_name','sub_building_name','building_name','building_number','street_description','dependent_locality','locality','town_name','administrative_area','post_town','postcode','uprn','class','parent_uprn','latitude','longitude','country', 'number_street_y']
#merged_on_address3_no_match = merged_on_address3_no_match.drop(columns=columns_to_drop)

In [None]:
#  STRATEGY 4 ADDRESS MATCH 
#match based on address number and postcode
#left_columns_4 = ['site_number_clean', 'postcode_clean']
#right_columns_4 = ['building_number', 'postcode']

# this is a loose join 
#merged_on_address4 = merged_on_address3_no_match.merge(resi_AB, how = 'left', left_on = left_columns_4, right_on = right_columns_4)

#other ideas: 
# if site name starts with a non number, remove the first comma and try strat 2 


In [125]:
##-------- DATA MERGING --------  COMMERCIAL PROPERTIES --- Reproduce the process with commercial properties 
#Merge on Parent UPRN 
non_resi_merged = merged_on_address3_no_match.merge(other_AB,how = 'left', left_on = 'uprn_x', right_on = 'parent_uprn')

non_resi_match, non_resi_no_match =  separate_matches(non_resi_merged, 'parent_uprn', 'parent_uprn')

non_resi_no_match = non_resi_no_match.dropna(axis=1, how='all')

In [126]:
#merge on UPRN
non_resi_merged_2 = non_resi_no_match.merge(other_AB, how = 'left', left_on = 'uprn_x', right_on = 'uprn')

non_resi_match_2, non_resi_no_match_2 =  separate_matches(non_resi_merged_2, 'parent_uprn', 'uprn')

non_resi_no_match_2 = non_resi_no_match_2.dropna(axis=1, how='all')

In [127]:
non_resi_match_2 = non_resi_match_2.rename(columns={"uprn": "uprn_OSAB"})
#non_resi_match_2['match_strategy'] = 'uprn'
non_resi_match_2.count()

non_resi_match = non_resi_match.rename(columns={"uprn": "uprn_OSAB"})
#non_resi_match['match_strategy'] = 'parent_uprn'

In [128]:
#union them together 
non_resi_all_matched = pd.concat([non_resi_match_2, non_resi_match])

In [129]:
#now join on addresses strat 1 
left_columns = ['site_number_clean', 'street_name', 'postcode_clean']
right_columns = ['building_number', 'street_description', 'postcode']

non_resi_address_merge_match, non_resi_address_merge_no_match = my_merge(non_resi_no_match_2, other_AB, left_columns, right_columns)

In [130]:
non_resi_address_merge_no_match = non_resi_address_merge_no_match.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match = non_resi_address_merge_match.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match['match_strategy'] = 'address_1'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match])

In [131]:
#join addresses strat 2
#make change to the other_AB dataset 
other_AB['number_street'] = np.where(other_AB['building_name'] != 0,  other_AB['building_name'].apply(lambda x: re.sub('[A-Z]', '',x) if type(x) == str else x)
, other_AB['building_number'])
other_AB['number_street'] =  other_AB['number_street'] + ' ' + other_AB['street_description'].str.lower()


left_columns_2 = ['short_site_name_LPA]', 'postcode_clean'] 
right_columns_2 = ['number_street', 'postcode']
 
non_resi_address_merge_match2, non_resi_address_merge_no_match2 = my_merge(non_resi_address_merge_no_match, other_AB, left_columns_2,right_columns_2 )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_AB['number_street'] = np.where(other_AB['building_name'] != 0,  other_AB['building_name'].apply(lambda x: re.sub('[A-Z]', '',x) if type(x) == str else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_AB['number_street'] =  other_AB['number_street'] + ' ' + other_AB['street_description'].str.lower()


In [132]:
non_resi_address_merge_no_match2 = non_resi_address_merge_no_match2.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match2 = non_resi_address_merge_match2.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match2['match_strategy'] = 'address_2'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match2])

In [133]:
# STRATEGY 3 ADDRESS MATCH 
# concat AB Building number + AB street description = site_name_LPA before comma
    # And AB administrative_area = LPA_name
left_columns_3 = ['short_site_name_LPA]', 'lpa_name'] 
right_columns_3 = ['number_street', 'administrative_area']

non_resi_address_merge_no_match2['lpa_name'] = non_resi_address_merge_no_match2['lpa_name'].str.upper()
#merged_on_address3 = merged_on_address2_no_match.merge(resi_AB, how = 'left', left_on = left_columns_3, right_on = right_columns_3)

non_resi_address_merge_match3, non_resi_address_merge_no_match3 = my_merge(non_resi_address_merge_no_match2, other_AB, left_columns_3, right_columns_3)

In [134]:
non_resi_address_merge_no_match3 = non_resi_address_merge_no_match3.dropna(axis=1, how='all')

## Add the merged_on_address_match to the matched
non_resi_address_merge_match3 = non_resi_address_merge_match3.rename(columns={"uprn": "uprn_OSAB"})
non_resi_address_merge_match3['match_strategy'] = 'address_3'

non_resi_all_matched = pd.concat([non_resi_all_matched, non_resi_address_merge_match3])

In [71]:
## ------------------------------- Post Match  -------------------------------

In [36]:
#print match rate
resi_match_rate = 100 * all_matched['ID'].nunique() / london_data['ID'].nunique()

print('Resi Match rate: ', resi_match_rate) 

Resi Match rate:  0.0


In [136]:
other_match_rate = 100 * non_resi_all_matched['ID'].nunique() / london_data['ID'].nunique()

print('Other Match rate: ', other_match_rate) 

Other Match rate:  22.791638570465274


In [137]:
#before export, make all these columns blank instead of 0 
#'legal_name', 'sub_building_name', 'building_name','building_number','street_description', 'dependent_locality	locality
all_matched['street_name'] = all_matched['street_name'].replace(0,'')
all_matched['legal_name'] = all_matched['legal_name'].replace(0,'')
all_matched['sub_building_name'] = all_matched['sub_building_name'].replace(0,'')
all_matched['building_name'] = all_matched['building_name'].replace(0,'')
all_matched['building_number'] = all_matched['building_number'].replace(0,'')
all_matched['street_description'] = all_matched['street_description'].replace(0,'')
all_matched['dependent_locality'] = all_matched['dependent_locality'].replace(0,'')
all_matched['locality'] = all_matched['locality'].replace(0,'')
all_matched['post_town'] = all_matched['post_town'].replace(0,'')
all_matched['postcode'] = np.where(all_matched['postcode'] == 0, all_matched['postcode_clean'], all_matched['postcode'])


In [76]:
all_matched.head(20)

Unnamed: 0,UCL_ID,planning_application_number,lpa_name,application_type,application_type_full,description,number_of_units,site_number_clean,street_name,postcode_clean,...,town_name,administrative_area,post_town,postcode,match_strategy,Number_units_found,FPP_PA_mix?,number_street,number_street_x,number_street_y
42,54,21/01316/PREZA,Newham,Prior Approval,Prior Approval: Change of use - light industri...,Application to determine if prior approval is ...,,2A,Boundary Road,E13 9PR,...,LONDON,NEWHAM,LONDON,E13 9PR,uprn,,,,,
87,123,21/00771/PRECOU,Newham,Prior Approval,Prior Approval: Change of use - retail/service...,Prior approval for the change of the retail un...,3.0,140,Portway,E15 3QW,...,LONDON,NEWHAM,LONDON,E15 3QW,uprn,,,,,
93,132,19/AP/1141,Southwark,Prior Approval,Prior Approval: Change of use - storage to dwe...,Notification for prior approval for a change o...,4.0,4A,,SE1 4QG,...,LONDON,SOUTHWARK,LONDON,SE1 4QG,uprn,,,,,
99,142,22/02610/PRECOU,Newham,Prior Approval,"Prior Approval: Change of use from Commercial,...",Prior approval for the change of use of the ba...,,140,Portway,E15 3QW,...,LONDON,NEWHAM,LONDON,E15 3QW,uprn,,,,,
102,145,22/AP/3633,Southwark,Prior Approval,"Prior Approval: Change of use from Commercial,...",Prior approval notification for the change of ...,,3,ONEGA GATE,SE16 7PF,...,LONDON,SOUTHWARK,,SE16 7PF,uprn,,,,,
106,149,22/03970/PIAPA,Westminster,Prior Approval,"Prior Approval: Change of use from Commercial,...",Application for Prior Approval Under Class G o...,,54,Rochester Row,SW1P 1JU,...,LONDON,CITY OF WESTMINSTER,LONDON,SW1P 1JU,uprn,,,,,
115,163,20/2001/PNO,Barnet,Prior Approval,,Change of use of ground and lower ground floor...,1.0,3,Leicester Road,EN5 5EW,...,BARNET,BARNET,BARNET,EN5 5EW,uprn,,,,,
118,167,22/1285/GPD26,Richmond,Prior Approval,,CHANGE OF USE TO SINGLE DWELLING HOUSE,1.0,5,Bridle Lane,TW1 3EG,...,TWICKENHAM,RICHMOND UPON THAMES,TWICKENHAM,TW1 3EG,uprn,,,,,
130,181,21/03383/PRIOR,Bexley,Prior Approval,,Notification for Prior Approval for change of ...,1.0,13,Sidcup High Street,DA14 6EP,...,SIDCUP,BEXLEY,SIDCUP,DA14 6EP,uprn,,,,,
134,185,DM2020/00970,Sutton,Prior Approval,,Prior Approval for change of use from an offic...,1.0,177,Gander Green Lane,SM1 2EZ,...,SUTTON,SUTTON,SUTTON,SM1 2EZ,uprn,,,,,


In [140]:
#write out to csv 

all_matched.to_csv('London2_address_matched_2603.csv', index=False) 

non_resi_all_matched.to_csv('London2_non_resi_match_2603.csv', index=False)  

In [141]:
non_resi_address_merge_no_match3.to_csv('London2_no_match_2603.csv', index = False)

In [86]:
non_resi_address_merge_no_match3[non_resi_address_merge_no_match3['ID'] == 1561]

Unnamed: 0,ID,planning_application_number,lpa_name,application_type,application_type_full,description,number_of_units,site_number_clean,street_name,postcode_clean,...,site_name_GLA,site_name_LPA,uprn_x,decision,status,application_date,decision_date,parsed_street_LPA,parsed_street_GLA,short_site_name_LPA]
24,1561,16/05471/RESPA,BROMLEY,Prior Approval,Prior Approval (Class O - formerly J),Change of use of third and fourth floor office...,4.0,143,High Street,BR1 1JH,...,"143, High Street, BR1 1JH",143 High Street Bromley BR1 1JH,,Grant Prior Approval,Lapsed,09/12/2016,23/01/2017,High Street,,143 high street bromley br1 1jh
25,1561,16/05471/RESPA,BROMLEY,Prior Approval,Prior Approval (Class O - formerly J),Change of use of third and fourth floor office...,4.0,143,High Street,BR1 1JH,...,"143, High Street, BR1 1JH",143 High Street Bromley BR1 1JH,,Grant Prior Approval,Lapsed,09/12/2016,23/01/2017,High Street,,143 high street bromley br1 1jh
26,1561,17/00490/RESPA,BROMLEY,Prior Approval,Prior Approval (Class O - formerly J),Change of use of second floor offices from Cla...,2.0,143,High Street,BR1 1JH,...,"143, High Street, BR1 1JH",143 High Street Bromley BR1 1JH,,Grant Prior Approval,Lapsed,07/02/2017,23/02/2017,High Street,,143 high street bromley br1 1jh
27,1561,17/00490/RESPA,BROMLEY,Prior Approval,Prior Approval (Class O - formerly J),Change of use of second floor offices from Cla...,2.0,143,High Street,BR1 1JH,...,"143, High Street, BR1 1JH",143 High Street Bromley BR1 1JH,,Grant Prior Approval,Lapsed,07/02/2017,23/02/2017,High Street,,143 high street bromley br1 1jh
