In [5]:
import pandas as pd

#Read in planning application data with addresses converted to GPS coordinates
#To reproduce code, replace with your own file path
#low_memory=False makes pandas infer each column's dtype from the whole file
#instead of chunk by chunk, so a column that mixes numeric and text values
#(presumably "Application Number", e.g. 212307 vs NA190831) gets one consistent
#dtype and the mixed-types DtypeWarning is not raised
df = pd.read_csv(
    "/content/drive/MyDrive/project/planningAppsGeocoded.csv",
    low_memory=False,
)

#Calculated how many addresses had been successfully geocoded
#94% success rate
num_success = df["geocode_success"].sum()
num_total = len(df)
num_fail = num_total - num_success

print("Total rows:", num_total)
print("Geocode success:", num_success)
print("Geocode fail:", num_fail)

#Reduced dataset to just those with GPS coordinates
df = df[df["geocode_success"] == True]

#Calculated counts of accepted (1) / rejected (0) applications in reduced data
#The 0.87 proportion of accepted applications was maintained
num_decision_1 = (df["Decision Binary"] == 1).sum()
num_decision_0 = (df["Decision Binary"] == 0).sum()

print("\nAmong geocoded rows only:")
print("Binary decision = 1:", num_decision_1)
print("Binary decision = 0:", num_decision_0)


  df = pd.read_csv("/content/drive/MyDrive/project/planningAppsGeocoded.csv")


Total rows: 75373
Geocode success: 71088
Geocode fail: 4285

Among geocoded rows only:
Binary decision = 1: 62067
Binary decision = 0: 9021


In [6]:
import geopandas as gpd

#Loaded the small area boundary polygons from the geopackage file
#To reproduce code, replace with your own file path
boundaries_path = "/content/drive/MyDrive/project/Small_Area_National_Statistical_Boundaries_2022_Ungeneralised_view_-7354763930310470674.gpkg"
small_areas = gpd.read_file(boundaries_path)

#Listed the attribute columns available on the boundary layer
print(small_areas.columns)


Index(['SA_GUID_2016', 'SA_GUID_2022', 'SA_PUB2011', 'SA_PUB2016',
       'SA_PUB2022', 'SA_GEOGID_2022', 'SA_CHANGE_CODE', 'SA_URBAN_AREA_FLAG',
       'SA_URBAN_AREA_NAME', 'SA_NUTS1', 'SA_NUTS1_NAME', 'SA_NUTS2',
       'SA_NUTS2_NAME', 'SA_NUTS3', 'SA_NUTS3_NAME', 'ED_GUID', 'ED_OFFICIAL',
       'ED_ENGLISH', 'ED_GAEILGE', 'ED_ID_STR', 'ED_PART_COUNT', 'COUNTY_CODE',
       'COUNTY_ENGLISH', 'COUNTY_GAEILGE', 'CSO_LEA', 'geometry'],
      dtype='object')


In [7]:
from shapely.geometry import Point

#Converted dataframe to a geodataframe
#Built the point geometries in a single vectorised call (x = longitude,
#y = latitude) instead of constructing one shapely Point per row with apply
df = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["lon"], df["lat"]),
    crs="EPSG:4326"
)

#Aligned coordinate systems of small area and planning application data
df = df.to_crs(small_areas.crs)

#Performed a spatial join on the data by finding the small area each point belongs to
#Only the small area guid and geometry are passed to the join, so no other
#boundary attribute can collide with (and rename) a planning application column
joined = gpd.sjoin(
    df,
    small_areas[["SA_GUID_2022", "geometry"]],
    how="left",
    predicate="within"
)

#Kept the planning app columns along with the matched small area guid
#(this also drops the helper index column added by the join)
keep_cols = list(df.columns) + ["SA_GUID_2022"]

joined = joined[keep_cols]

#Inspected a reproducible random sample of the joined data
joined.sample(10, random_state=1)






Unnamed: 0,OBJECTID,Planning Authority,Application Number,Development Address,Development Postcode,Application Status,Application Type,Decision,Number of Residential Units,One-Off House,Shape__Area,Decision Binary,pa_clean,full_address,lat,lon,geocode_success,geometry,SA_GUID_2022
49907,172429.0,Meath County Council,212307,"Assisi , Kentstown Road Bailis , Navan Co. Meath",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,4530.208496,1,Meath,"Assisi , Kentstown Road Bailis , Navan Co. Mea...",53.642914,-6.66095,True,POINT (688540.666 766736.332),4c07d11e-3a9d-851d-e053-ca3ca8c0ca7f
32346,104744.0,Kilkenny County Council,19587,"Banagher , Piltown , Co Kilkenny",,APPLICATION FINALISED,PERMISSION,REFUSED,1.0,,9703.456055,0,Kilkenny,"Banagher , Piltown , Co Kilkenny, Ireland",52.351084,-7.327839,True,POINT (645792.157 622378.732),4c07d11e-03c4-851d-e053-ca3ca8c0ca7f
25212,79502.0,Kildare County Council,20780,"Rathconnel Wood , Nurney , Kildare",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,Y,17775.736816,1,Kildare,"Rathconnel Wood , Nurney , Kildare, Ireland",53.064396,-6.954312,True,POINT (670086.396 702040.939),4c07d11d-f41e-851d-e053-ca3ca8c0ca7f
47996,165082.0,Meath County Council,NA190831,"Cannistown , Ardsallagh , Navan Co. Meath",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,221.705566,1,Meath,"Cannistown , Ardsallagh , Navan Co. Meath, Ire...",53.616201,-6.657966,True,POINT (688794.024 763767.754),4c07d11e-0a7b-851d-e053-ca3ca8c0ca7f
3330,9769.0,Cavan County Council,2460471,"Cornagleragh or Oldtown , Cavan , Co. Cavan",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,17434.786133,1,Cavan,"Cornagleragh or Oldtown , Cavan , Co. Cavan, I...",53.969493,-7.347342,True,POINT (642821.839 802442.385),4c07d11e-0833-851d-e053-ca3ca8c0ca7f
5450,17183.0,Clare County Council,2320,"Knocknagroagh , Ballyvaughan , Co Clare",,APPLICATION FINALISED,PERMISSION,REFUSED,1.0,,5269.391602,0,Clare,"Knocknagroagh , Ballyvaughan , Co Clare, Ireland",53.115286,-9.148709,True,POINT (523099.714 707808.737),4c07d11d-f970-851d-e053-ca3ca8c0ca7f
29081,94926.0,Kerry County Council,211070,"LISSANEARLY WEST , ABBEYDORNEY , CO KERRY",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,Y,5685.592285,1,Kerry,"LISSANEARLY WEST , ABBEYDORNEY , CO KERRY, Ire...",52.346566,-9.687411,True,POINT (485033.883 623003.955),4c07d11e-0208-851d-e053-ca3ca8c0ca7f
55660,188907.0,Roscommon County Council,21382,"Cloonark , Cornafulla , Athlone",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,5999.759766,1,Roscommon,"Cloonark , Cornafulla , Athlone, Roscommon, Ir...",53.375793,-8.004085,True,POINT (599728.207 736178.918),4c07d11e-0cec-851d-e053-ca3ca8c0ca7f
59463,205748.0,Tipperary County Council,21709,"Cooleeney and Derryfadda , Moyne , Thurles Co....",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,449118.320312,1,Tipperary,"Cooleeney and Derryfadda , Moyne , Thurles Co....",52.735425,-7.686937,True,POINT (621142.593 664972.918),4c07d11d-f5b2-851d-e053-ca3ca8c0ca7f
14271,54599.0,Galway County Council,18978,"Drinaun ,",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,5703.510254,1,Galway,"Drinaun , Galway, Ireland",53.507153,-8.340773,True,POINT (577394.526 750850.016),4c07d11e-01ab-851d-e053-ca3ca8c0ca7f


In [8]:
#Loaded census data
#Replace with your own file path to reproduce code
census_data = pd.read_csv("/content/drive/MyDrive/project/censusDataCleaned.csv")

#Merged planning data and census data using small area guid field
#validate="many_to_one" makes pandas raise a MergeError if a GUID appears more
#than once in the census data; without it, duplicate GUIDs would silently
#duplicate planning applications and distort the counts below
merged = joined.merge(
    census_data,
    left_on="SA_GUID_2022",
    right_on="GUID",
    how="left",
    validate="many_to_one"
)

#Corrected major errors by the PHOTON API by removing entries where the planning
#site local authority was not the same as the small area local authority

#Stripped whitespace and upper-cased both local authority names for uniform formatting
merged["LOCAL_AUTHORITY_norm"] = merged["LOCAL_AUTHORITY"].str.strip().str.upper()
merged["Planning_Authority_norm"] = merged["Planning Authority"].str.strip().str.upper()

#Removed rows where they weren't the same
#Rows with no census match have a missing LOCAL_AUTHORITY, so they fail the
#comparison and are removed here as well
merged = merged[merged["LOCAL_AUTHORITY_norm"] == merged["Planning_Authority_norm"]]

#Analysed proportions of accepted/rejected applications in new data
#Again, 0.87 acceptance rate was maintained
num_rows = len(merged)
num_decision_1 = (merged["Decision Binary"] == 1).sum()
num_decision_0 = (merged["Decision Binary"] == 0).sum()
prop_1 = num_decision_1 / num_rows
prop_0 = num_decision_0 / num_rows

print("Rows remaining:", num_rows)
print("Decision = 1:", num_decision_1)
print("Decision = 0:", num_decision_0)
print("Proportion decision = 1:", prop_1)
print("Proportion decision = 0:", prop_0)

#Inspected sample of new merged data
merged.sample(10, random_state=1)



Rows remaining: 66663
Decision = 1: 58056
Decision = 0: 8607
Proportion decision = 1: 0.8708878988344358
Proportion decision = 0: 0.1291121011655641


Unnamed: 0,OBJECTID,Planning Authority,Application Number,Development Address,Development Postcode,Application Status,Application Type,Decision,Number of Residential Units,One-Off House,...,OverHourJourney,NoLC,CollegeEducation,EmploymentRate,NoCentralHeating,ServedByPublicSystem_Water,ServedByPublicSystem_Sewerage,HasRenewableEnergy,LOCAL_AUTHORITY_norm,Planning_Authority_norm
18406,64415.0,Galway County Council,2360020,"Gardenfield , Tuam , Co. Galway",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,...,0.09375,0.220472,0.370079,0.561688,0.0,0.669291,0.015625,0.322034,GALWAY COUNTY COUNCIL,GALWAY COUNTY COUNCIL
15228,58924.0,Galway County Council,17876,"Ballinderry ,",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,...,0.066946,0.213636,0.309091,0.617021,0.026087,0.73913,0.043478,0.273585,GALWAY COUNTY COUNCIL,GALWAY COUNTY COUNCIL
32790,114015.0,Laois County Council,20139,"Camross Village , Camross , Co. Laois",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,...,0.153846,0.25,0.333333,0.635514,0.0,0.0,0.0,0.681818,LAOIS COUNTY COUNCIL,LAOIS COUNTY COUNCIL
33833,118558.0,Leitrim County Council,19258,"Beihy , Drumlish , Co. Leitrim",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,...,0.157609,0.334802,0.255507,0.469965,0.023438,0.837209,0.046875,0.284483,LEITRIM COUNTY COUNCIL,LEITRIM COUNTY COUNCIL
12731,53382.0,Galway County Council,20755,"Bellayarha North ,",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,...,0.080189,0.195238,0.419048,0.652,0.009434,0.201835,0.0,0.53,GALWAY COUNTY COUNCIL,GALWAY COUNTY COUNCIL
15159,58781.0,Galway County Council,191978,"Dunlo ,",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,...,0.082988,0.276515,0.280303,0.497093,0.007874,0.984962,0.759398,0.262712,GALWAY COUNTY COUNCIL,GALWAY COUNTY COUNCIL
44147,160637.0,Meath County Council,AA160117,"Primatestown , Ashbourne , Co. Meath",,APPLICATION FINALISED,PERMISSION,REFUSED,1.0,,...,0.128205,0.212121,0.388889,0.590517,0.011236,0.755319,0.021277,0.470588,MEATH COUNTY COUNCIL,MEATH COUNTY COUNCIL
2971,9196.0,Cavan County Council,2360315,"Mullahoran , Kilcogy , Co. Cavan",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,Y,...,0.117188,0.310345,0.206897,0.571429,0.0,0.275362,0.362319,0.390625,CAVAN COUNTY COUNCIL,CAVAN COUNTY COUNCIL
50237,181963.0,Offaly County Council,20417,"KILLURAN , CO. OFFALY , R35 CC80",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,...,0.088235,0.359649,0.219298,0.547445,0.0,0.309091,0.018182,0.408163,OFFALY COUNTY COUNCIL,OFFALY COUNTY COUNCIL
47334,173674.0,Monaghan County Council,16414,"Drumskelt , Newbliss , Co. Monaghan",,APPLICATION FINALISED,PERMISSION,CONDITIONAL,1.0,,...,0.102041,0.477064,0.119266,0.527132,0.013699,1.0,0.972603,0.298507,MONAGHAN COUNTY COUNCIL,MONAGHAN COUNTY COUNCIL


In [9]:
#Listed every column present after the census merge, to choose which to keep
merged_columns = merged.columns
print(merged_columns)


Index(['OBJECTID', 'Planning Authority', 'Application Number',
       'Development Address', 'Development Postcode', 'Application Status',
       'Application Type', 'Decision', 'Number of Residential Units',
       'One-Off House', 'Shape__Area_x', 'Decision Binary', 'pa_clean',
       'full_address', 'lat', 'lon', 'geocode_success', 'geometry',
       'SA_GUID_2022', 'GUID', 'GEOGID', 'ED_ENGLISH', 'LOCAL_AUTHORITY',
       'Total Population (Normalisation)', 'Total dwellings (Normalisation)',
       'Shape__Area_y', 'PersonalVehicle', 'WalkCycle', 'PublicTransport',
       'PopulationDensity', 'UnderHalfHourJourney', 'UnderHourJourney',
       'OverHourJourney', 'NoLC', 'CollegeEducation', 'EmploymentRate',
       'NoCentralHeating', 'ServedByPublicSystem_Water',
       'ServedByPublicSystem_Sewerage', 'HasRenewableEnergy',
       'LOCAL_AUTHORITY_norm', 'Planning_Authority_norm'],
      dtype='object')


In [10]:
#Columns describing the planning application itself (identifier, address,
#decision and site area)
application_cols = [
    "OBJECTID",
    "Planning Authority",
    "full_address",
    "Decision",
    "Decision Binary",
    "Shape__Area_x",
]

#Columns taken from the census data for the small area containing the site
small_area_cols = [
    "ED_ENGLISH",
    "Total Population (Normalisation)",
    "Total dwellings (Normalisation)",
    "PopulationDensity",
    "Shape__Area_y",
    "PersonalVehicle",
    "WalkCycle",
    "PublicTransport",
    "UnderHalfHourJourney",
    "UnderHourJourney",
    "OverHourJourney",
    "NoLC",
    "CollegeEducation",
    "EmploymentRate",
    "NoCentralHeating",
    "ServedByPublicSystem_Water",
    "ServedByPublicSystem_Sewerage",
    "HasRenewableEnergy",
]

#Full list of columns to keep for training the model, in output order
keep_cols = application_cols + small_area_cols

#Mapping that disambiguates the two shape area columns:
#_x came from the planning data (site), _y from the census data (small area)
shape_area_names = {
    "Shape__Area_x": "site_shape_area",
    "Shape__Area_y": "smallarea_shape_area",
}

#Reduced dataset to only the necessary columns and renamed the shape areas
#rename returns a new frame, so the merged data itself is left untouched
merged_and_reduced = merged[keep_cols].rename(columns=shape_area_names)

#Saved new merged data to a csv (without the row index)
merged_and_reduced.to_csv("/content/drive/MyDrive/project/mergedData.csv", index=False)

#Inspected a reproducible random sample of the final merged and reduced data
merged_and_reduced.sample(10, random_state=1)



Unnamed: 0,OBJECTID,Planning Authority,full_address,Decision,Decision Binary,site_shape_area,ED_ENGLISH,Total Population (Normalisation),Total dwellings (Normalisation),PopulationDensity,...,UnderHalfHourJourney,UnderHourJourney,OverHourJourney,NoLC,CollegeEducation,EmploymentRate,NoCentralHeating,ServedByPublicSystem_Water,ServedByPublicSystem_Sewerage,HasRenewableEnergy
18406,64415.0,Galway County Council,"Gardenfield , Tuam , Co. Galway, Ireland",CONDITIONAL,1,9684.396973,KILLEEN,380.0,141.0,3.1e-05,...,0.638393,0.267857,0.09375,0.220472,0.370079,0.561688,0.0,0.669291,0.015625,0.322034
15228,58924.0,Galway County Council,"Ballinderry , Galway, Ireland",CONDITIONAL,1,10522.69043,BALLINDERRY,361.0,128.0,3e-05,...,0.535565,0.39749,0.066946,0.213636,0.309091,0.617021,0.026087,0.73913,0.043478,0.273585
32790,114015.0,Laois County Council,"Camross Village , Camross , Co. Laois, Ireland",CONDITIONAL,1,5523.181641,MARYMOUNT,148.0,59.0,1.1e-05,...,0.483516,0.362637,0.153846,0.25,0.333333,0.635514,0.0,0.0,0.0,0.681818
33833,118558.0,Leitrim County Council,"Beihy , Drumlish , Co. Leitrim, Ireland",CONDITIONAL,1,6059.454102,BEIHY,336.0,164.0,1.5e-05,...,0.559783,0.282609,0.157609,0.334802,0.255507,0.469965,0.023438,0.837209,0.046875,0.284483
12731,53382.0,Galway County Council,"Bellayarha North , Galway, Ireland",CONDITIONAL,1,7345.529785,BULLAUN,340.0,129.0,2.5e-05,...,0.646226,0.273585,0.080189,0.195238,0.419048,0.652,0.009434,0.201835,0.0,0.53
15159,58781.0,Galway County Council,"Dunlo , Galway, Ireland",CONDITIONAL,1,94895.475098,BALLINASLOE URBAN,449.0,156.0,0.000349,...,0.792531,0.124481,0.082988,0.276515,0.280303,0.497093,0.007874,0.984962,0.759398,0.262712
44147,160637.0,Meath County Council,"Primatestown , Ashbourne , Co. Meath, Ireland",REFUSED,0,221.108398,KILBREW,292.0,102.0,5.2e-05,...,0.576923,0.294872,0.128205,0.212121,0.388889,0.590517,0.011236,0.755319,0.021277,0.470588
2971,9196.0,Cavan County Council,"Mullahoran , Kilcogy , Co. Cavan, Ireland",CONDITIONAL,1,12378.716309,KILCOGY,204.0,85.0,4.2e-05,...,0.554688,0.328125,0.117188,0.310345,0.206897,0.571429,0.0,0.275362,0.362319,0.390625
50237,181963.0,Offaly County Council,"KILLURAN , CO. OFFALY , R35 CC80, Ireland",CONDITIONAL,1,6982.763672,KILLEIGH,167.0,57.0,0.00093,...,0.754902,0.156863,0.088235,0.359649,0.219298,0.547445,0.0,0.309091,0.018182,0.408163
47334,173674.0,Monaghan County Council,"Drumskelt , Newbliss , Co. Monaghan, Ireland",CONDITIONAL,1,68614.691406,NEWBLISS,176.0,76.0,0.004242,...,0.704082,0.193878,0.102041,0.477064,0.119266,0.527132,0.013699,1.0,0.972603,0.298507
