In [1]:
import geopandas as gpd
import yaml


In [2]:
gis_boundaries = gpd.read_file('data/statistical-gis-boundaries-london/ESRI/London_Ward_CityMerged.shp')

In [3]:
gis_boundaries.head()

Unnamed: 0,NAME,GSS_CODE,HECTARES,NONLD_AREA,LB_GSS_CD,BOROUGH,POLY_ID,geometry
0,Chessington South,E05000405,755.173,0.0,E09000021,Kingston upon Thames,50840,"POLYGON ((516401.600 160201.800, 516407.300 16..."
1,Tolworth and Hook Rise,E05000414,259.464,0.0,E09000021,Kingston upon Thames,117160,"POLYGON ((517829.600 165447.100, 517837.100 16..."
2,Berrylands,E05000401,145.39,0.0,E09000021,Kingston upon Thames,50449,"POLYGON ((518107.500 167303.400, 518114.300 16..."
3,Alexandra,E05000400,268.506,0.0,E09000021,Kingston upon Thames,50456,"POLYGON ((520480.000 166909.800, 520490.700 16..."
4,Beverley,E05000402,187.821,0.0,E09000021,Kingston upon Thames,117161,"POLYGON ((522071.000 168144.900, 522063.900 16..."


In [4]:
len(gis_boundaries)

625

In [5]:
gis_boundaries.NAME.unique()

array(['Chessington South', 'Tolworth and Hook Rise', 'Berrylands',
       'Alexandra', 'Beverley', 'Coombe Hill',
       'Chessington North and Hook', 'Surbiton Hill', 'Old Malden',
       "St. Mark's", 'Grove', 'Canbury', 'Norbiton', 'Coombe Vale',
       'St. James', 'Tudor', 'Coulsdon East', 'Selsdon and Ballards',
       'Coulsdon West', 'Waddon', 'Kenley', 'Purley', 'Sanderstead',
       'Heathfield', 'Fairfield', 'Broad Green', 'West Thornton',
       'Bensham Manor', 'Norbury', 'New Addington', 'Croham', 'Fieldway',
       'Shirley', 'Selhurst', 'Ashburton', 'Woodside', 'Thornton Heath',
       'Upper Norwood', 'South Norwood', 'Addiscombe', 'Darwin',
       'Hayes and Coney Hall', 'Bromley Common and Keston',
       'Chelsfield and Pratts Bottom', 'Biggin Hill', 'West Wickham',
       'Clock House', 'Kelsey and Eden Park', 'Farnborough and Crofton',
       'Shortlands', 'Bromley Town', 'Bickley', 'Petts Wood and Knoll',
       'Crystal Palace', 'Penge and Cator', 'Copers Cope'

In [6]:
gis_boundaries.to_crs('EPSG:4326').crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [7]:
parcels = gpd.read_file('data/london_select_parcels.geojson')
parcels_x_wards = parcels.to_crs('EPSG:4326').sjoin(gis_boundaries.to_crs('EPSG:4326'), predicate='within')
len(parcels_x_wards)

15942

In [8]:
parcels_x_wards.head()

Unnamed: 0,OBJECTID,SHAPE_Length,SHAPE_Area,geometry,index_right,NAME,GSS_CODE,HECTARES,NONLD_AREA,LB_GSS_CD,BOROUGH,POLY_ID
0,2562373,720.403206,6628.84615,"MULTIPOLYGON (((-0.14006 51.49382, -0.14028 51...",483,Vincent Square,E05000646,68.355,8.89,E09000033,Westminster,117485
1,2562384,555.76249,6923.9991,"MULTIPOLYGON (((-0.14142 51.49604, -0.14039 51...",483,Vincent Square,E05000646,68.355,8.89,E09000033,Westminster,117485
2,2562387,629.947674,10570.5603,"MULTIPOLYGON (((-0.13956 51.49634, -0.13836 51...",483,Vincent Square,E05000646,68.355,8.89,E09000033,Westminster,117485
3,2562396,439.552897,4390.7026,"MULTIPOLYGON (((-0.13796 51.49569, -0.13778 51...",483,Vincent Square,E05000646,68.355,8.89,E09000033,Westminster,117485
4,2562400,391.24614,7202.12535,"MULTIPOLYGON (((-0.13764 51.49565, -0.13839 51...",483,Vincent Square,E05000646,68.355,8.89,E09000033,Westminster,117485


In [9]:
in_sample = []
for n in parcels_x_wards['NAME'].unique():
    try:
        in_sample += list(parcels_x_wards[parcels_x_wards['NAME'] == n].sample(200, random_state=42).index)
    except ValueError:
        n_sample = len(parcels_x_wards[parcels_x_wards['NAME'] == n])
        print(n, n_sample)
        in_sample += list(parcels_x_wards[parcels_x_wards['NAME'] == n].sample(int(n_sample/10), random_state=42).index)
len(in_sample)

Vincent Square 60
Warwick 63
Knightsbridge and Belgravia 103
Tachbrook 30
Churchill 65
Cathedrals 192
East Walworth 106
Newington 75
Chaucer 90
Oval 115
Grange 119
Riverside 120
Faraday 70
Surrey Docks 152
New Cross 194
The Lane 117
Latchmere 107
Brockley 160
Ladywell 6
Crofton Park 8
Telegraph Hill 139
Evelyn 170
Peckham Rye 126
Nunhead 88
Rotherhithe 139
Livesey 153
St. Katharine's and Wapping 116
South Bermondsey 124
Camberwell Green 102
Peckham 91
Brunswick Park 75
Herne Hill 129
South Camberwell 133
East Dulwich 82
Blackwall and Cubitt Town 126
Greenwich West 74
Millwall 172
Village 178
Coldharbour 136
Prince's 101
Vassall 92
Stockwell 85
Bishop's 103
Ferndale 71
Larkhall 87
Clapham Common 79
Clapham Town 70
Brompton 74
Royal Hospital 52
Shaftesbury 69
Hans Town 65
St. Mary's Park 113
Stanley 61
Northcote 93
Brixton Hill 80
Tulse Hill 91
Balham 77
Thornton 101
Wandsworth Common 93
Fairfield 119
Cremorne 41
Parsons Green and Walham 70
Sands End 103
North End 52
Redcliffe 49
Fulham 

2572

In [10]:
import pandas as pd
addresses = pd.read_csv('data/fakelondonaddresses.csv')
addresses.head()

Unnamed: 0,display_address,city,postal_code
0,644 Richmond Road,London,SE13 7LM
1,98 Church Road,London,W63 9CB
2,88 Kings Road,London,N20 9KJ
3,39 Queens Road,London,EC17 3FP
4,9 Chester Road,London,N64 9BU


In [11]:
parcels['centroid_latitude'] = parcels.to_crs('EPSG:4326').centroid.y
parcels['centroid_longitude'] = parcels.to_crs('EPSG:4326').centroid.x


  parcels['centroid_latitude'] = parcels.to_crs('EPSG:4326').centroid.y

  parcels['centroid_longitude'] = parcels.to_crs('EPSG:4326').centroid.x


In [12]:
parcels.head()

Unnamed: 0,OBJECTID,SHAPE_Length,SHAPE_Area,geometry,centroid_latitude,centroid_longitude
0,2562373,720.403206,6628.84615,"MULTIPOLYGON (((529214.490 178848.230, 529199....",51.495289,-0.141481
1,2562384,555.76249,6923.9991,"MULTIPOLYGON (((529113.650 179092.040, 529185....",51.495315,-0.140407
2,2562387,629.947674,10570.5603,"MULTIPOLYGON (((529242.540 179129.530, 529328....",51.495587,-0.139376
3,2562396,439.552897,4390.7026,"MULTIPOLYGON (((529355.300 179059.710, 529367....",51.495956,-0.13865
4,2562400,391.24614,7202.12535,"MULTIPOLYGON (((529377.550 179055.690, 529323....",51.496293,-0.137812


In [13]:
parcels['has_listing'] = parcels.index.isin(in_sample)
parcels.head(20)

Unnamed: 0,OBJECTID,SHAPE_Length,SHAPE_Area,geometry,centroid_latitude,centroid_longitude,has_listing
0,2562373,720.403206,6628.84615,"MULTIPOLYGON (((529214.490 178848.230, 529199....",51.495289,-0.141481,True
1,2562384,555.76249,6923.9991,"MULTIPOLYGON (((529113.650 179092.040, 529185....",51.495315,-0.140407,False
2,2562387,629.947674,10570.5603,"MULTIPOLYGON (((529242.540 179129.530, 529328....",51.495587,-0.139376,False
3,2562396,439.552897,4390.7026,"MULTIPOLYGON (((529355.300 179059.710, 529367....",51.495956,-0.13865,False
4,2562400,391.24614,7202.12535,"MULTIPOLYGON (((529377.550 179055.690, 529323....",51.496293,-0.137812,False
5,2562401,393.23331,8561.69845,"MULTIPOLYGON (((529507.190 179193.450, 529507....",51.496262,-0.136403,True
6,2562402,443.691714,5504.1816,"MULTIPOLYGON (((529232.920 179155.190, 529213....",51.496499,-0.140726,False
7,2562428,2349.573745,38091.65295,"MULTIPOLYGON (((529593.660 179634.430, 529622....",51.500133,-0.135535,True
8,2562434,72.732093,330.58975,"MULTIPOLYGON (((529462.170 179609.170, 529465....",51.50051,-0.136326,False
9,2562509,962.590253,22127.096,"MULTIPOLYGON (((529210.060 179319.580, 529196....",51.497578,-0.140807,True


In [14]:
len(parcels)

16289

In [15]:
sample_parcels = parcels[parcels['has_listing'] == True].reset_index(drop=True).merge(addresses, left_index=True, right_index=True)
sample_parcels

Unnamed: 0,OBJECTID,SHAPE_Length,SHAPE_Area,geometry,centroid_latitude,centroid_longitude,has_listing,display_address,city,postal_code
0,2562373,720.403206,6628.84615,"MULTIPOLYGON (((529214.490 178848.230, 529199....",51.495289,-0.141481,True,644 Richmond Road,London,SE13 7LM
1,2562401,393.233310,8561.69845,"MULTIPOLYGON (((529507.190 179193.450, 529507....",51.496262,-0.136403,True,98 Church Road,London,W63 9CB
2,2562428,2349.573745,38091.65295,"MULTIPOLYGON (((529593.660 179634.430, 529622....",51.500133,-0.135535,True,88 Kings Road,London,N20 9KJ
3,2562509,962.590253,22127.09600,"MULTIPOLYGON (((529210.060 179319.580, 529196....",51.497578,-0.140807,True,39 Queens Road,London,EC17 3FP
4,2562517,1098.193317,22460.48230,"MULTIPOLYGON (((529310.830 179264.720, 529310....",51.498091,-0.138112,True,9 Chester Road,London,N64 9BU
...,...,...,...,...,...,...,...,...,...,...
2567,2604075,92.354289,556.95190,"MULTIPOLYGON (((527924.680 186649.330, 527940....",51.564181,-0.155567,True,85 South Street,London,E24 3DC
2568,2604076,414.164804,3402.81445,"MULTIPOLYGON (((528306.100 186701.570, 528305....",51.564478,-0.151621,True,73 Chester Road,London,SW79 3CH
2569,2604077,69.360973,296.14650,"MULTIPOLYGON (((527962.440 186689.780, 527963....",51.564482,-0.155371,True,839 The Crescent,London,E63 1OU
2570,2604078,196.984090,1670.80280,"MULTIPOLYGON (((528025.390 186728.360, 528033....",51.564684,-0.154589,True,179 King Street,London,E89 0UB


In [16]:
retyml = {'building_count': len(sample_parcels), 'region_key': 'london', 'location_info': [], 'display_address': []}
for lat, long, address in zip(sample_parcels['centroid_latitude'], sample_parcels['centroid_longitude'], sample_parcels['display_address']):
    retyml['location_info'].append({'centroid_latitude': lat, 'centroid_longitude': long})
    retyml['display_address'].append(address)
with open('sample_buildings.yml', 'w') as yaml_file:
    yaml.dump(retyml, yaml_file, default_flow_style=False,indent=2,sort_keys=False)

In [17]:
addresses = pd.read_csv('data/adds_london.csv')
parcels = parcels.merge(addresses, left_index=True, right_index=True)
retyml = {'building_count': len(parcels), 'region_key': 'london', 'location_info': [], 'display_address': []}
for lat, long, address in zip(parcels['centroid_latitude'], parcels['centroid_longitude'], parcels['display_address']):
    retyml['location_info'].append({'centroid_latitude': lat, 'centroid_longitude': long})
    retyml['display_address'].append(address)
with open('buildings.yml', 'w') as yaml_file:
    yaml.dump(retyml, yaml_file, default_flow_style=False,indent=2,sort_keys=False)

In [18]:
parcels.to_crs('EPSG:4326').to_file('central_london_parcels.geojson', driver="GeoJSON") 