In [13]:
import numpy as np
import pandas as pd
import json

In [14]:
# Load the land-use records. The file is opened as UTF-8 explicitly so the
# result does not depend on the platform's default text encoding, and
# json.load parses straight from the handle instead of first copying the
# whole file into a separate string.
with open("formatted-data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Distinct land-use labels that occur in the records.
label_set = {item['label'] for item in data}

label_set

{'AGRICULTURE',
 'BEACH AREA',
 'BUSINESS 1',
 'BUSINESS 1 - WHITE',
 'BUSINESS 2',
 'BUSINESS 2 - WHITE',
 'BUSINESS PARK',
 'BUSINESS PARK - WHITE',
 'CEMETERY',
 'CIVIC & COMMUNITY INSTITUTION',
 'COMMERCIAL',
 'COMMERCIAL & RESIDENTIAL',
 'COMMERCIAL / INSTITUTION',
 'EDUCATIONAL INSTITUTION',
 'HEALTH & MEDICAL CARE',
 'HOTEL',
 'LIGHT RAPID TRANSIT',
 'MASS RAPID TRANSIT',
 'OPEN SPACE',
 'PARK',
 'PLACE OF WORSHIP',
 'PORT / AIRPORT',
 'RESERVE SITE',
 'RESIDENTIAL',
 'RESIDENTIAL / INSTITUTION',
 'RESIDENTIAL WITH COMMERCIAL AT 1ST STOREY',
 'ROAD',
 'SPECIAL USE',
 'SPORTS & RECREATION',
 'TRANSPORT FACILITIES',
 'UTILITY',
 'WATERBODY',
 'WHITE'}

In [15]:
# Keep only the records labelled as water bodies, in their original order.
water_bodies_data = [record for record in data if record['label'] == 'WATERBODY']

water_bodies_data

[{'label': 'WATERBODY',
  'coordinates': [[1.32961777556441, 103.63303953559],
   [1.32969246090765, 103.633069792738],
   [1.3296796362503, 103.633081582749],
   [1.32964132233831, 103.633116817986],
   [1.32856025346556, 103.634252635971],
   [1.32848497107875, 103.634331749417],
   [1.32832016533783, 103.634500969392],
   [1.32829263063715, 103.634527281128],
   [1.32738673043273, 103.635479132949],
   [1.32738662915101, 103.635479213827],
   [1.32734995550179, 103.635444577662],
   [1.32735531795156, 103.635439608258],
   [1.32825594708233, 103.634493327867],
   [1.32828099548554, 103.634464374595],
   [1.32843591583179, 103.63428593719],
   [1.32959189630941, 103.633071337363],
   [1.32961226861507, 103.633046850231],
   [1.32961777556441, 103.63303953559]]},
 {'label': 'WATERBODY',
  'coordinates': [[1.33731595614256, 103.635767674884],
   [1.33731117375347, 103.635765970712],
   [1.33734258777839, 103.635735025628],
   [1.33736130380553, 103.635716589417],
   [1.33737967620346, 

In [16]:
# Records labelled 'PARK' are collected together with 'OPEN SPACE' ones.
open_spaces_data = [
    record
    for record in data
    if record['label'] in ('OPEN SPACE', 'PARK')
]

open_spaces_data

[{'label': 'OPEN SPACE',
  'coordinates': [[1.33889054098421, 103.816575999563],
   [1.33893989409157, 103.816594759341],
   [1.33897939864276, 103.816609609557],
   [1.33907549302405, 103.816643024518],
   [1.33912140318851, 103.816657875591],
   [1.3391512870705, 103.816666957125],
   [1.33920753770351, 103.816676723183],
   [1.33925053663582, 103.816679479672],
   [1.33928035894941, 103.816678101081],
   [1.3393081003062, 103.81667465492],
   [1.3393351480154, 103.816671207865],
   [1.33935872656363, 103.816665007648],
   [1.33938161326355, 103.816657427248],
   [1.33940866002117, 103.81664709094],
   [1.33942461115456, 103.816640198884],
   [1.33943917498341, 103.816631931143],
   [1.33946646498091, 103.816616636583],
   [1.33949753153626, 103.816591435382],
   [1.34016859407765, 103.816029073721],
   [1.34069387308103, 103.815121107026],
   [1.34046326575923, 103.814906008863],
   [1.34057061288125, 103.814782381544],
   [1.34103243446258, 103.814747273137],
   [1.34106746771771, 

In [17]:
from shapely.geometry import Point, Polygon

# First water-body record, presumably kept as a sample input for manual
# checks; raises IndexError if water_bodies_data is empty.
waterbody_test = water_bodies_data[0]

def _area_polygon(area):
    """Build a shapely Polygon from a land-use record, or return None.

    ``area['coordinates']`` may be a flat ring ``[[lat, lon], ...]`` or a
    nested list whose first element is the ring (vertices then carry extra
    components after lat/lon, which are dropped). Only that first ring is
    used. ``None`` is returned when there are no coordinates or the ring has
    fewer than 4 vertices.
    """
    ring = area['coordinates']
    if not ring:
        return None
    first = ring[0]
    if first and isinstance(first[0], list):
        # Nested form: take the first ring and keep only the two leading
        # components of every vertex.
        ring = [[vertex[0], vertex[1]] for vertex in first]
    if len(ring) < 4:
        return None
    return Polygon(ring)


def distance_to_area(current_coordinates, area):
    """Return the distance from a point to a land-use area.

    Parameters
    ----------
    current_coordinates : sequence of two numbers
        The point, in the same (lat, lon) order as the area's vertices.
    area : dict
        Record with a ``'coordinates'`` entry (see ``_area_polygon``).

    Returns
    -------
    float
        Result of shapely's ``Point.distance``. Shapely works on a plane, so
        the value is in the units of the coordinates (degrees for lat/lon
        input), not kilometres. ``float('inf')`` is returned when no polygon
        can be built from the record.
    """
    lat, lon = current_coordinates
    polygon = _area_polygon(area)
    if polygon is None:
        return float('inf')
    return Point(lat, lon).distance(polygon)


def is_point_inside_area(current_coordinates, area):
    """Return whether a point lies inside a land-use area.

    Takes the same arguments as ``distance_to_area``. The answer is shapely's
    ``Polygon.contains``; ``False`` is returned when no polygon can be built
    from the record.
    """
    lat, lon = current_coordinates
    polygon = _area_polygon(area)
    if polygon is None:
        return False
    return polygon.contains(Point(lat, lon))

In [18]:
# Empty result table; the column order fixes the output schema up front.
df = pd.DataFrame(columns=[
    'latitude',
    'longitude',
    'land_use_label',
    'distance_to_waterbody',
    'distance_to_open_space',
])

In [19]:
# Sanity check of shapely's accessor naming: .y is the second constructor
# argument, so a Point built as Point(lat, lon) exposes the longitude as .y.
p = Point(1, 2)
p.y

2.0

In [20]:
import math 

# Distance cut-off that within_distance() compares haversine results against.
threshold = 0.5 # in km

def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in kilometres between two points.

    All four arguments are in degrees. Note the argument order: longitude
    comes first, then latitude, for each point.
    """
    # Convert latitude and longitude from degrees to radians
    lon1, lat1, lon2, lat2 = map(math.radians, (lon1, lat1, lon2, lat2))

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise a domain error; clamp first.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # Radius of Earth in kilometers
    R = 6371.0
    return R * c

def within_distance(point1, point2):
    """Tell whether two points are closer together than ``threshold`` km.

    Each point's ``.y`` is handed to ``haversine`` as the longitude and its
    ``.x`` as the latitude, so the points are expected to have been built as
    ``Point(lat, lon)``.
    """
    separation_km = haversine(point1.y, point1.x, point2.y, point2.x)
    return separation_km < threshold

In [21]:
from tqdm import tqdm

# Centroids of the zones kept so far; used to thin out the sample.
appended_points = []
# Rows are collected here and turned into a DataFrame once at the end.
# Assigning df.loc[len(df)] per row is slow, and because df was not reset
# here, re-running the cell used to append duplicate rows.
records = []

for curr in tqdm(data, desc="Processing Land Use Data"):

    # Normalise the zone outline to a flat [[lat, lon], ...] ring.
    polygon_coord = curr['coordinates']
    if isinstance(polygon_coord[0][0], list):
        # Nested form: use the first ring and keep only lat/lon per vertex.
        polygon_coord = [[vertex[0], vertex[1]] for vertex in polygon_coord[0]]

    # Rings with fewer than 4 vertices are skipped.
    if len(polygon_coord) < 4:
        continue

    # Determine the center of the current land use zone
    polygon = Polygon(polygon_coord)
    center = polygon.centroid

    # Skip the zone if its center is within `threshold` km of a kept center.
    if any(within_distance(center, kept) for kept in appended_points):
        continue

    # Vertices are (lat, lon), so center.x is the latitude and center.y the
    # longitude.
    coord = [center.x, center.y]
    appended_points.append(center)

    label = curr['label']

    # Nearest water body / open space; None when the respective list is empty.
    # distance_to_area works in coordinate units (degrees), not kilometres.
    min_distance_to_waterbody = min(
        (distance_to_area(coord, waterbody) for waterbody in water_bodies_data),
        default=None,
    )
    min_distance_to_open_space = min(
        (distance_to_area(coord, open_space) for open_space in open_spaces_data),
        default=None,
    )

    records.append({
        'latitude': coord[0],
        'longitude': coord[1],
        'land_use_label': label,
        'distance_to_waterbody': min_distance_to_waterbody,
        'distance_to_open_space': min_distance_to_open_space
    })

# Build the result table in one go; the explicit column list keeps the schema
# stable even when no zone was kept.
df = pd.DataFrame(
    records,
    columns=['latitude', 'longitude', 'land_use_label', 'distance_to_waterbody', 'distance_to_open_space']
)

Processing Land Use Data: 100%|██████████| 113212/113212 [23:24<00:00, 80.62it/s] 


In [22]:
# Show the zone centers that were kept by the processing loop.
appended_points

[<POINT (1.341 103.817)>,
 <POINT (1.342 103.835)>,
 <POINT (1.376 103.734)>,
 <POINT (1.404 103.867)>,
 <POINT (1.331 103.64)>,
 <POINT (1.337 103.639)>,
 <POINT (1.329 103.634)>,
 <POINT (1.343 103.637)>,
 <POINT (1.44 103.853)>,
 <POINT (1.446 103.867)>,
 <POINT (1.454 103.851)>,
 <POINT (1.418 103.97)>,
 <POINT (1.407 103.954)>,
 <POINT (1.415 104.044)>,
 <POINT (1.436 104.065)>,
 <POINT (1.407 103.978)>,
 <POINT (1.407 103.773)>,
 <POINT (1.401 103.78)>,
 <POINT (1.353 103.79)>,
 <POINT (1.335 103.644)>,
 <POINT (1.331 103.65)>,
 <POINT (1.33 103.645)>,
 <POINT (1.326 103.642)>,
 <POINT (1.325 103.648)>,
 <POINT (1.419 103.909)>,
 <POINT (1.414 103.903)>,
 <POINT (1.402 103.898)>,
 <POINT (1.348 103.639)>,
 <POINT (1.329 103.656)>,
 <POINT (1.317 103.652)>,
 <POINT (1.323 103.66)>,
 <POINT (1.324 103.653)>,
 <POINT (1.318 103.658)>,
 <POINT (1.318 103.646)>,
 <POINT (1.32 103.64)>,
 <POINT (1.313 103.642)>,
 <POINT (1.313 103.652)>,
 <POINT (1.311 103.66)>,
 <POINT (1.307 103.65)>

In [23]:
# Display the assembled land-use table.
df

Unnamed: 0,latitude,longitude,land_use_label,distance_to_waterbody,distance_to_open_space
0,1.341371,103.816937,OPEN SPACE,0.001572,0.000000
1,1.341741,103.834931,ROAD,0.001046,0.000068
2,1.376230,103.734428,PLACE OF WORSHIP,0.000681,0.000333
3,1.404280,103.866961,ROAD,0.009188,0.001341
4,1.330695,103.639991,BUSINESS 2,0.002164,0.015768
...,...,...,...,...,...
1401,1.372259,103.917054,ROAD,0.004993,0.005235
1402,1.331814,103.630235,WATERBODY,0.000000,0.018562
1403,1.346468,104.028241,RESERVE SITE,0.009619,0.000282
1404,1.282489,103.780019,UTILITY,0.006392,0.000158


In [24]:
# Write the table to CSV; index=False leaves the row index out of the file.
df.to_csv("land_use_data.csv", index=False)