In [1]:
import datetime
import glob
import math
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import box
import networkx as nx
from shapely.geometry import Point
import imageio

pd.options.mode.chained_assignment = None  # default='warn'


In [3]:
# Directory holding the input geodata files. Defaults to "/geodata"; set the
# GEODATA_DIR environment variable to run the notebook against another location.
path = os.environ.get("GEODATA_DIR", "/geodata")

In [4]:

def load_data(file_name, minx, miny, maxx, maxy):
    """Read `file_name` from the data directory `path`, restricted to a bounding box.

    The four coordinates are turned into a rectangular shapely geometry which
    is handed to `gpd.read_file` through its `bbox` argument.

    Returns whatever `gpd.read_file` returns for that file and window.
    """
    crop_window = box(minx, miny, maxx, maxy)
    source_file = os.path.join(path, file_name)
    return gpd.read_file(source_file, bbox=crop_window)

In [17]:
# Set up and load the input data.
# The whole polygon layer is read without cropping: there are only 1728 assets.
# A cropped load, kept for reference, would look like:
# gdf = load_data("buildings_raw_pts.shp", 1748570, 5426959, 1748841, 5427115)

gdf_polygon = gpd.read_file(os.path.join(path, "shapes_1200.shp"))
gdf_polygon["area"] = gdf_polygon.geometry.area  # m2

# Point version of the layer: same attributes, geometry replaced by the centroid.
gdf = gdf_polygon.copy()
gdf["geometry"] = gdf_polygon.geometry.centroid
gdf["X"] = gdf.geometry.x
gdf["Y"] = gdf.geometry.y
# d_short: distance from each polygon's exterior ring to its own centroid.
gdf["d_short"] = gdf_polygon.exterior.distance(gdf)
# d_long: the area divided by d_short.
gdf["d_long"] = gdf["area"] / gdf["d_short"]


## Costly function

Observation: **using `pd.merge`**, only one CPU core is doing the work.

There may be potential to parallelise this step with Spark + GeoMesa.

In [21]:

def build_edge_list(geodataframe, maximum_distance, polygon_file):
    """Build the directed edge list between all assets closer than a threshold.

    Every ordered (source, target) pair of rows of `geodataframe` is
    considered. The distance of a pair is the distance between the two
    polygons of `polygon_file` whose `TARGET_FID` matches the source and the
    target row. Pairs are kept when that distance is strictly below
    `maximum_distance` and different from zero.

    Compared with a merge-based cross join, the wide attribute table is only
    materialised for the pairs that survive the distance filter, and the
    azimuth is only computed for those pairs.

    Parameters
    ----------
    geodataframe : GeoDataFrame
        Node table with columns 'TARGET_FID', 'X', 'Y', 'geometry' and
        'IgnProb_bl'. Nodes are identified by their row position (0..n-1),
        which equals the index label for a default RangeIndex.
    maximum_distance : float
        Exclusive upper bound on the polygon-to-polygon distance of an edge.
    polygon_file : GeoDataFrame
        Polygon table with columns 'TARGET_FID' and 'geometry'.

    Returns
    -------
    pandas.DataFrame
        One row per kept pair with columns 'source', 'target',
        'source_<attr>' / 'target_<attr>' for each node attribute, 'v1', 'v2'
        (source minus target X / Y), 'euc_distance', 'azimuth' (degrees, from
        arctan2(v2, v1)) and 'bearing' (azimuth mapped into [0, 360)).
        Rows are ordered by target then source, and the index label of a row
        is ``target * n + source``.

    Raises
    ------
    pandas.errors.MergeError
        If 'TARGET_FID' is not unique in `polygon_file`; a duplicated key
        would otherwise silently misalign distances and pairs.
    """
    n_nodes = len(geodataframe)
    node_pos = np.arange(n_nodes)

    # Target-major enumeration of all ordered pairs: row k is
    # (target = k // n, source = k % n).
    pair_target = np.repeat(node_pos, n_nodes)
    pair_source = np.tile(node_pos, n_nodes)

    # Plain DataFrame view of the node attributes, addressed by position.
    node_attrs = pd.DataFrame(
        geodataframe[['TARGET_FID', 'X', 'Y', 'geometry', 'IgnProb_bl']]
    ).reset_index(drop=True)

    # Look up each node's polygon once (n rows) instead of once per pair (n^2 rows).
    footprints = polygon_file[['TARGET_FID', 'geometry']]
    node_polys = pd.merge(
        node_attrs[['TARGET_FID']],
        footprints,
        on='TARGET_FID',
        how='left',
        validate='many_to_one',
    )
    node_polys = gpd.GeoSeries(node_polys['geometry'])

    # Row-wise polygon distance for every pair; both series share the same
    # default index, so the comparison is strictly pairwise.
    src_polys = node_polys.iloc[pair_source].reset_index(drop=True)
    trg_polys = node_polys.iloc[pair_target].reset_index(drop=True)
    pair_distance = src_polys.distance(trg_polys).to_numpy()

    # Keep pairs within range. Zero distance removes the self-pairs.
    # NOTE(review): it also removes distinct polygons that touch or overlap
    # (distance 0) -- confirm this is intended.
    within_range = pair_distance < maximum_distance
    non_zero = pair_distance != 0
    kept = np.flatnonzero(within_range & non_zero)

    src_idx = pair_source[kept]
    trg_idx = pair_target[kept]
    data = pd.concat(
        [
            pd.DataFrame({'source': src_idx, 'target': trg_idx}),
            node_attrs.iloc[src_idx].reset_index(drop=True).add_prefix('source_'),
            node_attrs.iloc[trg_idx].reset_index(drop=True).add_prefix('target_'),
        ],
        axis=1,
    )
    # Label rows with their position in the full pair enumeration.
    data.index = kept

    data['v1'] = data['source_X'] - data['target_X']
    data['v2'] = data['source_Y'] - data['target_Y']
    data['euc_distance'] = pair_distance[kept]
    data['azimuth'] = np.degrees(np.arctan2(data['v2'], data['v1']))
    data['bearing'] = (data['azimuth'] + 360) % 360
    return data


In [26]:
%%time
## Create the edge list: pairs of assets whose polygons are closer than 450
## (the maximum_distance argument). No network object is built in this cell.
edges = build_edge_list(gdf, 450, gdf_polygon)


CPU times: user 1min 9s, sys: 393 ms, total: 1min 9s
Wall time: 1min 9s


In [23]:
# Display the edge table returned by build_edge_list.
edges

Unnamed: 0,source,target,source_TARGET_FID,source_X,source_Y,source_geometry,source_IgnProb_bl,target_TARGET_FID,target_X,target_Y,target_geometry,target_IgnProb_bl,v1,v2,euc_distance,azimuth,bearing
4,4,0,58153,1.745710e+06,5.428035e+06,POINT (1745710.337 5428035.235),0.000507,58152,1.745709e+06,5.428056e+06,POINT (1745708.763 5428056.083),0.000507,1.574096,-20.847924,3.609837,-85.682146,274.317854
8,8,0,58154,1.745747e+06,5.428051e+06,POINT (1745746.795 5428051.459),0.000507,58152,1.745709e+06,5.428056e+06,POINT (1745708.763 5428056.083),0.000507,38.031920,-4.624399,17.357184,-6.932709,353.067291
12,12,0,58155,1.745749e+06,5.428021e+06,POINT (1745748.650 5428021.351),0.000507,58152,1.745709e+06,5.428056e+06,POINT (1745708.763 5428056.083),0.000507,39.886863,-34.732510,33.943570,-41.048555,318.951445
16,16,0,58156,1.745713e+06,5.428008e+06,POINT (1745712.624 5428008.229),0.000507,58152,1.745709e+06,5.428056e+06,POINT (1745708.763 5428056.083),0.000507,3.861534,-47.853843,32.317248,-85.386551,274.613449
394,394,0,58395,1.745757e+06,5.428029e+06,POINT (1745756.679 5428029.299),0.000507,58152,1.745709e+06,5.428056e+06,POINT (1745708.763 5428056.083),0.000507,47.916030,-26.783944,37.908157,-29.204173,330.795827
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3088829,23,1757,5488,1.745626e+06,5.427758e+06,POINT (1745626.276 5427757.701),0.000352,5476,1.745658e+06,5.427726e+06,POINT (1745657.872 5427726.455),0.000352,-31.595477,31.245833,33.857206,135.318786,135.318786
3090156,1350,1757,5080,1.745702e+06,5.427704e+06,POINT (1745702.029 5427704.294),0.000352,5476,1.745658e+06,5.427726e+06,POINT (1745657.872 5427726.455),0.000352,44.157300,-22.160775,40.683044,-26.650236,333.349764
3090178,1372,1757,5092,1.745638e+06,5.427748e+06,POINT (1745638.017 5427748.114),0.000352,5476,1.745658e+06,5.427726e+06,POINT (1745657.872 5427726.455),0.000352,-19.854412,21.658983,17.781075,132.510942,132.510942
3090560,1754,1757,5474,1.745695e+06,5.427695e+06,POINT (1745695.056 5427695.003),0.000352,5476,1.745658e+06,5.427726e+06,POINT (1745657.872 5427726.455),0.000352,37.183961,-31.451685,36.063110,-40.225881,319.774119


In [27]:
# Number of edges kept after the distance filter.
len(edges)

1153266