<a href="https://colab.research.google.com/github/WenqiLiao/Underground_Utilities/blob/main/adjacent_angle_shapefile_process_updated_slope.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# -----import package-----

In [None]:
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import random
from shapely.ops import linemerge
from shapely.wkt import loads
from shapely.geometry import LineString
from shapely.geometry import Point, Polygon
from scipy.spatial import Delaunay

# -----point file-----

In [None]:
df_point = gpd.read_file('hydrant_location.shp')

In [None]:
df_point['id'] = df_point.reset_index().index

In [None]:
df_point

Unnamed: 0,id,geometry
0,0,POINT (-74.00698 40.74748)
1,1,POINT (-74.00722 40.74722)
2,2,POINT (-74.00721 40.74697)
3,3,POINT (-74.00722 40.74722)
4,4,POINT (-74.00728 40.74667)
...,...,...
2295,2295,POINT (-73.97976 40.71559)
2296,2296,POINT (-73.97987 40.71361)
2297,2297,POINT (-73.98387 40.72077)
2298,2298,POINT (-73.97780 40.71833)


# -----road file-----

In [None]:
df_road = gpd.read_file('road_locations.shp')

In [None]:
df_road

Unnamed: 0,id,Road_Name,geometry
0,1,Front St,"LINESTRING (-74.01301 40.70215, -74.01167 40.7..."
1,1,Front St,"LINESTRING (-74.01171 40.70247, -74.00999 40.7..."
2,1,Front St,"LINESTRING (-74.00577 40.70566, -74.00396 40.7..."
3,1,Front St,"LINESTRING (-74.00385 40.70651, -74.00142 40.7..."
4,2,Water,"LINESTRING (-74.01270 40.70275, -74.01180 40.7..."
...,...,...,...
331,248,Kenmare St,"LINESTRING (-73.99741 40.72164, -73.99389 40.7..."
332,249,Duane St,"LINESTRING (-74.00203 40.71159, -74.00252 40.7..."
333,250,City Hall,"LINESTRING (-74.00296 40.71334, -74.00150 40.7..."
334,251,Dutch,"LINESTRING (-74.00799 40.70899, -74.00720 40.7..."


# -----calculate distance between road and point-----

In [None]:
#find the 1 closest road for each point
def find_close_road(point, roads_df):
    close_road = None
    min_distance = float('inf')

    for idx, road in roads_df.iterrows():
        distance = point.distance(road['geometry'])
        if distance < min_distance:
            min_distance = distance
            close_road = road['id']
    return close_road

In [None]:
#apply above method to all points in df_point
df_point['close_road'] = df_point.apply(lambda row: find_close_road(row['geometry'], df_road), axis=1)

In [None]:
def find_closest_road(point, roads_df, min_distance):
    closest_road_id = []

    for idx, road in roads_df.iterrows():
        distance = point.distance(road['geometry'])
        if distance < min_distance:
            closest_road_id.append(road['id'])

    return closest_road_id

In [None]:
df_point['closest_road_id'] = df_point.apply(lambda row: find_closest_road(row['geometry'], df_road, 0.0004), axis=1)

In [None]:
df_point

Unnamed: 0,id,geometry,close_road,closest_road_id
0,0,POINT (-74.00698 40.74748),119,"[119, 170]"
1,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]"
2,2,POINT (-74.00721 40.74697),120,"[120, 170]"
3,3,POINT (-74.00722 40.74722),119,"[119, 120, 170]"
4,4,POINT (-74.00728 40.74667),120,"[120, 121, 170]"
...,...,...,...,...
2295,2295,POINT (-73.97976 40.71559),234,"[33, 234]"
2296,2296,POINT (-73.97987 40.71361),30,"[30, 35, 235]"
2297,2297,POINT (-73.98387 40.72077),221,[221]
2298,2298,POINT (-73.97780 40.71833),199,"[199, 235]"


# -----block file-----

In [None]:
def blockInfo(shapefile):
    gdf = gpd.read_file(shapefile)
    block_info_list = []
    for idx, row in gdf.iterrows():
        block_id = row['id']
        polygon = row['geometry']
        corners = list(polygon.exterior.coords)
        block_info = {
            'blockID': block_id,
            'corners': corners
        }
        block_info_list.append(block_info)
    return block_info_list

In [None]:
def blockIdentify(hydrantCenter, blocks):
    hydrantPoint = Point(hydrantCenter)
    for block in blocks:
        blockCoords = block['corners']
        blockPolygon = Polygon(blockCoords)
        if hydrantPoint.within(blockPolygon):
            hydrantBlockID = block['blockID']
            return hydrantBlockID
    return None

In [None]:
blocks = blockInfo("1920s_blocks_complete.shp")
df_point['block_id'] = df_point.apply(lambda row: blockIdentify(row['geometry'], blocks), axis=1)

In [None]:
# Check if the entire dataframe contains NA values
is_na_anywhere = df_point.isna().any().any()

print("Dataframe contains NA values:", is_na_anywhere)

Dataframe contains NA values: True


In [None]:
df_point

Unnamed: 0,id,geometry,close_road,closest_road_id,block_id
0,0,POINT (-74.00698 40.74748),119,"[119, 170]",1.0
1,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0
2,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0
3,3,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0
4,4,POINT (-74.00728 40.74667),120,"[120, 121, 170]",3.0
...,...,...,...,...,...
2295,2295,POINT (-73.97976 40.71559),234,"[33, 234]",505.0
2296,2296,POINT (-73.97987 40.71361),30,"[30, 35, 235]",901.0
2297,2297,POINT (-73.98387 40.72077),221,[221],437.0
2298,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0


# -----Hydrant Class-----

In [None]:
class Hydrant:

    def __init__(self, id, x, y, block_id, closest_road_id, close_road): # constructor

        # From shapefile
        self.pointID = id
        self.x = x
        self.y = y
        self.blockID = block_id
        self.roadID = closest_road_id
        self.closeRoadID = close_road

        # Will initialize once into network generation
        self.branchID = 0
        self.neighbor = [] # List to store all the neighbors of this hydrant
        self.edge = [] # Dict to store edge information

        self.elevation = 0

# -----Edge Class-----

In [None]:
class Edge:

    def __init__(self, id, p1, p2, length): # constructor

        self.edgeID = id
        self.p1 = p1 # id
        self.p2 = p2 # id
        self.length = length
        self.cross_road_id = 0
        self.angle_with_road = 0
        self.adjacent_angle_cost = 0
        self.cost = 0

# -----convert the above dataframe to hydrant object-----

In [None]:
# Create an empty list to store the 'hydrant' instances
hydrant_instances = []

# Iterate through each row in the DataFrame and create 'hydrant' instances
for index, row in df_point.iterrows():
    # Get the Point object from the 'geometry' column
    point = row['geometry']

    # Extract 'x' and 'y' coordinates from the Point object
    x, y = point.x, point.y

    # Create 'hydrant' instance and add it to the list
    hydrant_obj = Hydrant(row['id'], x, y, row['block_id'], row['closest_road_id'], row['close_road'])
    hydrant_instances.append(hydrant_obj)

In [None]:
hydrant_instances[2].blockID

2.0

In [None]:
# Function to calculate the Euclidean distance between two points in meters
def distance(p1, p2):
  p1Lat, p1Long = p1[0], p1[1]
  p2Lat, p2Long = p2[0], p2[1]
  R = 6371000  # Earth's radius in meters
  lat1_rad = math.radians(p1Lat)
  lon1_rad = math.radians(p1Long)
  lat2_rad = math.radians(p2Lat)
  lon2_rad = math.radians(p2Long)

  delta_lat = lat2_rad - lat1_rad
  delta_lon = lon2_rad - lon1_rad

  a = math.sin(delta_lat / 2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2)**2
  c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

  distance = R * c
  return distance

# -----Triangulation-----

In [None]:
# Extract x and y coordinates from hydrant instances
hydrant_points = [(hydrant.x, hydrant.y) for hydrant in hydrant_instances]

# Convert the list of points to a numpy array
points = np.array(hydrant_points)

# Perform Delaunay triangulation
triangulation = Delaunay(points)

# -----Filter out edges with different roadID-----

# -----Filter out edges cross Road that exceed the maximum-----

# -----Filter cross block with different roadID-----

In [None]:
# set the maximum length
max_edge_length = 166.804848

In [None]:
#two functions altered for the closest point
def edge_in_block(hydrant1, hydrant2):
    return hydrant1.blockID == hydrant2.blockID

def blocked_by_block(hydrant1, hydrant2):
    if edge_in_block(hydrant1, hydrant2):
        if hydrant1.closeRoadID == hydrant2.closeRoadID:
            return False
        return True
    else:
        return False

In [None]:
# Set to store filtered edges (edges that don't exceed the maximum length)
filtered_edges_road_cross = set()
edge_set_pro = []

In [None]:
# track edgeID
edge_id = 0

In [None]:
# Loop through each triangle (simplex)
for simplex in triangulation.simplices:

    # Get the indices of the points forming the edges of the triangle
    edge1 = frozenset([simplex[0], simplex[1]])
    edge2 = frozenset([simplex[1], simplex[2]])
    edge3 = frozenset([simplex[0], simplex[2]])

    # Get the actual points from the 'points' list using the indices
    point1, point2, point3 = points[simplex[0]], points[simplex[1]], points[simplex[2]]

    # Get the actual points from the 'hydrant_instances' list using the indices
    hydrant1, hydrant2, hydrant3 = hydrant_instances[simplex[0]], hydrant_instances[simplex[1]], hydrant_instances[simplex[2]]

    # Calculate the edge lengths
    edge1_length = distance(point1, point2)
    edge2_length = distance(point2, point3)
    edge3_length = distance(point1, point3)

    # Check each closest road ID for each point in the triangle
    for road_id in df_point.at[simplex[0], 'closest_road_id']:
        if edge1_length <= max_edge_length \
            and road_id in df_point.at[simplex[1], 'closest_road_id'] \
            and not blocked_by_block(hydrant1, hydrant2):
            filtered_edges_road_cross.add(edge1)
            edge_obj = Edge(edge_id, simplex[0], simplex[1], edge1_length)
            edge_set_pro.append(edge_obj)
            edge_id += 1
            hydrant1.neighbor.append(hydrant2)
            hydrant1.edge.append(edge_obj)
            break  # Break the loop if an edge is found for this road_id
    for road_id in df_point.at[simplex[1], 'closest_road_id']:
        if edge2_length <= max_edge_length \
            and road_id in df_point.at[simplex[2], 'closest_road_id'] \
            and not blocked_by_block(hydrant2, hydrant3):
            filtered_edges_road_cross.add(edge2)
            edge_obj = Edge(edge_id, simplex[1], simplex[2], edge2_length)
            edge_set_pro.append(edge_obj)
            edge_id += 1
            hydrant2.neighbor.append(hydrant3)
            hydrant2.edge.append(edge_obj)
            break
    for road_id in df_point.at[simplex[0], 'closest_road_id']:
        if edge3_length <= max_edge_length \
            and road_id in df_point.at[simplex[2], 'closest_road_id'] \
            and not blocked_by_block(hydrant1, hydrant3):
            filtered_edges_road_cross.add(edge3)
            edge_obj = Edge(edge_id, simplex[0], simplex[2], edge3_length)
            edge_set_pro.append(edge_obj)
            edge_id += 1
            hydrant1.neighbor.append(hydrant3)
            hydrant3.edge.append(edge_obj)
            break

In [None]:
# -----Create a GeoDataFrame for filtered edges-----

# Create a list of LineString objects for filtered edges
lines = []
for edge in filtered_edges_road_cross:
    edge_points = list(edge)
    line = LineString(np.array(points)[edge_points])
    lines.append(line)

# Create a GeoDataFrame from the LineString objects
gdf_filtered_edges_road_cross = gpd.GeoDataFrame(geometry=lines)

# Save the GeoDataFrame as a shapefile
gdf_filtered_edges_road_cross.to_file('filtered_edges_road_block.shp')

In [None]:
gdf_filtered_edges_road_cross

Unnamed: 0,geometry
0,"LINESTRING (-74.00202 40.72984, -74.00106 40.7..."
1,"LINESTRING (-73.99392 40.73044, -73.99417 40.7..."
2,"LINESTRING (-74.00607 40.73459, -74.00568 40.7..."
3,"LINESTRING (-74.00811 40.72302, -74.00806 40.7..."
4,"LINESTRING (-74.00315 40.74150, -74.00203 40.7..."
...,...
4372,"LINESTRING (-73.98869 40.73765, -73.98814 40.7..."
4373,"LINESTRING (-73.99229 40.73736, -73.99301 40.7..."
4374,"LINESTRING (-74.01230 40.71299, -74.01251 40.7..."
4375,"LINESTRING (-74.01233 40.71153, -74.01220 40.7..."


# -----Calculate angle cross road for each edge-----

In [None]:
# def calculate_angle(p1, p2):
#     dx = p2[0] - p1[0]
#     dy = p2[1] - p1[1]
#     return math.degrees(math.atan2(dy, dx))

In [None]:
# def unit_vector(p1, p2):
#     dx = p2[0] - p1[0]
#     dy = p2[1] - p1[1]
#     length = np.sqrt(dx**2 + dy**2)
#     if length > 0:
#         return dx / length, dy / length
#     else:
#         return 0, 0

In [None]:
# def find_edge_crossing_road(edges, roads_df):
#     for edge in edges:
#         road_id = None
#         min_distance = float('inf')
#         intersection_angle = 0  # Initialize the angle at the intersection

#         # Get the actual points from the 'points' list using the indices
#         point1, point2 = points[edge.p1], points[edge.p2]

#         # Calculate the unit direction vector of the edge
#         edge_vector = unit_vector(point1, point2)

#         # Calculate the angle between the edge and each road
#         for _, road_row in roads_df.iterrows():
#             road_geometry = road_row['geometry']

#             # Assuming road_geometry is a LINESTRING object containing coordinates of the road
#             road_coords = road_geometry.coords[:]
#             road_angle = calculate_angle(road_coords[0], road_coords[-1])

#             # Calculate the unit direction vector of the road
#             road_vector = unit_vector(road_coords[0], road_coords[-1])

#             # Calculate the angle between the edge and the road
#             angle_diff = np.arccos(np.clip(np.dot(edge_vector, road_vector), -1.0, 1.0))

#             # Convert the angle from radians to degrees
#             angle_diff_degrees = np.degrees(angle_diff)

#             # Check if this road is closer than previous ones
#             if angle_diff_degrees < min_distance:
#                 min_distance = angle_diff_degrees
#                 road_id = road_row['id']
#                 intersection_angle = road_angle

#         # Now you have the road_id that the edge crosses, the angle between the edge and the road,
#         # and the angle at the intersection of the edge and the road.
#         # You can store this information in the edge object if desired.
#         edge.cross_road_id = road_id
#         edge.angle_with_road = min_distance
#         edge.intersection_angle = intersection_angle


In [None]:
# find_edge_crossing_road(edge_set_pro, df_road)

In [None]:
import math
from sympy import Point, Line, pi


def dot(vA, vB):
    return vA[0]*vB[0]+vA[1]*vB[1]

def ang(lineA, lineB):
    # Get nicer vector form
    vA = [(lineA[0][0]-lineA[1][0]), (lineA[0][1]-lineA[1][1])]
    vB = [(lineB[0][0]-lineB[1][0]), (lineB[0][1]-lineB[1][1])]
    # Get dot prod
    dot_prod = dot(vA, vB)
    # Get magnitudes
    magA = dot(vA, vA)**0.5
    magB = dot(vB, vB)**0.5
    # Get cosine value
    cos_ = dot_prod/magA/magB
    # Get angle in radians and then convert to degrees
    angle = math.acos(dot_prod/magB/magA)
    # Basically doing angle <- angle mod 360
    ang_deg = math.degrees(angle)%360

    if ang_deg-180>=0:
        # As in if statement
        return 360 - ang_deg
    else:
        return ang_deg


def angle_cost_function(edge, all_edges):
  # obtuse_angles = 0
  # acute_angles = 0
  neighboring_edges_adjacency_list = find_neighbors(edge, all_edges)
  cost = 0
  for neighbor in neighboring_edges_adjacency_list[edge]:
    try:
      angle = (ang(neighbor, edge))
      # print("angle:", angle)
      if (angle > 150 and (angle > 75 and angle < 105)):
        cost += (0.2)
        # obtuse_angles += 1
      else:
        cost += (0.8)
        # acute_angles += 1
    except ValueError:
        # print("ValueError")
        pass
  # print("obtuse angles:", obtuse_angles)
  # print("acute angles:", acute_angles)
  return cost



def find_neighbors(edge, all_edges):
  neighboring_edges_adjacency_list = {}
  target_coordinate = edge[0]
  for search_edge1 in all_edges:
    if search_edge1[0] == target_coordinate or search_edge1[1] == target_coordinate:
      if not(edge in neighboring_edges_adjacency_list):
          # print("new target key created")
          neighboring_edges_adjacency_list[edge] = []
      # print("new value added to target")
      neighboring_edges_adjacency_list[edge].append(search_edge1)
      # print("after adding the value:", neighboring_edges_adjacency_list[target])
  target_coordinate = edge[1]
  for search_edge2 in all_edges:
    if search_edge2[0] == target_coordinate or search_edge2[1] == target_coordinate:
      if not(edge in neighboring_edges_adjacency_list):
          # print("new target key created")
          neighboring_edges_adjacency_list[edge] = []
      # print("new value added to target")
      neighboring_edges_adjacency_list[edge].append(search_edge2)
      # print("after adding the value:", neighboring_edges_adjacency_list[target])
  return neighboring_edges_adjacency_list

In [None]:
all_edges = []
for edge in edge_set_pro:
    # edge_points = list(edge)
    # print("X Coordinate:", np.array(points)[edge_points, 0], "Y Coordinate:", np.array(points)[edge_points, 1])
    first_vertex = (hydrant_instances[edge.p1].x, hydrant_instances[edge.p1].y)
    second_vertex = (hydrant_instances[edge.p2].x, hydrant_instances[edge.p2].y)
    edge_endpoint = (tuple(first_vertex), tuple(second_vertex))
    # print("edge_endpoint", edge_endpoint)
    all_edges.append(tuple(edge_endpoint))

for edge in edge_set_pro:
    first_vertex = (hydrant_instances[edge.p1].x, hydrant_instances[edge.p1].y)
    second_vertex = (hydrant_instances[edge.p2].x, hydrant_instances[edge.p2].y)
    edge_endpoint = (tuple(first_vertex), tuple(second_vertex))
    edge.intersection_angle = angle_cost_function(edge_endpoint, all_edges)



In [None]:
# Define a function to create a list of dictionaries containing the relevant attributes of each Edge object
def edge_attributes_to_list(edges):
    edge_list = []
    for edge in edges:
        edge_dict = {
            'edgeID': edge.edgeID,
            'p1': edge.p1,
            'p2': edge.p2,
            'length': edge.length,
            'cross_road_id': edge.cross_road_id,
            'angle_with_road': edge.angle_with_road,
            'intersection_angle': edge.intersection_angle
        }
        edge_list.append(edge_dict)
    return edge_list

# Call the function to convert the Edge objects to a list of dictionaries
edge_list = edge_attributes_to_list(edge_set_pro)

# Create a DataFrame from the list of dictionaries
edge_df = pd.DataFrame(edge_list)

# Display the DataFrame containing edge information
edge_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle
0,0,293,312,73.819498,0,0,14.4
1,1,312,304,28.143167,0,0,14.4
2,2,293,304,45.696590,0,0,19.2
3,3,307,304,61.087238,0,0,9.6
4,4,312,304,28.143167,0,0,14.4
...,...,...,...,...,...,...,...
8747,8747,445,436,48.371650,0,0,16.0
8748,8748,436,441,60.763325,0,0,9.6
8749,8749,445,441,30.563361,0,0,12.8
8750,8750,445,435,52.070980,0,0,14.4


In [None]:
# # Helper function to normalize angles to the range of 0 to 90 degrees
# def normalize_angle_0_to_90(angle):
#     while angle < 0:
#         angle += 360
#     while angle >= 90:
#         angle -= 90
#     return angle

# # Normalize the intersection angles in the pandas Series
# edge_df['intersection_angle'] = edge_df['intersection_angle'].apply(normalize_angle_0_to_90)

In [None]:
# edge_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle
0,0,293,312,73.819498,189,0.033167,76.015232
1,1,312,304,28.143167,243,0.105671,75.330291
2,2,293,304,45.696590,221,0.016852,76.519843
3,3,307,304,61.087238,45,0.977769,56.990835
4,4,312,304,28.143167,243,0.105671,75.330291
...,...,...,...,...,...,...,...
8747,8747,445,436,48.371650,180,2.150902,56.513831
8748,8748,436,441,60.763325,248,0.822384,69.839302
8749,8749,445,441,30.563361,3,0.077049,9.865807
8750,8750,445,435,52.070980,110,5.426604,49.462668


In [None]:
# is_zero_in_intersection = (edge_df['intersection_angle'] == 0).any()

In [None]:
# is_zero_in_intersection

False

# -----Elevation-----

In [None]:
elevation_shapefile_path = 'hydrant_locations_with_rasterdata.shp'
first_hydrant_raster_gdf = gpd.read_file(elevation_shapefile_path)

In [None]:
second_elevation_shapefilepath = 'second_hydrants_with_raster.shp'
second_hydrant_raster_gdf = gpd.read_file(second_elevation_shapefilepath)

In [None]:
def create_hydrants_elev_dict(first_hydrant_raster_gdf, second_hydrant_raster_gdf):

  hydrants_to_elev = {}

  for index, row in first_hydrant_raster_gdf.iterrows():
    hydrant_point = row['geometry']
    if pd.isna(row['SAMPLE_1']): #if the value is not in the first shapefile DEM, check second DEM file
        corresponding_elev = second_hydrant_raster_gdf.loc[index, 'ELEV1'] #call the corresponding elevation from second DEM file
        hydrants_to_elev[(hydrant_point.x, hydrant_point.y)] = corresponding_elev #update the dictionary value
    else:
        hydrants_to_elev[(hydrant_point.x, hydrant_point.y)] = row['SAMPLE_1']
  return hydrants_to_elev

In [None]:
elevation_data = create_hydrants_elev_dict(first_hydrant_raster_gdf, second_hydrant_raster_gdf)

In [None]:
# Create a new column 'elevation' in the df_point DataFrame and set it to None initially
df_point['elevation'] = None

# Iterate through the df_point DataFrame and update the 'elevation' column with the corresponding elevation from the dictionary
for index, row in df_point.iterrows():
    geometry = row['geometry']
    elevation = elevation_data.get((geometry.x, geometry.y), None)
    df_point.at[index, 'elevation'] = elevation * 0.3048
    hydrant_instances[index].elevation = elevation

In [None]:
elevation_data

{(-74.00697762006345, 40.74747950102119): 5.269999980926514,
 (-74.0072172653229, 40.747217389018665): 6.840000152587891,
 (-74.00720977640854, 40.74697399930203): 8.09000015258789,
 (-74.00722100978008, 40.74722113347584): 6.760000228881836,
 (-74.00728466555212, 40.746670698270535): 7.440000057220459,
 (-74.00788003424358, 40.74658083129824): 7.800000190734863,
 (-74.00645714051556, 40.747243600218916): 7.519999980926514,
 (-74.00642344040097, 40.746629509241565): 6.920000076293945,
 (-74.00799236795895, 40.74603788500729): 6.309999942779541,
 (-74.0075205663544, 40.74587312889141): 5.210000038146973,
 (-74.00754677755465, 40.74558480568863): -0.759999990463257,
 (-74.00808597938843, 40.745494938716334): 6.980000019073486,
 (-74.0055097928493, 40.74712377758919): 8.300000190734863,
 (-74.00494812427245, 40.74726232250481): 7.480000019073486,
 (-74.00527014758984, 40.74674558741411): 3.240000009536743,
 (-74.00589172748154, 40.74670814284232): 7.090000152587891,
 (-74.0056146376503, 4

In [None]:
df_point

Unnamed: 0,id,geometry,close_road,closest_road_id,block_id,elevation
0,0,POINT (-74.00698 40.74748),119,"[119, 170]",1.0,1.606296
1,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832
2,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832
3,3,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.060448
4,4,POINT (-74.00728 40.74667),120,"[120, 121, 170]",3.0,2.267712
...,...,...,...,...,...,...
2295,2295,POINT (-73.97976 40.71559),234,"[33, 234]",505.0,3.62712
2296,2296,POINT (-73.97987 40.71361),30,"[30, 35, 235]",901.0,6.5532
2297,2297,POINT (-73.98387 40.72077),221,[221],437.0,7.303008
2298,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368


# -----cost function-----

In [None]:
# sorry I know this is repetitive but I cannot find any other way at this point
def find_max_min_slope(edges):
    max_slope = float('-inf')
    min_slope = float('inf')

    for edge in edges:
        point_i = points[edge.p1]
        point_j = points[edge.p2]
        hydrant_i = hydrant_instances[edge.p1]
        hydrant_j = hydrant_instances[edge.p2]
        rise = hydrant_j.elevation - hydrant_i.elevation
        run = distance(point_i, point_j)
        slope = rise / run

        # Check for maximum slope
        if slope > max_slope:
            max_slope = slope

        # Check for minimum slope
        if slope < min_slope:
            min_slope = slope

    return max_slope, min_slope

In [None]:
max_length = edge_df['length'].max()
min_length = edge_df['length'].min()
# max_angle = edge_df['intersection_angle'].max()
# min_angle = edge_df['intersection_angle'].min()
max_slope, min_slope = find_max_min_slope(edge_set_pro)

In [None]:
def normalize_min_max(value, min_val, max_val):
    return (value - min_val) / (max_val - min_val)

In [None]:
def costAngle(edge):
    return edge.intersection_angle

def costLength(edge):
    point_i = points[edge.p1]
    point_j = points[edge.p2]
    hydrant_i = hydrant_instances[edge.p1]
    hydrant_j = hydrant_instances[edge.p2]
    return normalize_min_max(distance(point_i, point_j), 0, max_edge_length)

def costSlope(edge):
    hydrant_j = df_point.iloc[edge.p2]
    hydrant_i = df_point.iloc[edge.p1]
    rise = hydrant_j.elevation - hydrant_i.elevation
    run = edge.length
    slope = rise/run
    if slope < 0:
        return 1
    elif slope > 0.006146721416: #too steep
      cost = normalize_min_max(50*slope, 0, max_slope)
      if cost >1:
        cost = 1
      return cost
    elif slope > 0.001702866396: # in max range
      cost = normalize_min_max(25*slope, 0, max_slope)
      if cost >1:
        cost = 1
      return cost
    elif slope > 0.000661225445: #just right
      cost = normalize_min_max(slope, 0, max_slope)
      if cost >1:
        cost = 1
      return cost
    else: #too flat
      cost = normalize_min_max(100*slope, 0, max_slope)
      if cost >1:
        cost = 1
      return cost


In [None]:
edge_df['slope'] = None
for idx, edge in edge_df.iterrows():
  hydrant_j = df_point.iloc[edge.p2]
  hydrant_i = df_point.iloc[edge.p1]
  rise = hydrant_j.elevation - hydrant_i.elevation
  run = edge.length
  edge_df['slope'].iloc[idx] = rise/run
edge_df['slope_cost'] = None
edge_df['slope_cost'] = edge_df.apply(lambda row: costSlope(row), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  edge_df['slope'].iloc[idx] = rise/run


In [None]:
edge_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost
0,0,293,312,73.819498,0,0,14.4,0.002312,0.040306
1,1,312,304,28.143167,0,0,14.4,0.002166,0.037759
2,2,293,304,45.696590,0,0,19.2,0.005069,0.088367
3,3,307,304,61.087238,0,0,9.6,0.008432,0.293985
4,4,312,304,28.143167,0,0,14.4,0.002166,0.037759
...,...,...,...,...,...,...,...,...,...
8747,8747,445,436,48.371650,0,0,16.0,0.035539,1.000000
8748,8748,436,441,60.763325,0,0,9.6,-0.022924,1.000000
8749,8749,445,441,30.563361,0,0,12.8,0.010671,0.372024
8750,8750,445,435,52.070980,0,0,14.4,0.007551,0.263258


# -----Genetic Algorithm-----

In [None]:
# Define the cost function using the given formula
def cost_function(αL, αS, α𝛳, edge):
    return αL * costLength(edge) + αS * costSlope(edge) + α𝛳 * costAngle(edge)

In [None]:
# Define the fitness function
def fitness(αL, αS, α𝛳, edges):
    total_cost = sum(cost_function(αL, αS, α𝛳, edge) for edge in edges)
    return total_cost

In [None]:
# Genetic Algorithm function
import random
import numpy as np
from concurrent.futures import ThreadPoolExecutor

def genetic_algorithm(edges, population_size=50, num_generations=100, mutation_rate=0.1):
    num_edges = len(edges)

    # Generate an initial population of random individuals (chromosomes)
    population = np.random.rand(population_size, 3)  # [αL, αS, α𝛳]

    with ThreadPoolExecutor() as executor:
        for generation in range(num_generations):
            # Evaluate the fitness of each individual in the population using parallelism
            fitness_scores = list(executor.map(lambda ind: fitness(ind[0], ind[1], ind[2], edges), population))

            # Perform selection using indices of sorted fitness scores
            selected_indices = np.argsort(fitness_scores)[:population_size // 2]
            selected_population = population[selected_indices]

            # Perform crossover and mutation using NumPy vectorization
            parents = selected_population[np.random.choice(len(selected_population), size=(population_size, 2))]
            children = (parents[:, 0] + parents[:, 1]) / 2

            # Apply mutation using NumPy vectorization
            mask = np.random.rand(*children.shape) < mutation_rate
            mutation = np.random.uniform(-0.1, 0.1, size=children.shape)
            children += mask * mutation

            # Replace old population with new population
            population = children

    # Find the best individual
    best_index = np.argmin(fitness_scores)
    best_individual = {'αL': population[best_index, 0], 'αS': population[best_index, 1], 'α𝛳': population[best_index, 2]}

    return best_individual

In [None]:
best_weights = genetic_algorithm(edge_set_pro)

In [None]:
# best_weights =

NameError: ignored

In [None]:
# Assuming you have already found the best weights
# best_αL = best_weights['αL']
# best_αS = best_weights['αS']
# best_α𝛳 = best_weights['α𝛳']

best_αL = 0.5135085953821169
best_αS = 0.411738873917856
best_α𝛳 = -1.1121284906606115

# Calculate the cost for each edge using the best weights
edge_costs = [cost_function(best_αL, best_αS, best_α𝛳, edge) for edge in edge_set_pro]

# Add the calculated costs to the edge list as a new attribute
for edge, cost in zip(edge_set_pro, edge_costs):
    edge.cost = cost

In [None]:
edge_df['cost'] = edge_costs

In [None]:
edge_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost
0,0,293,312,73.819498,0,0,14.4,0.002312,0.040306,-15.770801
1,1,312,304,28.143167,0,0,14.4,0.002166,0.037759,-15.912465
2,2,293,304,45.696590,0,0,19.2,0.005069,0.088367,-21.175806
3,3,307,304,61.087238,0,0,9.6,0.008432,0.293985,-10.367332
4,4,312,304,28.143167,0,0,14.4,0.002166,0.037759,-15.912465
...,...,...,...,...,...,...,...,...,...,...
8747,8747,445,436,48.371650,0,0,16.0,0.035539,1.000000,-17.233405
8748,8748,436,441,60.763325,0,0,9.6,-0.022924,1.000000,-10.077635
8749,8749,445,441,30.563361,0,0,12.8,0.010671,0.372024,-13.987979
8750,8750,445,435,52.070980,0,0,14.4,0.007551,0.263258,-15.745956


In [None]:
sorted_grouped_edge_df = edge_df.groupby('p1', group_keys=False).apply(lambda x: x.sort_values(by='cost'))

In [None]:
sorted_grouped_edge_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost
1224,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720
1226,1226,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720
1228,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960
157,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199
445,445,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199
...,...,...,...,...,...,...,...,...,...,...
2430,2430,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667
473,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413
1279,1279,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413
474,474,2298,1994,24.908170,0,0,8.0,0.010646,0.371164,-8.667526


In [None]:
sorted_grouped_edge_df = sorted_grouped_edge_df.drop_duplicates()

In [None]:
sorted_grouped_edge_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost
1224,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720
1226,1226,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720
1228,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960
157,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199
445,445,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199
...,...,...,...,...,...,...,...,...,...,...
2430,2430,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667
473,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413
1279,1279,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413
474,474,2298,1994,24.908170,0,0,8.0,0.010646,0.371164,-8.667526


In [None]:
sorted_grouped_edge_df = sorted_grouped_edge_df.drop_duplicates(subset=['p1', 'p2', 'length', 'cross_road_id', 'angle_with_road', 'intersection_angle'])

In [None]:
sorted_grouped_edge_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost
1224,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720
1228,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960
157,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199
156,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534
159,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094
...,...,...,...,...,...,...,...,...,...,...
1596,1596,2297,2234,1.567053,0,0,9.6,-0.009725,1.000000,-10.259870
479,479,2298,2012,41.016590,0,0,12.8,-0.001709,1.000000,-13.697236
480,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667
473,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413


In [None]:
# count_above_zero = (sorted_grouped_edge_df['cost'] > 0).sum()

In [None]:
# count_above_zero

1

In [None]:
# no_above_zero_df = sorted_grouped_edge_df.loc[sorted_grouped_edge_df['cost'] <= 0]

In [None]:
# no_above_zero_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost
1224,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720
1228,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960
157,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199
156,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534
159,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094
...,...,...,...,...,...,...,...,...,...,...
1596,1596,2297,2234,1.567053,0,0,9.6,-0.009725,1.000000,-10.259870
479,479,2298,2012,41.016590,0,0,12.8,-0.001709,1.000000,-13.697236
480,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667
473,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413


In [None]:
# Merge the points DataFrame for p1
# filtered_edges_no_above_zero_df = pd.merge(no_above_zero_df, df_point, left_on='p1', right_on='id', how='left')

In [None]:
# filtered_edges_no_above_zero_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost,id,geometry,close_road,closest_road_id,block_id,elevation
0,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832
1,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832
2,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832
3,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832
4,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5830,1596,2297,2234,1.567053,0,0,9.6,-0.009725,1.000000,-10.259870,2297,POINT (-73.98387 40.72077),221,[221],437.0,7.303008
5831,479,2298,2012,41.016590,0,0,12.8,-0.001709,1.000000,-13.697236,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368
5832,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368
5833,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368


In [None]:
# # Duplicate the column with a different name
# filtered_edges_no_above_zero_df['geometry_p1'] = filtered_edges_no_above_zero_df['geometry']
# filtered_edges_no_above_zero_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost,id,geometry,close_road,closest_road_id,block_id,elevation,geometry_p1
0,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832,POINT (-74.00722 40.74722)
1,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832,POINT (-74.00722 40.74722)
2,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697)
3,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697)
4,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5830,1596,2297,2234,1.567053,0,0,9.6,-0.009725,1.000000,-10.259870,2297,POINT (-73.98387 40.72077),221,[221],437.0,7.303008,POINT (-73.98387 40.72077)
5831,479,2298,2012,41.016590,0,0,12.8,-0.001709,1.000000,-13.697236,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833)
5832,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833)
5833,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833)


In [None]:
# Merge the points DataFrame for p2
# filtered_edges_no_above_zero_df = pd.merge(filtered_edges_no_above_zero_df, df_point, left_on='p2', right_on='id', how='left')

In [None]:
# filtered_edges_no_above_zero_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost,...,closest_road_id_x,block_id_x,elevation_x,geometry_p1,id_y,geometry_y,close_road_y,closest_road_id_y,block_id_y,elevation_y
0,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720,...,"[119, 120, 170]",2.0,2.084832,POINT (-74.00722 40.74722),0,POINT (-74.00698 40.74748),119,"[119, 170]",1.0,1.606296
1,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960,...,"[119, 120, 170]",2.0,2.084832,POINT (-74.00722 40.74722),3,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.060448
2,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199,...,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697),5,POINT (-74.00788 40.74658),170,"[121, 170]",,2.37744
3,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534,...,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697),0,POINT (-74.00698 40.74748),119,"[119, 170]",1.0,1.606296
4,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094,...,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697),4,POINT (-74.00728 40.74667),120,"[120, 121, 170]",3.0,2.267712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5830,1596,2297,2234,1.567053,0,0,9.6,-0.009725,1.000000,-10.259870,...,[221],437.0,7.303008,POINT (-73.98387 40.72077),2234,POINT (-73.98388 40.72078),221,[221],437.0,7.287768
5831,479,2298,2012,41.016590,0,0,12.8,-0.001709,1.000000,-13.697236,...,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833),2012,POINT (-73.97743 40.71823),199,"[199, 235]",445.0,2.703576
5832,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667,...,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833),2011,POINT (-73.97715 40.71870),235,[235],445.0,2.983992
5833,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413,...,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833),2014,POINT (-73.97827 40.71847),199,"[199, 234]",444.0,3.099816


In [None]:
# # List of column names to keep
# columns_to_keep = ['edgeID', 'id_x', 'id_y', 'geometry_x', 'geometry_y', 'cost']

# # Keep only the specified columns
# filtered_edges_no_above_zero_df = filtered_edges_no_above_zero_df.loc[:, columns_to_keep]

In [None]:
# filtered_edges_no_above_zero_df

Unnamed: 0,edgeID,id_x,id_y,geometry_x,geometry_y,cost
0,1224,1,0,POINT (-74.00722 40.74722),POINT (-74.00698 40.74748),-11.068720
1,1228,1,3,POINT (-74.00722 40.74722),POINT (-74.00722 40.74722),-8.483960
2,157,2,5,POINT (-74.00721 40.74697),POINT (-74.00788 40.74658),-16.260199
3,156,2,0,POINT (-74.00721 40.74697),POINT (-74.00698 40.74748),-14.620534
4,159,2,4,POINT (-74.00721 40.74697),POINT (-74.00728 40.74667),-13.785094
...,...,...,...,...,...,...
5830,1596,2297,2234,POINT (-73.98387 40.72077),POINT (-73.98388 40.72078),-10.259870
5831,479,2298,2012,POINT (-73.97780 40.71833),POINT (-73.97743 40.71823),-13.697236
5832,480,2298,2011,POINT (-73.97780 40.71833),POINT (-73.97715 40.71870),-12.208667
5833,473,2298,2014,POINT (-73.97780 40.71833),POINT (-73.97827 40.71847),-10.426413


In [None]:
# Create LineString geometries for both p1 and p2
# filtered_edges_no_above_zero_df['geometry'] = filtered_edges_no_above_zero_df.apply(lambda row: LineString([row['geometry_x'], row['geometry_y']]), axis=1)

In [None]:
# filtered_edges_no_above_zero_df

Unnamed: 0,edgeID,id_x,id_y,geometry_x,geometry_y,cost,geometry
0,1224,1,0,POINT (-74.00722 40.74722),POINT (-74.00698 40.74748),-11.068720,LINESTRING (-74.0072172653229 40.7472173890186...
1,1228,1,3,POINT (-74.00722 40.74722),POINT (-74.00722 40.74722),-8.483960,LINESTRING (-74.0072172653229 40.7472173890186...
2,157,2,5,POINT (-74.00721 40.74697),POINT (-74.00788 40.74658),-16.260199,LINESTRING (-74.00720977640854 40.746973999302...
3,156,2,0,POINT (-74.00721 40.74697),POINT (-74.00698 40.74748),-14.620534,LINESTRING (-74.00720977640854 40.746973999302...
4,159,2,4,POINT (-74.00721 40.74697),POINT (-74.00728 40.74667),-13.785094,LINESTRING (-74.00720977640854 40.746973999302...
...,...,...,...,...,...,...,...
5830,1596,2297,2234,POINT (-73.98387 40.72077),POINT (-73.98388 40.72078),-10.259870,LINESTRING (-73.98386708301707 40.720766081186...
5831,479,2298,2012,POINT (-73.97780 40.71833),POINT (-73.97743 40.71823),-13.697236,LINESTRING (-73.97780125392501 40.718332506285...
5832,480,2298,2011,POINT (-73.97780 40.71833),POINT (-73.97715 40.71870),-12.208667,LINESTRING (-73.97780125392501 40.718332506285...
5833,473,2298,2014,POINT (-73.97780 40.71833),POINT (-73.97827 40.71847),-10.426413,LINESTRING (-73.97780125392501 40.718332506285...


In [None]:
# gdf = filtered_edges_no_above_zero_df.loc[:, ['geometry']]

In [None]:
# gdf

Unnamed: 0,geometry
0,LINESTRING (-74.0072172653229 40.7472173890186...
1,LINESTRING (-74.0072172653229 40.7472173890186...
2,LINESTRING (-74.00720977640854 40.746973999302...
3,LINESTRING (-74.00720977640854 40.746973999302...
4,LINESTRING (-74.00720977640854 40.746973999302...
...,...
5830,LINESTRING (-73.98386708301707 40.720766081186...
5831,LINESTRING (-73.97780125392501 40.718332506285...
5832,LINESTRING (-73.97780125392501 40.718332506285...
5833,LINESTRING (-73.97780125392501 40.718332506285...


In [None]:
# geometry = gdf['geometry'].apply(LineString)

In [None]:
# gdf = gpd.GeoDataFrame(gdf, geometry=geometry)

In [None]:
# # Define the output shapefile path
# output_shapefile = 'GA_no_above_zero_lines.shp'

# # Save the GeoDataFrame to a shapefile
# gdf.to_file(output_shapefile)

In [None]:
#remove duplicates
def check_symmetry(row):
    match_row = sorted_grouped_edge_df[(sorted_grouped_edge_df['p1'] == row['p2']) & (sorted_grouped_edge_df['p2'] == row['p1'])]
    return not match_row.empty

sorted_grouped_edge_df['symmetric'] = sorted_grouped_edge_df.apply(check_symmetry, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sorted_grouped_edge_df['symmetric'] = sorted_grouped_edge_df.apply(check_symmetry, axis=1)


In [None]:
# Create a mask to identify rows to be deleted
delete_mask = sorted_grouped_edge_df['symmetric'] & (sorted_grouped_edge_df['p1'] > sorted_grouped_edge_df['p2'])

# Mark the rows to be deleted
sorted_grouped_edge_df.loc[delete_mask, 'delete'] = True

# Keep one row from each symmetric pair
no_duplicate_df = sorted_grouped_edge_df[~(sorted_grouped_edge_df['delete'] & sorted_grouped_edge_df['symmetric'])]

# Clean up the DataFrame
no_duplicate_df = no_duplicate_df.drop(columns=['delete'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sorted_grouped_edge_df.loc[delete_mask, 'delete'] = True


In [None]:
no_duplicate_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost,symmetric
1224,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720,False
1228,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960,True
157,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199,False
156,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534,False
159,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094,False
...,...,...,...,...,...,...,...,...,...,...,...
554,554,2296,1942,49.194948,0,0,11.2,-0.008364,1.000000,-11.892654,False
1561,1561,2297,2212,64.159050,0,0,12.8,0.007791,0.271628,-13.925891,False
480,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667,False
473,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413,False


In [None]:
# Merge the points DataFrame for p1
no_duplicate_df = pd.merge(no_duplicate_df, df_point, left_on='p1', right_on='id', how='left')

In [None]:
no_duplicate_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost,symmetric,id,geometry,close_road,closest_road_id,block_id,elevation
0,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720,False,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832
1,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960,True,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832
2,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199,False,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832
3,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534,False,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832
4,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094,False,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4372,554,2296,1942,49.194948,0,0,11.2,-0.008364,1.000000,-11.892654,False,2296,POINT (-73.97987 40.71361),30,"[30, 35, 235]",901.0,6.5532
4373,1561,2297,2212,64.159050,0,0,12.8,0.007791,0.271628,-13.925891,False,2297,POINT (-73.98387 40.72077),221,[221],437.0,7.303008
4374,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667,False,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368
4375,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413,False,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368


In [None]:
# Duplicate the column with a different name
no_duplicate_df['geometry_p1'] = no_duplicate_df['geometry']
no_duplicate_df


Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost,symmetric,id,geometry,close_road,closest_road_id,block_id,elevation,geometry_p1
0,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720,False,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832,POINT (-74.00722 40.74722)
1,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960,True,1,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.084832,POINT (-74.00722 40.74722)
2,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199,False,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697)
3,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534,False,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697)
4,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094,False,2,POINT (-74.00721 40.74697),120,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4372,554,2296,1942,49.194948,0,0,11.2,-0.008364,1.000000,-11.892654,False,2296,POINT (-73.97987 40.71361),30,"[30, 35, 235]",901.0,6.5532,POINT (-73.97987 40.71361)
4373,1561,2297,2212,64.159050,0,0,12.8,0.007791,0.271628,-13.925891,False,2297,POINT (-73.98387 40.72077),221,[221],437.0,7.303008,POINT (-73.98387 40.72077)
4374,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667,False,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833)
4375,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413,False,2298,POINT (-73.97780 40.71833),199,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833)


In [None]:
# Merge the points DataFrame for p2
no_duplicate_df = pd.merge(no_duplicate_df, df_point, left_on='p2', right_on='id', how='left')

In [None]:
no_duplicate_df

Unnamed: 0,edgeID,p1,p2,length,cross_road_id,angle_with_road,intersection_angle,slope,slope_cost,cost,...,closest_road_id_x,block_id_x,elevation_x,geometry_p1,id_y,geometry_y,close_road_y,closest_road_id_y,block_id_y,elevation_y
0,1224,1,0,27.830981,0,0,10.4,-0.017194,1.000000,-11.068720,...,"[119, 120, 170]",2.0,2.084832,POINT (-74.00722 40.74722),0,POINT (-74.00698 40.74748),119,"[119, 170]",1.0,1.606296
1,1228,1,3,0.431879,0,0,8.0,-0.05646,1.000000,-8.483960,...,"[119, 120, 170]",2.0,2.084832,POINT (-74.00722 40.74722),3,POINT (-74.00722 40.74722),119,"[119, 120, 170]",2.0,2.060448
2,157,2,5,75.496297,0,0,15.2,-0.001171,1.000000,-16.260199,...,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697),5,POINT (-74.00788 40.74658),170,"[121, 170]",,2.37744
3,156,2,0,30.103671,0,0,13.6,-0.028553,1.000000,-14.620534,...,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697),0,POINT (-74.00698 40.74748),119,"[119, 170]",1.0,1.606296
4,159,2,4,12.477320,0,0,12.8,-0.015878,1.000000,-13.785094,...,"[120, 170]",2.0,2.465832,POINT (-74.00721 40.74697),4,POINT (-74.00728 40.74667),120,"[120, 121, 170]",3.0,2.267712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4372,554,2296,1942,49.194948,0,0,11.2,-0.008364,1.000000,-11.892654,...,"[30, 35, 235]",901.0,6.5532,POINT (-73.97987 40.71361),1942,POINT (-73.97947 40.71296),30,[30],880.0,6.14172
4373,1561,2297,2212,64.159050,0,0,12.8,0.007791,0.271628,-13.925891,...,[221],437.0,7.303008,POINT (-73.98387 40.72077),2212,POINT (-73.98444 40.72049),221,"[199, 221]",436.0,7.80288
4374,480,2298,2011,73.630471,0,0,11.2,0.002856,0.049791,-12.208667,...,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833),2011,POINT (-73.97715 40.71870),235,[235],445.0,2.983992
4375,473,2298,2014,51.931581,0,0,9.6,0.00628,0.218948,-10.426413,...,"[199, 235]",444.0,2.77368,POINT (-73.97780 40.71833),2014,POINT (-73.97827 40.71847),199,"[199, 234]",444.0,3.099816


In [None]:
# List of column names to keep
columns_to_keep = ['edgeID', 'id_x', 'id_y', 'geometry_x', 'geometry_y', 'cost']

# Keep only the specified columns
no_duplicate_df = no_duplicate_df.loc[:, columns_to_keep]

In [None]:
no_duplicate_df

Unnamed: 0,edgeID,id_x,id_y,geometry_x,geometry_y,cost
0,1224,1,0,POINT (-74.00722 40.74722),POINT (-74.00698 40.74748),-11.068720
1,1228,1,3,POINT (-74.00722 40.74722),POINT (-74.00722 40.74722),-8.483960
2,157,2,5,POINT (-74.00721 40.74697),POINT (-74.00788 40.74658),-16.260199
3,156,2,0,POINT (-74.00721 40.74697),POINT (-74.00698 40.74748),-14.620534
4,159,2,4,POINT (-74.00721 40.74697),POINT (-74.00728 40.74667),-13.785094
...,...,...,...,...,...,...
4372,554,2296,1942,POINT (-73.97987 40.71361),POINT (-73.97947 40.71296),-11.892654
4373,1561,2297,2212,POINT (-73.98387 40.72077),POINT (-73.98444 40.72049),-13.925891
4374,480,2298,2011,POINT (-73.97780 40.71833),POINT (-73.97715 40.71870),-12.208667
4375,473,2298,2014,POINT (-73.97780 40.71833),POINT (-73.97827 40.71847),-10.426413


In [None]:
# Create LineString geometries for both p1 and p2
no_duplicate_df['geometry'] = no_duplicate_df.apply(lambda row: LineString([row['geometry_x'], row['geometry_y']]), axis=1)

In [None]:
no_duplicate_df

Unnamed: 0,edgeID,id_x,id_y,geometry_x,geometry_y,cost,geometry
0,1224,1,0,POINT (-74.00722 40.74722),POINT (-74.00698 40.74748),-11.068720,LINESTRING (-74.0072172653229 40.7472173890186...
1,1228,1,3,POINT (-74.00722 40.74722),POINT (-74.00722 40.74722),-8.483960,LINESTRING (-74.0072172653229 40.7472173890186...
2,157,2,5,POINT (-74.00721 40.74697),POINT (-74.00788 40.74658),-16.260199,LINESTRING (-74.00720977640854 40.746973999302...
3,156,2,0,POINT (-74.00721 40.74697),POINT (-74.00698 40.74748),-14.620534,LINESTRING (-74.00720977640854 40.746973999302...
4,159,2,4,POINT (-74.00721 40.74697),POINT (-74.00728 40.74667),-13.785094,LINESTRING (-74.00720977640854 40.746973999302...
...,...,...,...,...,...,...,...
4372,554,2296,1942,POINT (-73.97987 40.71361),POINT (-73.97947 40.71296),-11.892654,LINESTRING (-73.97987404430187 40.713608009757...
4373,1561,2297,2212,POINT (-73.98387 40.72077),POINT (-73.98444 40.72049),-13.925891,LINESTRING (-73.98386708301707 40.720766081186...
4374,480,2298,2011,POINT (-73.97780 40.71833),POINT (-73.97715 40.71870),-12.208667,LINESTRING (-73.97780125392501 40.718332506285...
4375,473,2298,2014,POINT (-73.97780 40.71833),POINT (-73.97827 40.71847),-10.426413,LINESTRING (-73.97780125392501 40.718332506285...


In [None]:
graph = gpd.GeoDataFrame(columns=['edgeID', 'geometry'])

In [None]:
def dfs(node, max_edges):
    global graph
    edges = no_duplicate_df[no_duplicate_df['id_x'] == node].sort_values('cost')
    connected_edges = 0

    for _, edge in edges.iterrows():
        if graph[graph['edgeID'] == edge['edgeID']].shape[0] == 0:
            if graph[graph['geometry'].apply(lambda x: edge['geometry_x'].equals(x) or edge['geometry_y'].equals(x))].shape[0] < max_edges:
                id_y_connected = graph[graph['geometry'].apply(lambda x: edge['geometry_y'].equals(x))].shape[0]
                if id_y_connected < 3:
                    graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
                    connected_edges += 1
                    dfs(edge['id_y'], max_edges)
            if connected_edges >= max_edges:
                break


for start_point in no_duplicate_df['id_x'].unique():
    dfs(start_point, max_edges=3)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)
  graph = graph.append(edge[['edgeID', 'geometry']], ignore_index=True)

In [None]:
graph.to_file('mapping.shp')

In [None]:
no_duplicate_df

Unnamed: 0,edgeID,id_x,id_y,geometry_x,geometry_y,cost,geometry
0,1224,1,0,POINT (-74.00722 40.74722),POINT (-74.00698 40.74748),-11.068720,LINESTRING (-74.0072172653229 40.7472173890186...
1,1228,1,3,POINT (-74.00722 40.74722),POINT (-74.00722 40.74722),-8.483960,LINESTRING (-74.0072172653229 40.7472173890186...
2,157,2,5,POINT (-74.00721 40.74697),POINT (-74.00788 40.74658),-16.260199,LINESTRING (-74.00720977640854 40.746973999302...
3,156,2,0,POINT (-74.00721 40.74697),POINT (-74.00698 40.74748),-14.620534,LINESTRING (-74.00720977640854 40.746973999302...
4,159,2,4,POINT (-74.00721 40.74697),POINT (-74.00728 40.74667),-13.785094,LINESTRING (-74.00720977640854 40.746973999302...
...,...,...,...,...,...,...,...
4372,554,2296,1942,POINT (-73.97987 40.71361),POINT (-73.97947 40.71296),-11.892654,LINESTRING (-73.97987404430187 40.713608009757...
4373,1561,2297,2212,POINT (-73.98387 40.72077),POINT (-73.98444 40.72049),-13.925891,LINESTRING (-73.98386708301707 40.720766081186...
4374,480,2298,2011,POINT (-73.97780 40.71833),POINT (-73.97715 40.71870),-12.208667,LINESTRING (-73.97780125392501 40.718332506285...
4375,473,2298,2014,POINT (-73.97780 40.71833),POINT (-73.97827 40.71847),-10.426413,LINESTRING (-73.97780125392501 40.718332506285...


In [None]:
sorted_df = no_duplicate_df.groupby('id_x').apply(lambda x: x.nlargest(2, 'cost')).reset_index(drop=True)

In [None]:
sorted_df

Unnamed: 0,edgeID,id_x,id_y,geometry_x,geometry_y,cost,geometry
0,1228,1,3,POINT (-74.00722 40.74722),POINT (-74.00722 40.74722),-8.483960,LINESTRING (-74.0072172653229 40.7472173890186...
1,1224,1,0,POINT (-74.00722 40.74722),POINT (-74.00698 40.74748),-11.068720,LINESTRING (-74.0072172653229 40.7472173890186...
2,7781,2,7,POINT (-74.00721 40.74697),POINT (-74.00642 40.74663),-9.993567,LINESTRING (-74.00720977640854 40.746973999302...
3,159,2,4,POINT (-74.00721 40.74697),POINT (-74.00728 40.74667),-13.785094,LINESTRING (-74.00720977640854 40.746973999302...
4,442,3,0,POINT (-74.00722 40.74722),POINT (-74.00698 40.74748),-9.288186,LINESTRING (-74.00722100978008 40.747221133475...
...,...,...,...,...,...,...,...
3145,554,2296,1942,POINT (-73.97987 40.71361),POINT (-73.97947 40.71296),-11.892654,LINESTRING (-73.97987404430187 40.713608009757...
3146,560,2296,2293,POINT (-73.97987 40.71361),POINT (-73.97965 40.71397),-17.299268,LINESTRING (-73.97987404430187 40.713608009757...
3147,1561,2297,2212,POINT (-73.98387 40.72077),POINT (-73.98444 40.72049),-13.925891,LINESTRING (-73.98386708301707 40.720766081186...
3148,474,2298,1994,POINT (-73.97780 40.71833),POINT (-73.97780 40.71752),-8.667526,LINESTRING (-73.97780125392501 40.718332506285...


In [None]:
# Count the occurrences of each unique 'p1' value
p1_counts = no_duplicate_df['id_x'].value_counts()

# Filter 'p1' values with more than 2 occurrences
filtered_p1 = p1_counts[p1_counts > 2].index

# Create a mask to identify rows to be removed
mask = no_duplicate_df['id_x'].isin(filtered_p1) & (no_duplicate_df.groupby('id_x')['cost'].rank(ascending=False) > 2)


In [None]:
mask

0       False
1       False
2        True
3        True
4       False
        ...  
4372    False
4373    False
4374     True
4375    False
4376    False
Length: 4377, dtype: bool

In [None]:
# Apply the mask and get the filtered DataFrame
no_duplicate_only_two_df = no_duplicate_df[~mask]

In [None]:
no_duplicate_only_two_df

Unnamed: 0,edgeID,id_x,id_y,geometry_x,geometry_y,cost,geometry
0,1224,1,0,POINT (-74.00722 40.74722),POINT (-74.00698 40.74748),-11.068720,LINESTRING (-74.0072172653229 40.7472173890186...
1,1228,1,3,POINT (-74.00722 40.74722),POINT (-74.00722 40.74722),-8.483960,LINESTRING (-74.0072172653229 40.7472173890186...
4,159,2,4,POINT (-74.00721 40.74697),POINT (-74.00728 40.74667),-13.785094,LINESTRING (-74.00720977640854 40.746973999302...
5,7781,2,7,POINT (-74.00721 40.74697),POINT (-74.00642 40.74663),-9.993567,LINESTRING (-74.00720977640854 40.746973999302...
6,444,3,5,POINT (-74.00722 40.74722),POINT (-74.00788 40.74658),-14.861420,LINESTRING (-74.00722100978008 40.747221133475...
...,...,...,...,...,...,...,...
4371,560,2296,2293,POINT (-73.97987 40.71361),POINT (-73.97965 40.71397),-17.299268,LINESTRING (-73.97987404430187 40.713608009757...
4372,554,2296,1942,POINT (-73.97987 40.71361),POINT (-73.97947 40.71296),-11.892654,LINESTRING (-73.97987404430187 40.713608009757...
4373,1561,2297,2212,POINT (-73.98387 40.72077),POINT (-73.98444 40.72049),-13.925891,LINESTRING (-73.98386708301707 40.720766081186...
4375,473,2298,2014,POINT (-73.97780 40.71833),POINT (-73.97827 40.71847),-10.426413,LINESTRING (-73.97780125392501 40.718332506285...


In [None]:
no_duplicate_only_two_df = no_duplicate_only_two_df.loc[:, ['geometry', 'cost']]

In [None]:
no_duplicate_only_two_df

Unnamed: 0,geometry,cost
0,LINESTRING (-74.0072172653229 40.7472173890186...,-11.068720
1,LINESTRING (-74.0072172653229 40.7472173890186...,-8.483960
4,LINESTRING (-74.00720977640854 40.746973999302...,-13.785094
5,LINESTRING (-74.00720977640854 40.746973999302...,-9.993567
6,LINESTRING (-74.00722100978008 40.747221133475...,-14.861420
...,...,...
4371,LINESTRING (-73.97987404430187 40.713608009757...,-17.299268
4372,LINESTRING (-73.97987404430187 40.713608009757...,-11.892654
4373,LINESTRING (-73.98386708301707 40.720766081186...,-13.925891
4375,LINESTRING (-73.97780125392501 40.718332506285...,-10.426413


In [None]:
geometry = no_duplicate_only_two_df['geometry'].apply(LineString)

In [None]:
no_duplicate_only_two_df = gpd.GeoDataFrame(no_duplicate_only_two_df, geometry=geometry)


In [None]:
# Define the output shapefile path
output_shapefile = 'GA_just_two_reverse.shp'

# Save the GeoDataFrame to a shapefile
no_duplicate_only_two_df.to_file(output_shapefile)