In [72]:
## Run when initialise the code
import os
import geopandas as gpd
import osmnx as ox
import pickle
project_crs = 'epsg:3857'
import warnings
warnings.filterwarnings(action='ignore')
pjr_loc = os.path.dirname(os.getcwd())
from shapely.geometry import  Point, LineString, MultiPolygon, MultiPoint
from momepy import remove_false_nodes
from geopandas import GeoDataFrame, GeoSeries
from tqdm import tqdm
from sklearn.cluster import DBSCAN
import time

In [73]:


## Choose locations and create folders if necessary

place = 'Turin'
print(place)
data_folder = f'{pjr_loc}/places/{place.replace(",", "_").replace(" ", "_")}_test'
os.makedirs(f'{data_folder}/delete_2_nodes', exist_ok=True)
os.makedirs(f'{data_folder}/split_tp_intersection', exist_ok=True)
if place == 'Tel Aviv':
    useful_tags_path = ['name:en', 'highway', 'length', 'bearing', 'tunnel', 'junction']
    ox.utils.config(useful_tags_way=useful_tags_path)


Turin


In [74]:
## Run when initialise the code
# find and store roundabout
my_gdf = gpd.read_file(f'{data_folder}/osm_data.gpkg',layer = 'edges')# Identify roundabout elements, if any exist, and store them in a separate DataFrame.
if place =='Tel Aviv':
    my_gdf.rename(columns={'name:en':'name'}, inplace=True)
is_junction= True if 'junction' in my_gdf.columns else False
if is_junction:
    round_about = my_gdf[my_gdf['junction'].isin(['roundabout', 'circular'])]
    my_gdf= my_gdf[~((my_gdf['junction'] == 'roundabout') | (my_gdf['junction'] == 'circular'))]

In [78]:

# region
# Classes to be employed during the execution of this code.
#Intersection
#Split in intersection
class Intersection:
    def __init__(self,network:GeoDataFrame,number:int):
        """

        :param network:
        :param number: give a unique name to the files created during the process (this class will be use again in this code)
        """
        self.my_network = network
        self.inter_pnt_dic = {'geometry':[],'name':[]}
        self.lines_to_delete =[]
        self.num = number

    def intersection_network(self):
        """
        This function fix topology (add or remove vertices) where needed
        :return:
        """
        # First remove_false_nodes
        self.my_network = remove_false_nodes(self.my_network).reset_index(drop=True)
        self.my_network.to_file(f'{data_folder}/delete_2_nodes/delete_false_intersection.shp')
        # Create buffer around each element
        buffer_around_lines= self.my_network['geometry'].buffer(cap_style=3, distance=1, join_style=3)


        # s_join between buffer to lines
        s_join_0 =gpd.sjoin(left_df=GeoDataFrame(geometry=buffer_around_lines,crs=project_crs),right_df=self.my_network)

        # delete lines belong to the buffer
        s_join = s_join_0[s_join_0.index!=s_join_0['index_right']]


        # Find new intersections that are not at the beginning or end of the line
        for_time =len(s_join)
        with tqdm(total=for_time) as pbar:
            s_join.apply(lambda x: self.find_intersection_points(x,pbar), axis=1)
        if len(self.inter_pnt_dic)==0:
            return
        inter_pnt_gdf = GeoDataFrame(self.inter_pnt_dic,crs=project_crs)
        inter_pnt_gdf.to_file(f'{data_folder}/delete_2_nodes/inter_pnt_dic.shp')
        # Split string line by points
        segments = {'geometry':[],'org_id':[]}
        # Groupby points name (which is the line they should split)
        my_groups =  inter_pnt_gdf.groupby('name')
        for_time = len(my_groups)
        with  tqdm(total=for_time) as pbar:
            for group_pnts in my_groups:
                pbar.update(1)
                points  = group_pnts[1]
                points['is_split'] = True
                # if group_pnts[0]==588:
                #     print(points)
                # get the line to split by comparing the name
                row = self.my_network.loc[group_pnts[0]]
                current = list(row.geometry.coords)
                points_line = [Point(x) for x in current]
                points_line_gdf = GeoDataFrame(geometry=points_line,crs=project_crs)
                points_line_gdf['is_split'] = False

                # append all the points together (line points and split points)
                line_all_pnts = points_line_gdf.append(points)

                # Find the distance of each point form the begining of the line on the line.
                line_all_pnts['dis_from_the_start'] = line_all_pnts['geometry'].apply(lambda x:row.geometry.project(x))
                line_all_pnts.sort_values('dis_from_the_start',inplace=True)

                # split the line
                seg =[]
                for point in line_all_pnts.iterrows():
                    prop = point[1]
                    seg.append(prop['geometry'])
                    if prop['is_split']:
                        segments['geometry'].append(LineString(seg))
                        segments['org_id'].append(row.name)
                        seg = [prop['geometry']]
                # if the split point is the last one, you don't need to create new segment
                if len(seg)>1:
                    segments['geometry'].append(LineString(seg))
                    segments['org_id'].append(row.name)
        network_split = GeoDataFrame(data=segments,crs=project_crs)
        network_split.to_file(f'{data_folder}/delete_2_nodes/segments.shp')
        cols_no_geometry = self.my_network.columns[:-1]
        network_split_final = network_split.set_index('org_id')
        network_split_final[cols_no_geometry] =self.my_network[cols_no_geometry]
        # remove old and redundant line from our network and update with new one
        network_split =self.my_network.drop(index=network_split_final.index.unique()).append(network_split_final)
        network_split['length'] = network_split.length
        self.my_network = network_split
        self.my_network.reset_index(drop=True,inplace= True)
        self.my_network.to_file(f'{data_folder}/delete_2_nodes/intersection_network.shp')

    def find_intersection_points(self,row,pbar):
        r"""
        find the intersection points between the two lines
        :param row:
        :return:
        """
        try:
            pbar.update(1)
            line_1 = self.my_network.loc[row.name]
            line_2 =  self.my_network.loc[row['index_right']]
            pnt = line_1.geometry.intersection(line_2.geometry)
            # If there are more than one intersection between two lines, one of the lines should be deleted.
            if isinstance(pnt,LineString): # The intersection is only between the buffer and the point ToDo - when the situation is a overlay line
                return
            if isinstance(pnt,MultiPoint):
                for single_pnt in pnt:
                    self.inter_pnt_dic['geometry'].append(single_pnt)
                    self.inter_pnt_dic['name'].append(row.name)
                return
            # If it is first or end continue OR if there is no intersection between the two lines
            if len(pnt.coords)==0 or pnt.coords[0]==line_1.geometry.coords[0] or pnt.coords[0]==line_1.geometry.coords[-1]:
                return
            self.inter_pnt_dic['geometry'].append(pnt)
            self.inter_pnt_dic['name'].append(row.name)
        except:
            print(f"{row.name},{row['index_right']}:{pnt}")
    def update_names(self, org_gpd:GeoDataFrame):
        """
        It updates the name of those lost their name during the previous process
        :param org_gpd:
        :return:
        """
        df1 = self.my_network
        # Split df1 into two GeoDataFrames: df3 (with names) and df4 (without names)
        df3 = df1[df1['name'].notna()]
        # df3.to_file(f'{data_folder}/delete_2_nodes/with_name.shp')
        df4 = df1[df1['name'].isna()]
        # df4.reset_index().to_file(f'{data_folder}/delete_2_nodes/no_name_init.shp')

        # use only one polyline from the original dataframe for name even if the algorithm may found more
        old_index  ='old_index'

        df = gpd.sjoin(df4, org_gpd).reset_index(names='old_index')
        # Create a new dictionary to store the updated data.
        dic_str_data = {}
        # Define the relevant columns to store
        rel_col  =[col for col in df.columns if col.endswith("right")]+['geometry']
        rel_col.remove('index_right')

        def return_street_name(aplcnts_tst):
            """
            1. "Count the occurrences of polylines with the same name within each aplcnts_tst."
            2. "Return the street if a aplcnts_tst contains only one unique street name."
            3. "If a single street name predominates within a aplcnts_tst, return that name."
            4. "For groups with multiple names, perform a buffer calculation around the respective polylines and determine the largest overlapping area, returning the name associated with that area."
            :param aplcnts_tst: group of applicants. Some of them hold the correct street name
            :return:
            """
            count_names = aplcnts_tst['name_right'].value_counts().sort_values(ascending=False)
            if len(count_names)==1:
                # there is only one name
                my_data = aplcnts_tst.iloc[0]
            elif count_names[1]- count_names[0]>1:
                # The highest number of polylines with the same name are bigger at least in 2:
                my_data = aplcnts_tst[aplcnts_tst['name_right'] == count_names.index[0]].iloc[0]
            else:
                # otherwise filter those with the most popular name or close to (-1)
                str_to_wrk_on  =aplcnts_tst[aplcnts_tst['name_right'].isin(count_names[count_names - count_names[0] < 2].index)]
                buffer_0 = GeoDataFrame(geometry=[str_to_wrk_on.iloc[0]['geometry'].buffer(distance  = 20, cap_style=2)],crs=project_crs) # Buffer around the polyline without name

                streets_right_geo = org_gpd[org_gpd.index.isin(str_to_wrk_on['index_right'])].reset_index() # Get all the applicants polylines and create buffer around
                buffer_1 =GeoDataFrame(geometry=streets_right_geo.buffer(distance  = 20, cap_style=2))
                streets_right_geo['area'] =gpd.overlay(buffer_1, buffer_0, how='intersection').area
                groupy = streets_right_geo.groupby('name')
                my_data_0 = groupy.get_group(groupy.sum()['area'].sort_values(ascending=False).index[0]).sort_values(by= 'area',ascending=False).iloc[0]
                # Get back to the @aplcnts_tst and find the relevant row by comparing index
                my_data = aplcnts_tst[aplcnts_tst['index_right'] == my_data_0['index']].iloc[0]
            # Populate the new dictionary with relevant data
            dic_str_data[my_data['old_index']] = my_data[rel_col].to_list()
        _ =df.groupby(old_index).apply(return_street_name)
        # convert the dictionary into a dataframe.
        updated_df = GeoDataFrame(index=dic_str_data.keys(), data= dic_str_data.values(),columns=[x.replace('_right', '',) for x in rel_col],crs=project_crs)
        updated_df['length'] = updated_df.length
        self.my_network = df3.append(updated_df)

#Roundabout
class EnvEntity:
        def __init__(self,network):
            self.dead_end_fd = None
            self.pnt_dead_end = None
            self.pnt_dic = {}
            self.first_last_dic = {'geometry': [], 'line_name': [], 'position': []}
            self.network = network


        def __populate_pnt_dic(self,point: type, name_of_line: str):
            """
            Make "pnt_dic" contain a list of all the lines connected to each point.
            :param point:
            :param name_of_line:
            :return:
            """
            if not point in self.pnt_dic:
                self.pnt_dic[point] = []
            self.pnt_dic[point].append(name_of_line)

        def __send_pnts(self,temp_line: GeoSeries):
            """
            # Send the first and the last points to populate_pnt_dic
            :return:
            """
            my_geom = temp_line['geometry']
            self.__populate_pnt_dic(my_geom.coords[0], temp_line.name)
            self.__populate_pnt_dic(my_geom.coords[-1], temp_line.name)

        def get_deadend_gdf(self,delete_short:int =30)-> GeoDataFrame:
            self.network.apply(self.__send_pnts, axis=1)

            deadend_list = [item[1][0] for item in self.pnt_dic.items() if len(item[1]) == 1]
            pnt_dead_end_0 = [item for item in self.pnt_dic.items() if len(item[1]) == 1] # Retain all the line points with deadened
            self.pnt_dead_end = [Point(x[0]) for x in pnt_dead_end_0]
            # Create shp file of deadened_pnts
            geometry,line_name = 'geometry','line_name'
            pnt_dead_end_df = GeoDataFrame(data=pnt_dead_end_0)
            pnt_dead_end_df[geometry]= pnt_dead_end_df[0].apply(lambda x:Point(x))
            pnt_dead_end_df[line_name] = pnt_dead_end_df[1].apply(lambda x:x[0])
            pnt_dead_end_df.crs = project_crs
            self.dead_end_fd = pnt_dead_end_df

            if delete_short>0:
                # If it is necessary to eliminate dead-end short segments, it is  important to delete them from the network geodataframe.

                deadend_gdf =self.network.loc[deadend_list]
                self.network.drop(index=deadend_gdf[deadend_gdf.length<delete_short].index,inplace=True)
                return deadend_gdf[deadend_gdf.length>delete_short]
            return self.network.loc[deadend_list]

        def update_the_current_network(self,temp_network):
            r"""
            Update the current network in the new changes
            :param temp_network:
            :return:
            """
            new_network_temp = self.network.drop(index=temp_network.index)
            self.network = new_network_temp.append(temp_network)
            self.network['length'] = self.network.length
            self.network  = self.network[self.network['length']>1]
class Roundabout(EnvEntity):
    def __init__(self,network: GeoDataFrame):
       EnvEntity.__init__(self,network)
       self.pnt_dic ={}
       self.centroid =self.__from_roundabout_to_centroid()
       self.network.rename(columns={'name': 'str_name'}, inplace=True)
    def __from_roundabout_to_centroid(self):
        # Find the center of each roundabout
        # create polygon around each polygon and union
        round_about_buffer = round_about.to_crs(project_crs)['geometry'].buffer(cap_style=1, distance=10,
                                                                                join_style=1).unary_union
        dic_data = {'name': [], 'geometry': []}
        if round_about_buffer.type=='Polygon': # In case we have only one polygon
            dic_data['name'].append(0)
            dic_data['geometry'].append(round_about_buffer.centroid)
        else:
            for ii, xx in enumerate(round_about_buffer):
                dic_data['name'].append(ii)
                dic_data['geometry'].append(xx.centroid)
        centroid =GeoDataFrame(dic_data, crs=project_crs)
        return centroid
        # GeoDataFrame(dic_data,crs=project_crs).to_file(f'{path_round_about}/roundabout_union.shp')

    def __first_last_pnt_of_line(self,row: GeoSeries):
        r"""
        It get geometry of line and fill the first_last_dic with the first and last point and the name of the line
        :return:
        """
        geo = list(row['geometry'].coords)
        self.first_last_dic['geometry'].extend([Point(geo[0]), Point(geo[-1])])
        self.first_last_dic['line_name'].extend([row.name] * 2)
        self.first_last_dic['position'].extend([0, -1])
    def deadend(self):
        r"""
        remove not connected line shorter than 100 meters and then return deadend_list lines and their endpoints (as another file)
        :return:
        """
        # Find the first and last points

        # Get deadend_gdf
        deadend_gdf = self.get_deadend_gdf()

        # Create gdf of line points with the reference to the line they belong
        deadend_gdf.apply(self.__first_last_pnt_of_line, axis=1)
        first_last_gdf = GeoDataFrame(self.first_last_dic, crs=project_crs)


        return deadend_gdf, first_last_gdf
    def __update_geometry(self,cur,s_join):
        r"""
        :return:
        """
        if cur['highway'] == 'footway':
            # Don't snap footway to roundabout
            return cur['geometry']
        # Get only the points that are deadened
        points_lines = [item for item in s_join[s_join['line_name'] == cur.name].iterrows()if item[1]['geometry'] in self.pnt_dead_end]
        if len(points_lines) == 0:
            # No roundabout nearby
            return cur['geometry']
        # get the line geometry to change the first and/ or last point
        geo_cur = list(cur['geometry'].coords)

        # iterate over the deadened points  near roundabout
        for ind in range(len(points_lines)):
            points_line = points_lines[ind]
            geo_cur[points_line[1]['position']] = self.centroid.loc[points_line[1]['index_right']]['geometry'].coords[
                0]
        return LineString(geo_cur)
    def my_spatial_join(self,deadend_lines, deadend_pnts,line_name):
        # Spatial join between roundabout centroid to nearby dead end lines
        # centroid = gpd.read_file(f'{path_round_about}/centroid.shp')
        s_join = gpd.sjoin_nearest(left_df=deadend_pnts, right_df=self.centroid, how='left', max_distance=100,
                                   distance_col='dist').dropna(subset='dist')

        # Deadened lines from both lines should be removed
        lines_to_delete_test = s_join['line_name'].unique() # all the Deadened lines close to roundabout

        # All deadened lines from both lines
        deads_both_side = self.dead_end_fd['line_name'].value_counts()
        deads_both_side =deads_both_side[deads_both_side==2]

        # Remove this lines from the database
        lines_to_delete=deads_both_side[deads_both_side.index.isin(lines_to_delete_test)]

        self.network = self.network[~((self.network[line_name].isin(lines_to_delete.index)) & (self.network.length<300))]
        deadend_lines = deadend_lines[~((deadend_lines[line_name].isin(lines_to_delete.index)) & (deadend_lines.length<300))]
        # Update the geometry so the roundabout will be part of the line geometry
        change_geo = deadend_lines.copy()

        change_geo['geometry'] = change_geo.apply(lambda x:self.__update_geometry(x,s_join), axis=1)

        return change_geo
# endregion

In [79]:
# starting point
new_network = gpd.read_file(f'{data_folder}/simplification/simp.shp').rename(columns={'is_simplif':'is_simplified'})
new_network

Unnamed: 0,name,highway,bearing,group,is_simplified,length,geometry
0,11,path,124.50,-1,0,398.806955,"LINESTRING (857888.020 5626761.570, 857902.213..."
1,11,path,124.50,-1,0,218.742678,"LINESTRING (857638.976 5626560.639, 857618.671..."
2,11,path,124.50,-1,0,205.477671,"LINESTRING (854914.954 5625472.999, 854917.670..."
3,14,residential,106.10,-1,0,168.300305,"LINESTRING (856136.574 5627425.278, 856138.912..."
4,18,path,40.85,-1,0,137.408522,"LINESTRING (860173.665 5627099.066, 860141.427..."
...,...,...,...,...,...,...,...
14562,Vicolo Santa Maria,pedestrian,25.20,-1,0,37.798613,"LINESTRING (854753.997 5632849.499, 854774.402..."
14563,Vicolo Valtorta,residential,26.50,-1,0,44.193187,"LINESTRING (854694.542 5629305.469, 854722.494..."
14564,Viottolo del Cral,footway,48.40,-1,0,76.615143,"LINESTRING (859390.699 5636638.352, 859320.123..."
14565,strada alle sei ville,footway,96.10,-1,0,186.690723,"LINESTRING (857998.760 5631175.272, 858020.668..."


In [80]:
# Test intersection network
num=0
new_gpd = new_network.copy()
obj_intersection = Intersection(new_gpd,num)


In [81]:

# First remove_false_nodes
my_network = remove_false_nodes(obj_intersection.my_network).reset_index(drop=True)
my_network.to_file(f'{data_folder}/delete_2_nodes/delete_false_intersection.shp')
# Create buffer around each element
buffer_around_lines= my_network['geometry'].buffer(cap_style=3, distance=1, join_style=3)


# s_join between buffer to lines
s_join_0 =gpd.sjoin(left_df=GeoDataFrame(geometry=buffer_around_lines,crs=project_crs),right_df=my_network)

# delete lines belong to the buffer
s_join = s_join_0[s_join_0.index!=s_join_0['index_right']]
s_join.reset_index().to_file(f'{data_folder}/delete_2_nodes/s_join.shp')

In [82]:
intersect_by_line =[]
#  find_intersection_points by function
inter_pnt_dic = {'geometry':[],'name':[]}
# Find new intersections that are not at the beginning or end of the line
def find_intersection_points(row):
        r"""
        find the intersection points between the two lines
        :param row:
        :return:
        """
        try:
            line_1 = my_network.loc[row.name]
            line_2 =  my_network.loc[row['index_right']]
            pnt = line_1.geometry.intersection(line_2.geometry)
            if isinstance(pnt,LineString): # The intersection is only between the buffer and the point ToDo - when the situation is a overlay line
                intersect_by_line.append(row)
                return
            if isinstance(pnt,MultiPoint):
                for single_pnt in pnt:
                    inter_pnt_dic['geometry'].append(single_pnt)
                    inter_pnt_dic['name'].append(row.name)
                return
            # If it is first or end continue
            if len(pnt.coords)==0 or pnt.coords[0]==line_1.geometry.coords[0] or pnt.coords[0]==line_1.geometry.coords[-1]:
                return
            inter_pnt_dic['geometry'].append(pnt)
            inter_pnt_dic['name'].append(row.name)
        except:
            intersect_by_line.append(row)
            print(f"{row.name},{row['index_right']}:{pnt}")
s_join.apply(lambda x: find_intersection_points(x), axis=1)
inter_pnt_gdf = GeoDataFrame(inter_pnt_dic,crs=project_crs)
inter_pnt_gdf.to_file(f'{data_folder}/delete_2_nodes/inter_pnt_dic.shp')

In [146]:
# Split string line by one point

segments = {'geometry':[],'org_id':[]}
# Groupby points name (which is the line they should split)
line_name = 588
group_pnts =  inter_pnt_gdf.groupby('name').get_group(line_name)
points  = group_pnts
points['is_split'] = True
row = my_network.loc[line_name]
current = list(row.geometry.coords)
points_line = [Point(x) for x in current]
points_line_gdf = GeoDataFrame(geometry=points_line,crs=project_crs)
points_line_gdf['is_split'] = False
line_all_pnts = points_line_gdf.append(points)
line_all_pnts['dis_from_the_start'] = line_all_pnts['geometry'].apply(lambda x:row.geometry.project(x))
line_all_pnts.sort_values('dis_from_the_start',inplace=True)
seg =[]
for point in line_all_pnts.iterrows():
    prop = point[1]
    seg.append(prop['geometry'])
    if prop['is_split']:
        segments['geometry'].append(LineString(seg))
        segments['org_id'].append(row.name)
        seg = [prop['geometry']]
# if the split point is the last one, you don't need to create new segment
if len(seg)>1:
    segments['geometry'].append(LineString(seg))
    segments['org_id'].append(row.name)
segments

{'geometry': [<shapely.geometry.linestring.LineString at 0x1f1e5726620>,
  <shapely.geometry.linestring.LineString at 0x1f1e5726da0>,
  <shapely.geometry.linestring.LineString at 0x1f1e5724ac0>,
  <shapely.geometry.linestring.LineString at 0x1f1eb9105b0>],
 'org_id': [588, 588, 588, 588]}

In [148]:
line_all_pnts

Unnamed: 0,geometry,is_split,name,dis_from_the_start
0,POINT (856149.844 5631584.217),False,,0.0
2187,POINT (856136.621 5631556.008),True,588.0,31.154519
2188,POINT (856136.177 5631555.061),True,588.0,32.20086
2186,POINT (856059.789 5631392.093),True,588.0,212.182889
1,POINT (856048.799 5631368.647),False,,238.076604


In [126]:
# Split string line by all
segments = {'geometry':[],'org_id':[]}
# Groupby points name (which is the line they should split)

my_groups =  inter_pnt_gdf.groupby('name')
for group_pnts in my_groups :
    points  = group_pnts[1]
    points['is_split'] = True

    # get the line to split by comparing the name
    row = my_network.loc[group_pnts[0]]
    current = list(row.geometry.coords)
    points_line = [Point(x) for x in current]
    points_line_gdf = GeoDataFrame(geometry=points_line,crs=project_crs)
    points_line_gdf['is_split'] = False

    # append all the points together (line points and split points)
    line_all_pnts = points_line_gdf.append(points)

    # Find the distance of each point form the begining of the line on the line.
    line_all_pnts['dis_from_the_start'] = line_all_pnts['geometry'].apply(lambda x:row.geometry.project(x))
    line_all_pnts.sort_values('dis_from_the_start',inplace=True)

    # split the line
    seg =[]
    for point in line_all_pnts.iterrows():
        prop = point[1]
        seg.append(prop['geometry'])
        if prop['is_split']:
            segments['geometry'].append(LineString(seg))
            segments['org_id'].append(row.name)
            seg = [prop['geometry']]
    # if the split point is the last one, you don't need to create new segment
    if len(seg)>1:
        segments['geometry'].append(LineString(seg))
        segments['org_id'].append(row.name)
network_split = GeoDataFrame(segments,crs=project_crs)
network_split.to_file(f'{data_folder}/delete_2_nodes/segments.shp')
cols_no_geometry = my_network.columns[:-1]
network_split_final = network_split.set_index('org_id')
network_split_final[cols_no_geometry] =my_network[cols_no_geometry]
# remove old and redundant line from our network and update with new one
network_split =my_network.drop(index=network_split_final.index.unique()).append(network_split_final)
network_split['length'] = network_split.length
my_network = network_split
my_network.reset_index(drop=True,inplace= True)
my_network.to_file(f'{data_folder}/delete_2_nodes/intersection_network.shp')

In [154]:
# Call to intersection_network as part of its class
num=0
new_gpd = new_network.copy()
obj_intersection = Intersection(new_gpd,num)
obj_intersection.intersection_network()


100%|██████████| 37316/37316 [00:06<00:00, 5781.14it/s]
100%|██████████| 1915/1915 [00:11<00:00, 171.26it/s]


In [150]:
# Test update_names
org_gpd= new_network.copy()
df1 = obj_intersection.my_network
df1.to_file(f'{data_folder}/intersection/source.shp')


# Split df1 into two GeoDataFrames: df3 (with names) and df4 (without names)
df3 = df1[df1['name'].notna()]
# df3.to_file(f'{data_folder}/delete_2_nodes/with_name.shp')
df4 = df1[df1['name'].isna()]
# df4.reset_index().to_file(f'{data_folder}/delete_2_nodes/no_name_init.shp')

# use only one polyline from the original dataframe for name even if the algorithm may found more
old_index  ='old_index'

df4_as_buffer= GeoDataFrame(geometry=df4['geometry'].buffer(distance  = 2, cap_style=2),crs=project_crs)
df = gpd.sjoin(df4_as_buffer, org_gpd) # for spatial join use buffer around each polyline.that provide better result
df.index.name = old_index
df['geometry'] = df4['geometry'] # bring the dataframe into linestring format
df.reset_index(inplace=True) # To be consistent with the following code and other dataframe

In [None]:
# Create a new dictionary to store the updated data.
dic_str_data = []


def return_street_name(aplcnts_tst):
    """
    1. "Count the occurrences of polylines with the same name within each aplcnts_tst."
    2. "Return the street if a aplcnts_tst contains only one unique street name."
    3. "If a single street name predominates within a aplcnts_tst, return that name."
    4. "For groups with multiple names, perform a buffer calculation around the respective polylines and determine the largest overlapping area, returning the name associated with that area."
    :param aplcnts_tst: group of applicants. Some of them hold the correct street name
    :return:
    """
    count_names = aplcnts_tst['name'].value_counts().sort_values(ascending=False)
    if len(count_names)==1:
        # there is only one name
        my_data = aplcnts_tst.iloc[0]
    elif count_names[1]- count_names[0]>1:
        # The highest number of polylines with the same name are bigger at least in 2:
        my_data = aplcnts_tst[aplcnts_tst['name'] == count_names.index[0]].iloc[0]
    else:
        # otherwise filter those with the most popular name or close to (-1)
        str_to_wrk_on  =aplcnts_tst[aplcnts_tst['name'].isin(count_names[count_names - count_names[0] < 2].index)]
        buffer_0 = GeoDataFrame(geometry=[str_to_wrk_on.iloc[0]['geometry'].buffer(distance  = 20, cap_style=2)],crs=project_crs) # Buffer around the polyline without name

        streets_right_geo = org_gpd[org_gpd.index.isin(str_to_wrk_on['index_right'])].reset_index() # Get all the applicants polylines and create buffer around
        buffer_1 =GeoDataFrame(geometry=streets_right_geo.buffer(distance  = 20, cap_style=2))
        streets_right_geo['area'] =gpd.overlay(buffer_1, buffer_0, how='intersection').area
        groupy = streets_right_geo.groupby('name')
        my_data_0 = groupy.get_group(groupy.sum()['area'].sort_values(ascending=False).index[0]).sort_values(by= 'area',ascending=False).iloc[0]
        # Get back to the @aplcnts_tst and find the relevant row by comparing index
        my_data = aplcnts_tst[aplcnts_tst['index_right'] == my_data_0['index']].iloc[0]
    # Populate the new dictionary with relevant data
    dic_str_data.append(my_data.to_list())
_ =df.groupby(old_index).apply(return_street_name)
updated_df = GeoDataFrame(data= dic_str_data,columns=df.columns,crs=project_crs).drop(columns='index_right').set_index(old_index)
updated_df['length'] = updated_df.length
my_network = df3.append(updated_df)
my_network.reset_index().to_file(f'{data_folder}/intersection/after_name_update.shp')
# convert the dictionary into a dataframe.

Code to consolidate nearest intersections
1. Get the first/start of each line
2. Make sure I have the name of the lines associated with these lines
3. Use DBSCAN with 20 meters threshold
4. For each group
   4.1.Find the point with the max number of connected lines, if it is one use it otherwise use the avarage
   4.2 Among whom are updated remove every line the start and lase point are the same
   4.3 Change the point of each line with new point


In [4]:
intersection_agg_folder = f'{data_folder}/intersection_agg/'
network = gpd.read_file(f'{data_folder}/final_test.shp')
network

Unnamed: 0,index,line_name,name,highway,group,is_simplif,length,geometry
0,0,2.0,11,path,-1.0,0.0,291.082651,"LINESTRING (854914.954 5625472.999, 854917.670..."
1,1,7.0,Ciclabile Carducci - Biglieri,cycleway,-1.0,0.0,65.403420,"LINESTRING (853836.046 5627755.961, 853819.893..."
2,2,8.0,Ciclabile Carducci - Biglieri,cycleway,-1.0,0.0,123.124385,"LINESTRING (853800.691 5627619.739, 853799.733..."
3,3,11.0,Corso Alberto Picco,residential,-1.0,0.0,13.477679,"LINESTRING (858208.731 5631102.951, 858213.618..."
4,4,12.0,Corso Alberto Picco,residential,-1.0,0.0,36.567639,"LINESTRING (858217.547 5631151.853, 858215.388..."
...,...,...,...,...,...,...,...,...
11629,12753,10189.0,Via Emanuele Artom,tertiary,0.0,1.0,191.401342,"LINESTRING (851556.905 5623002.577, 851583.718..."
11630,12765,7094.0,Via Val della Torre,residential,0.0,1.0,462.458055,"LINESTRING (853032.197 5636284.321, 852570.276..."
11631,12772,10879.0,Via Zino Giacomo Zini,tertiary,0.0,1.0,373.516854,"LINESTRING (852633.347 5626933.783, 852500.369..."
11632,12869,7175.0,Viale Piave,track,0.0,1.0,163.356689,"LINESTRING (858936.882 5626597.392, 858949.312..."


In [5]:
# 1. Get the first/start of each line
# Extract unique start and end points from all LineStrings
geometry= 'geometry'
index_right= 'index_right'
all_points = network[geometry].apply(lambda line: [Point(line.coords[0]), Point(line.coords[-1])]).explode()
# # Create a GeoSeries of unique points
unique_points = GeoDataFrame(geometry=gpd.GeoSeries(all_points).unique(),crs=project_crs)

In [34]:
# save data
unique_points.to_file(f'{intersection_agg_folder}unique_points.shp')

In [6]:
# 2. Make sure I have the name of the lines associated with these lines
pnts_line_name = unique_points.sjoin(network)[[index_right,geometry]].reset_index().dissolve(by='index',aggfunc=lambda x: x.tolist())
pnts_line_name['num_of_lines']= pnts_line_name[index_right].apply(len) # count the number of lines for each point

In [59]:
# 3. Use DBSCAN with 20 meters threshold
# Extract coordinates for DBSCAN
coordinates = pnts_line_name.geometry.apply(lambda point: (point.x, point.y)).tolist()
dbscan = DBSCAN(eps=40, min_samples=2)
pnts_line_name['group'] = dbscan.fit_predict(coordinates)
lines_to_update = pnts_line_name[pnts_line_name['group']>-1]


In [60]:
# if you want to save the files
def save_points_file(data,path):
    """
    The function get a data, arrange columns, convert list to string and export  it into a shpfile
    :param data:
    :param path:
    :return:
    """
    col_of_lists_as_str= 'col_of_lists_as_str'
    data[col_of_lists_as_str] = data[index_right].apply(str)
    data.drop(columns= [index_right]).to_file(path)
    data.drop(columns= [col_of_lists_as_str],inplace=True)
save_points_file(pnts_line_name,f'{intersection_agg_folder}pnts_line_name.shp')
save_points_file(lines_to_update,f'{intersection_agg_folder}lines_to_update.shp')

In [61]:
# 4.1.Find the point with the max number of connected lines, if it is one use it otherwise uses the average
# Find the maximum 'num' value for each group
num = 'num_of_lines'
group_name  = 'group'
new_geometry= 'new_geometry'
max_values_per_group =lines_to_update.groupby('group')['num_of_lines'].max()
# Filter rows with the maximum 'num' value for each group
result_gdf = lines_to_update[lines_to_update.set_index([group_name, num]).index.isin(max_values_per_group.items())]

# Custom aggregation function to calculate the average point for each group
def calculate_average_point(group):
    x_mean = group.x.mean()
    y_mean = group.y.mean()
    return Point(x_mean, y_mean)

# Apply the custom aggregation function to calculate average points per group
lines_to_update2= lines_to_update.set_index(group_name)
lines_to_update2['new_geometry'] = result_gdf.groupby(group_name )[geometry].apply(calculate_average_point)
lines_to_update2

Unnamed: 0_level_0,geometry,index_right,num_of_lines,new_geometry
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,POINT (853836.046 5627755.961),"[1, 4433, 4897, 4901]",4,POINT (853836.046 5627755.961)
1,POINT (853819.893 5627692.584),"[1, 4892, 4889, 6708]",4,POINT (853815.797 5627693.643)
2,POINT (853800.691 5627619.739),"[4889, 2, 4888, 4428]",4,POINT (853800.691 5627619.739)
3,POINT (853766.237 5627501.556),"[2, 6694, 4456, 4884]",4,POINT (853766.237 5627501.556)
4,POINT (858208.731 5631102.951),"[10471, 3, 5364]",3,POINT (858211.174 5631109.231)
...,...,...,...,...
380,POINT (853842.264 5633595.654),[11615],1,POINT (853818.031 5633608.400)
751,POINT (857839.874 5637898.540),[11622],1,POINT (857833.551 5637897.696)
557,POINT (858117.900 5636518.175),[11624],1,POINT (858102.817 5636493.742)
568,POINT (859165.963 5633417.646),[11625],1,POINT (859182.248 5633414.527)


In [62]:
# 4.2 Among whom are updated remove every line the start and last point are the same
# Get all the lines going to be deleted
lines_to_delete =[]

def update_lines_to_delete(row):
    # explode the lines names within each row list to separate rows
    lines_to_update_tmep = row[index_right].explode()

    # Identify rows with duplicate values
    lines_to_delete.extend(lines_to_update_tmep[lines_to_update_tmep.duplicated()].tolist())

lines_to_update2.groupby(level=group_name).apply(update_lines_to_delete)

# remove lines their geometry not going to change
lines_to_update3= lines_to_update2[lines_to_update2[geometry]!=lines_to_update2[new_geometry]]
lines_to_update3

Unnamed: 0_level_0,geometry,index_right,num_of_lines,new_geometry
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,POINT (853819.893 5627692.584),"[1, 4892, 4889, 6708]",4,POINT (853815.797 5627693.643)
4,POINT (858208.731 5631102.951),"[10471, 3, 5364]",3,POINT (858211.174 5631109.231)
4,POINT (858213.618 5631115.511),"[3, 10588, 4]",3,POINT (858211.174 5631109.231)
5,POINT (856964.680 5634042.802),"[16, 14, 102]",3,POINT (856951.761 5634048.352)
5,POINT (856938.843 5634053.901),"[14, 2411, 100]",3,POINT (856951.761 5634048.352)
...,...,...,...,...
380,POINT (853842.264 5633595.654),[11615],1,POINT (853818.031 5633608.400)
751,POINT (857839.874 5637898.540),[11622],1,POINT (857833.551 5637897.696)
557,POINT (858117.900 5636518.175),[11624],1,POINT (858102.817 5636493.742)
568,POINT (859165.963 5633417.646),[11625],1,POINT (859182.248 5633414.527)


In [63]:
# Save files
lines_to_update2['new_geometry'].to_file(f'{intersection_agg_folder}new_geometry.shp')
lines_to_update3['new_geometry'].to_file(f'{intersection_agg_folder}new_geometry_1.shp')

In [64]:
# For test only
network_delete = network[network.index.isin(lines_to_delete)]
network_delete.reset_index().to_file(f'{intersection_agg_folder}network_delete.shp')

In [65]:
# 4.3 Change the point of each line with new point
network_new = network[~network.index.isin(lines_to_delete)]
def update_network_with_aggregated_point(group):
    lines_in_group = group.explode(index_right)

    def update_one_line(points_data):
        if points_data.name not in lines_to_delete:
            updated_line_geo =network_new.loc[points_data.name]
            line_coords = updated_line_geo.geometry.coords
            if Point(line_coords[0])==points_data.geometry:
                network_new.at[points_data.name,geometry] = LineString([points_data[new_geometry]] + line_coords[1:])
            elif Point(line_coords[-1])==points_data.geometry:
                network_new.at[points_data.name,geometry] = LineString(line_coords[:-1]+[points_data[new_geometry]])
            else:
                print(points_data)
                print(lines_in_group)
    lines_in_group.set_index(index_right).apply(update_one_line,axis=1)

lines_to_update3.groupby(level=group_name).apply(update_network_with_aggregated_point)

In [31]:
# 4.3 on one point and one group
lines_to_update3.groupby(level=group_name).get_group(338)
network_new.loc[9018]['geometry'].coords[0]
if 5952 in lines_to_delete:
    print(True)
lines_to_update3
network_new.to_file(f'{intersection_agg_folder}network_new.shp')

Unnamed: 0_level_0,geometry,index_right,num_of_lines,new_geometry
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,POINT (853819.893 5627692.584),"[1, 4892, 4889, 6708]",4,POINT (853815.797 5627693.643)
4,POINT (858208.731 5631102.951),"[10471, 3, 5364]",3,POINT (858211.174 5631109.231)
4,POINT (858213.618 5631115.511),"[3, 10588, 4]",3,POINT (858211.174 5631109.231)
5,POINT (852167.979 5625542.157),"[22, 1353, 1354]",3,POINT (852171.485 5625540.306)
6,POINT (862052.983 5630797.754),"[23, 10650, 5503]",3,POINT (862054.541 5630796.832)
...,...,...,...,...
377,POINT (856977.654 5631406.992),[11597],1,POINT (856985.795 5631390.656)
554,POINT (851302.770 5638186.464),[11609],1,POINT (851295.895 5638180.756)
559,POINT (851804.954 5636013.425),[11610],1,POINT (851797.830 5636020.391)
558,POINT (857839.874 5637898.540),[11622],1,POINT (857833.551 5637897.696)


In [67]:
import geopandas as gpd
from shapely.geometry import LineString

# Create two polylines (LineString objects)
line1 = LineString([(0, 0), (1, 1), (2, 0)])
line2 = LineString([(0, 1), (1, 1), (2, 1)])

# Create a GeoDataFrame with the two polylines
gdf = gpd.GeoDataFrame(geometry=[line1, line2])

# Display the original GeoDataFrame
print("Original GeoDataFrame:")
print(gdf)

# Compute the difference between the two polylines
result = gdf.difference(gdf.iloc[1].geometry)

# Create a new GeoDataFrame with the result
result_gdf = gpd.GeoDataFrame(geometry=result)

# Display the resulting GeoDataFrame
print("\nGeoDataFrame after removing overlay:")
print(result_gdf)

Original GeoDataFrame:
                                            geometry
0  LINESTRING (0.00000 0.00000, 1.00000 1.00000, ...
1  LINESTRING (0.00000 1.00000, 1.00000 1.00000, ...

GeoDataFrame after removing overlay:
                                            geometry
0  MULTILINESTRING ((0.00000 0.00000, 1.00000 1.0...
1                                   LINESTRING EMPTY


In [71]:
result_gdf.iloc[0].explore()

AttributeError: 'Series' object has no attribute 'explore'