In [18]:
import geopandas as gpd
from shapely.geometry import Point, LineString
from geopandas import GeoDataFrame

from sklearn.cluster import DBSCAN

import os

# CRS label assigned to the point layer built from the network end points
project_crs = 'epsg:3857'

# Name of the place sub-folder under 'places'
place ='Turin_Italy'
# Parent directory of the current working directory
pjr_loc = os.path.dirname(os.getcwd())
# Folder holding the input/output layers of the selected place
data_folder= os.path.join(pjr_loc, f'places/{place}')
# Sub-folder that receives the intermediate layers of the aggregation steps
test_data_folder = os.path.join(data_folder, 'test/Aggregation')

# if you want to save the files
def save_points_file(data, path, index_col='index_right'):
    """
    Export ``data`` to ``path`` with its list-valued column stored as text.

    The column ``index_col`` is converted element-wise with ``str`` into a new
    column named ``col_of_lists_as_str``; ``index_col`` itself is left out of
    the exported file. The export is done on a derived frame, so ``data`` is
    never modified, even when writing fails.

    :param data: frame that provides ``to_file`` and contains ``index_col``
    :param path: destination handed over to ``to_file``
    :param index_col: name of the list-valued column to stringify. It is a
        parameter with a literal default so the function does not depend on a
        notebook-level variable that is defined in another cell.
    :return: None
    """
    col_of_lists_as_str = 'col_of_lists_as_str'
    # drop() returns a new frame, so the extra column is only added to the export copy
    export = data.drop(columns=[index_col])
    export[col_of_lists_as_str] = data[index_col].apply(str)
    export.to_file(path)

In [13]:
# Aggregation
print('Aggregate intersections')
# Read the network layer of the selected place
network= gpd.read_file(f'{data_folder}/network.shp')


# 1. Collect the first and the last vertex of every line
# Column-name constants reused by the following cells
geometry = 'geometry'
index_right = 'index_right'
# Each line yields [first vertex, last vertex]; explode() flattens the lists to one point per row
all_points = network[geometry].apply(lambda line: [Point(line.coords[0]), Point(line.coords[-1])]).explode()
# Keep every distinct point once and wrap the result in a GeoDataFrame
# NOTE(review): the CRS is assigned from project_crs without reprojecting - assumes network.shp
# is already stored in that CRS; confirm
unique_points = GeoDataFrame(geometry=gpd.GeoSeries(all_points).unique(), crs=project_crs)
# save data
unique_points.to_file(f'{test_data_folder}/unique_points.shp')

In [17]:
# 2. Attach to every point the list of network lines it joins to
# The spatial join produces one row per (point, line) pair; dissolving on the original point
# index collapses them back to one row per point, with index_right aggregated into a list
pnts_line_name = unique_points.sjoin(network)[[index_right, geometry]].reset_index().dissolve(by='index',
                                                                                              aggfunc=lambda
                                                                                                  x: x.tolist())
# count the number of lines for each point
pnts_line_name['num_of_lines'] = pnts_line_name[index_right].apply(len) 
# Save the points; save_points_file stores the list column as text
save_points_file(pnts_line_name, f'{test_data_folder}/pnts_line_name.shp')

In [8]:



# 3. Cluster the points with DBSCAN (eps=40, expressed in the units of the point coordinates)
# NOTE(review): this step used to be described as a 20 meters threshold while the code passes
# eps=40 - confirm which distance is intended
# Extract coordinates for DBSCAN
coordinates = pnts_line_name.geometry.apply(lambda point: (point.x, point.y)).tolist()
# min_samples=2: two points within eps of each other are enough to form a group
dbscan = DBSCAN(eps=40, min_samples=2)
# fit_predict returns one label per point; -1 marks points that belong to no group
pnts_line_name['group'] = dbscan.fit_predict(coordinates)
# Only points that were assigned to a group are candidates for aggregation
lines_to_update = pnts_line_name[pnts_line_name['group'] > -1]





# 4.1 Per group, keep the point(s) connected to the largest number of lines.
# A single winner is used as is; several winners are averaged in the following step.
num = 'num_of_lines'
group_name = 'group'
new_geometry = 'new_geometry'
# Largest line count found in each group (indexed by group label)
max_values_per_group = lines_to_update.groupby(group_name)[num].max()
# Look up every row's group maximum and keep the rows whose own count reaches it
reaches_group_max = lines_to_update[num] == lines_to_update[group_name].map(max_values_per_group)
result_gdf = lines_to_update[reaches_group_max]


# Aggregation helper: centroid (arithmetic mean) of the points in one group
def calculate_average_point(group):
    """
    Build a single Point from the mean x and the mean y of ``group``.

    :param group: object exposing ``x`` and ``y`` collections that provide ``mean()``
    :return: ``Point(mean of x, mean of y)``
    """
    mean_x, mean_y = group.x.mean(), group.y.mean()
    return Point(mean_x, mean_y)


# Index the grouped points by their group label so the per-group result below aligns on it
lines_to_update2 = lines_to_update.set_index(group_name)
# Average the max-line-count points of each group (result_gdf); the assignment aligns on the
# group label, so every row of a group receives that group's averaged point
lines_to_update2['new_geometry'] = result_gdf.groupby(group_name)[geometry].apply(calculate_average_point)

# 4.2 Among the grouped points, find every line that is attached to more than one point of
# the same group; those lines are removed from the network later on
# Accumulator filled by update_lines_to_delete
lines_to_delete = []


def update_lines_to_delete(row):
    """
    Append to ``lines_to_delete`` the line ids that repeat within one group.

    :param row: rows of a single group; the ``index_right`` column holds lists of line ids
    :return: None (the module-level list ``lines_to_delete`` is extended in place)
    """
    # One line id per row instead of one list per row
    exploded_ids = row[index_right].explode()
    # duplicated() flags the second and later occurrences of an id
    is_repeated = exploded_ids.duplicated()
    lines_to_delete.extend(exploded_ids[is_repeated].tolist())


# Fill lines_to_delete one group at a time (apply is called only for its side effect)
lines_to_update2.groupby(level=group_name).apply(update_lines_to_delete)

# remove points whose geometry is not going to change
lines_to_update3 = lines_to_update2[lines_to_update2[geometry] != lines_to_update2[new_geometry]]

# 4.3 Change the point of each line with new point
# Start from the network without the lines collected above. The explicit copy() matters:
# network_new is written into cell by cell afterwards, and writing into a filtered selection
# of `network` raises SettingWithCopyWarning and leaves it unclear which frame is modified.
network_new = network[~network.index.isin(lines_to_delete)].copy()


def update_network_with_aggregated_point(group):
    """
    Move the end points of the lines of one group onto the group's new point.

    For every (point, line) pair of ``group`` the matching end of the line stored in
    ``network_new`` is replaced by the row's ``new_geometry``; ``network_new`` is
    modified in place. Lines listed in ``lines_to_delete`` are skipped.

    :param group: rows of a single group, with list-valued ``index_right``,
        ``geometry`` (current point) and ``new_geometry`` (replacement point)
    :return: None
    """
    # One row per (point, line) pair
    lines_in_group = group.explode(index_right)

    def update_one_line(points_data):
        # After set_index below, the row label is the id of the line
        line_id = points_data.name
        if line_id in lines_to_delete:
            return

        line_coords = network_new.loc[line_id].geometry.coords
        current_point = points_data.geometry
        if Point(line_coords[0]) == current_point:
            # The point is the start of the line: swap the first vertex
            replaced = [points_data[new_geometry]] + line_coords[1:]
        elif Point(line_coords[-1]) == current_point:
            # The point is the end of the line: swap the last vertex
            replaced = line_coords[:-1] + [points_data[new_geometry]]
        else:
            # Neither end matches the point: report the row and its group, change nothing
            print(points_data)
            print(lines_in_group)
            return
        network_new.at[line_id, geometry] = LineString(replaced)

    rows_by_line = lines_in_group.set_index(index_right)
    rows_by_line.apply(update_one_line, axis=1)


# Apply the end-point replacement to network_new, one group at a time
lines_to_update3.groupby(level=group_name).apply(update_network_with_aggregated_point)

# NOTE(review): line_name is not used in the visible cells - confirm before removing
line_name = 'line_name'

# Write the updated network into the same folder as the input network
network_new.to_file(f'{data_folder}/network_new.shp')