In [1]:

import os
import glob
import math
import pickle

import numpy as np
import pandas as pd
import geopandas as gpd
import torch
from collections import defaultdict

import processing_io as pio
from torch_geometric.transforms import LineGraph

from torch_geometric.data import Data, Batch
import shapely.wkt as wkt
from tqdm import tqdm
import fiona
import os

import alphashape
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
from shapely.geometry import Point
import random

highway_mapping = {
    'trunk': 0, 'trunk_link': 0, 'motorway_link': 0,
    'primary': 1, 'primary_link': 1,
    'secondary': 2, 'secondary_link': 2,
    'tertiary': 3, 'tertiary_link': 3,
    'residential': 4, 'living_street': 5,
    'pedestrian': 6, 'service': 7,
    'construction': 8, 'unclassified': 9,
    'np.nan': -1
}
result_df_name = 'sim_output_1pm_capacity_reduction_10k_PRELIMINARY'
# result_df_name = 'sim_output_1pm_capacity_reduction_10k'

result_path = '../../../../data/datasets_simulation_outputs/' + result_df_name + '.pt'
string_is_for_1pm = "pop_1pm"

base_dir_sample_sim_input = '../../../../data/' + string_is_for_1pm + '_simulations/' + string_is_for_1pm + '_policies_combinations_with_normal_dist/'
subdirs_pattern = os.path.join(base_dir_sample_sim_input, 'output_networks_*')
subdirs = list(set(glob.glob(subdirs_pattern)))
subdirs.sort()

paris_inside_bvd_peripherique = "../../../../data/paris_inside_bvd_per/referentiel-comptages-edit.shp"
gdf_paris_inside_bvd_per = gpd.read_file(paris_inside_bvd_peripherique)
boundary_df = alphashape.alphashape(gdf_paris_inside_bvd_per, 435).exterior[0]
linear_ring_polygon = Polygon(boundary_df)

gdf_basecase_output_links = gpd.read_file('results/' + string_is_for_1pm + '_basecase_average_output_links.geojson')
gdf_basecase_average_mode_stats = pd.read_csv('results/' + string_is_for_1pm + '_basecase_average_mode_stats.csv', delimiter=';')
districts = gpd.read_file("../../../../data/visualisation/districts_paris.geojson")

## Abstract

This is further than process_output_of_simulations_with_all_output_links_and_eqasim_info.ipynb, as it also includes more input information.

Note that there is more than one strategy to deal with the fact that there are more than one district per link. We implement the strategy of stacking the information of all districts together. 
An alternative strategy would be to use the mean of the information of the districts.

Process the districts manually, so that each link belongs to at most 3 districts.

## Process results

Process the outputs of the simulations for further usage by GNN.

In [2]:
def compute_close_homes(links_gdf_input:pd.DataFrame, information_gdf_input:pd.DataFrame, utm_crs:str, distance:int=50):
    links_gdf = links_gdf_input.copy()
    information_gdf = information_gdf_input.copy()
    close_places = []
    links_gdf_utm = links_gdf.to_crs(utm_crs)
    information_gdf_utm = information_gdf.to_crs(utm_crs)
    for i, row in tqdm(enumerate(links_gdf_utm.iterrows()), desc="Processing rows", unit="row"):
        buffer_utm = row[1].geometry.buffer(distance=distance)
        buffer = gpd.GeoSeries([buffer_utm], crs=utm_crs).to_crs(links_gdf_utm.crs)[0]
        matched_information = information_gdf_utm[information_gdf_utm.geometry.within(buffer)]
        socioprofessional_classes = matched_information['socioprofessional_class'].tolist()
        close_places.append((len(socioprofessional_classes), socioprofessional_classes))
    return close_places

def process_close_count_to_tensor(close_count_list: list):
    socio_professional_classes = [item[1] for item in close_count_list]
    unique_classes = set([2, 3, 4, 5, 6, 7, 8])
    class_to_index = {cls: idx for idx, cls in enumerate(unique_classes)}

    tensor_shape = (len(close_count_list), len(unique_classes))
    close_homes_tensor = torch.zeros(tensor_shape)

    for i, classes in enumerate(socio_professional_classes):
        for cls in classes:
            if cls in class_to_index:  # Ensure the class is in the predefined set
                close_homes_tensor[i, class_to_index[cls]] += 1
    
    close_homes_tensor_sparse = close_homes_tensor.to_sparse()
    return close_homes_tensor_sparse

# Read all network data into a dictionary of GeoDataFrames
def compute_result_dic_output_links():
    result_dic = {}
    base_network_no_policies = gdf_basecase_output_links
    result_dic["base_network_no_policies"] = base_network_no_policies
    for subdir in tqdm(subdirs, desc="Processing subdirs", unit="subdir"):
        # print(f'Accessing folder: {subdir}')
        # print(len(os.listdir(subdir)))
        networks = [network for network in os.listdir(subdir) if not network.endswith(".DS_Store")]
        for network in networks:
            file_path = os.path.join(subdir, network)
            policy_key = pio.create_policy_key_1pm(network)
            df_output_links = pio.read_output_links(file_path)
            df_output_links.drop(columns=['geometry'], inplace=True)
            if (df_output_links is not None):
                gdf_extended = pio.extend_geodataframe(gdf_base=gdf_basecase_output_links, gdf_to_extend=df_output_links, column_to_extend='highway', new_column_name='highway')
                gdf_extended = pio.extend_geodataframe(gdf_base=gdf_basecase_output_links, gdf_to_extend=gdf_extended, column_to_extend='vol_car', new_column_name='vol_car_base_case')
                result_dic[policy_key] = gdf_extended
        break
    return result_dic

def calculate_averaged_results(trips_df):
    """Calculate average travel time and routed distance grouped by mode."""
    return trips_df.groupby('mode').agg(
        total_travel_time=('travel_time', 'mean'),
        total_routed_distance=('routed_distance', 'mean')
    ).reset_index()

def compute_result_dic_mode_stats(calculate_averaged_results):
    result_dic_mode_stats = {}
    result_dic_mode_stats["base_network_no_policies"] = gdf_basecase_average_mode_stats
    for subdir in tqdm(subdirs, desc="Processing subdirs", unit="subdir"):
        networks = [network for network in os.listdir(subdir) if not network.endswith(".DS_Store")]
        for network in networks:
            file_path = os.path.join(subdir, network)
            policy_key = pio.create_policy_key_1pm(network)
            df_mode_stats = pd.read_csv(file_path + '/eqasim_trips.csv', delimiter=';')
            averaged_results = calculate_averaged_results(df_mode_stats)
            if (averaged_results is not None):
                result_dic_mode_stats[policy_key] = averaged_results
        break
    return result_dic_mode_stats

def encode_modes(gdf):
    """Encode the 'modes' attribute based on specific strings."""
    modes_conditions = {
        'car': gdf['modes'].str.contains('car', case=False, na=False).astype(int),
        'bus': gdf['modes'].str.contains('bus', case=False, na=False).astype(int),
        'pt': gdf['modes'].str.contains('pt', case=False, na=False).astype(int),
        'train': gdf['modes'].str.contains('train', case=False, na=False).astype(int),
        'rail': gdf['modes'].str.contains('rail', case=False, na=False).astype(int),
        'subway': gdf['modes'].str.contains('subway', case=False, na=False).astype(int)
    }
    modes_encoded = pd.DataFrame(modes_conditions)
    return torch.tensor(modes_encoded.values, dtype=torch.float)


def encode_modes_string(mode_string):
    """Encode the 'modes' attribute based on specific strings."""
    modes_conditions = {
        'car': int("car" in mode_string),
        'bus': int("bus" in mode_string),
        'pt': int("pt" in mode_string),
        'train': int("train" in mode_string),
        'rail': int("rail" in mode_string),
        'subway': int("subway" in mode_string),
    }
    modes_encoded_tensor = torch.tensor(list(modes_conditions.values()), dtype=torch.float)
    return modes_encoded_tensor

def get_dfs(base_dir:str):
    files = os.listdir(base_dir)
    for file in files:
        file_path = os.path.join(base_dir, file)
        base_name, ext = os.path.splitext(file)
        if base_name.startswith("idf_1pm_"):
            base_name = base_name.replace("idf_1pm_", "")
        var_name = base_name  # Start with the cleaned base name
    
        if file.endswith('.csv'):
            try:
                var_name = f"{var_name}_df"  
                globals()[var_name] = pd.read_csv(file_path, sep=";")
                print(f"Loaded CSV file: {file} into variable: {var_name}")
            except Exception as e:
                print(f"Error loading CSV file {file}: {e}")
            
        elif file.endswith('.gpkg'):
            try:
                var_name = f"{var_name}_gdf"  
                layers = fiona.listlayers(file_path)
                geodataframes = {layer: gpd.read_file(file_path, layer=layer, geometry = 'geometry', crs="EPSG:2154") for layer in layers}
                for layer, gdf in geodataframes.items():
                # print(f"Layer: {layer}")
                    gdf = gdf.to_crs(epsg=4326)
                    globals()[var_name] = gdf
                    print(f"Loaded GPKG file: {file} into variable: {var_name}")
            except Exception as e:
                print(f"Error loading CSV file {file}: {e}")
    homes_gdf = globals()["homes_gdf"]
    households_df = globals()["households_df"]
    persons_df = globals()["persons_df"]
    activities_gdf = globals()["activities_gdf"]
    trips_df = globals()["trips_gdf"]
    return homes_gdf, households_df, persons_df, activities_gdf, trips_df

def extract_start_end_points(geometry):
    if len(geometry.coords) != 2:
        raise ValueError("Linestring does not have exactly 2 elements.")
    return geometry.coords[0], geometry.coords[-1]

def get_close_trips_tensor(links_gdf_input, trips_gdf_input, utm_crs, distance):
    close_trips_count = compute_close_homes(links_gdf_input = links_gdf_input, information_gdf_input = trips_gdf_input, utm_crs = utm_crs, distance=distance)
    close_trips_count_tensor = process_close_count_to_tensor(close_trips_count)
    return close_trips_count, close_trips_count_tensor

def get_start_and_end_gdf(trips_with_socio, crs):
    trips_start = trips_with_socio.copy()
    trips_end = trips_with_socio.copy()

    trips_start_gdf = gpd.GeoDataFrame(
    trips_start, 
    geometry=gpd.points_from_xy(
        trips_start['start_point'].apply(lambda p: p[0]), 
        trips_start['start_point'].apply(lambda p: p[1])
    ), 
    crs=crs
)

    trips_end_gdf = gpd.GeoDataFrame(
    trips_end, 
    geometry=gpd.points_from_xy(
        trips_end['end_point'].apply(lambda p: p[0]), 
        trips_end['end_point'].apply(lambda p: p[1])
    ), 
    crs=crs
)
    return trips_start_gdf,trips_end_gdf

def process_centroid(geom_list):
    if not geom_list:  # Empty list
        return [np.nan, np.nan, np.nan]
    elif len(geom_list) == 1:
        return [geom_list[0], np.nan, np.nan]
    elif len(geom_list) == 2:
        return [geom_list[0], geom_list[1], np.nan]
    else:
        return [geom_list[0], geom_list[1], geom_list[2]]
    
def extract_point_coordinates(geom_list):
    coordinates = []
    for geom in geom_list:
        if isinstance(geom, Point):
            coordinates.append((geom.x, geom.y))
        else:
            coordinates.append((np.nan, np.nan))
    return coordinates

def process_value_list(perimeter_list):
    if not perimeter_list:  # Empty list
        return [np.nan, np.nan, np.nan]
    elif len(perimeter_list) == 1:
        return [perimeter_list[0], np.nan, np.nan]
    elif len(perimeter_list) == 2:
        return [perimeter_list[0], perimeter_list[1], np.nan]
    else:
        return [perimeter_list[0], perimeter_list[1], perimeter_list[2]]
    
def compute_district_2_information_counts(district_information_counts, column_to_filter_for):
    district_group_2_information_counts = {}
    for district, group in district_information_counts:        
        # ignore groups with more than one district here. 
        if len(district) == 1:
            total_counts = 0
            total_distributions = []
            counts = group[column_to_filter_for].values            
            for c in counts:
                total_counts += c[0]
                if c[1] is not None and len(c[1]) > 0:
                    total_distributions.extend(c[1])
            distribution_counts = [total_distributions.count(i) for i in range(2, 9)]   
            district_group_2_information_counts[district] = distribution_counts
    return district_group_2_information_counts, distribution_counts

def compute_district_2_information_tensor(district_2_information_counts, distribution_counts, gdf_input):
    district_home_counts_tensor = torch.zeros((len(gdf_input), 3, len(distribution_counts)), dtype=torch.float)
    nan_tensor = torch.full((len(distribution_counts),), float('nan'))

    for idx, row in gdf_input.iterrows():
        district_combination = row['district']
        district_combination_tuple = tuple(district_combination)
        if len(district_combination_tuple) == 0:
            district_home_counts_tensor[idx] = torch.stack([nan_tensor, nan_tensor, nan_tensor])
        elif len(district_combination_tuple) == 1:
            district_home_counts_tensor[idx] = torch.stack([torch.tensor(district_2_information_counts[district_combination_tuple]), nan_tensor, nan_tensor])
        elif len(district_combination_tuple) == 2:
            a, b = district_combination_tuple
            district_home_counts_tensor[idx] = torch.stack([torch.tensor(district_2_information_counts[(a,)]), torch.tensor(district_2_information_counts[(b,)]), nan_tensor])
        elif len(district_combination_tuple) == 3:
            a, b, c = district_combination_tuple
            district_home_counts_tensor[idx] = torch.stack([torch.tensor(district_2_information_counts[(a,)]), torch.tensor(district_2_information_counts[(b,)]), torch.tensor(district_2_information_counts[(c,)])])
        else:
            print("NOT OK!")
            print(district_combination_tuple)
    return district_home_counts_tensor

def preprocess_links(links_gdf):
    for index, row in links_gdf.iterrows():
        if len(row['district']) >= 4:
            row['district'].pop(random.randint(0, len(row['district']) - 1))
    return links_gdf

def find_duplicate_edges_in_gdf(gdf):
    edge_count = defaultdict(list)
    for idx, row in gdf.iterrows():
        edge = tuple(sorted([row['from_node'], row['to_node']]))
        edge_count[edge].append(idx)
    
    duplicates = {edge: indices for edge, indices in edge_count.items() if len(indices) > 1}
    return duplicates

In [3]:
result_dic_output_links = compute_result_dic_output_links()
result_dic_mode_stats = compute_result_dic_mode_stats(calculate_averaged_results)
base_gdf = result_dic_output_links["base_network_no_policies"]
links_gdf_base = gpd.GeoDataFrame(base_gdf, geometry='geometry')
links_gdf_base.crs = "EPSG:2154"  # Assuming the original CRS is EPSG:2154
links_gdf_base.to_crs("EPSG:4326", inplace=True)
districts['district_centroid'] = districts['geometry'].centroid
links_gdf_with_districts = gpd.sjoin(links_gdf_base, districts, how='left', op='intersects')

# Group by edge and aggregate the district names
links_gdf_with_districts = links_gdf_with_districts.groupby('link').agg({
    'from_node': 'first',
    'to_node': 'first',
    'length': 'first',
    'freespeed': 'first',
    'capacity': 'first',
    'lanes': 'first',
    'modes': 'first',
    'vol_car': 'first',
    'highway': 'first',
    'geometry': 'first',
    'c_ar': lambda x: list(x.dropna()),
    'district_centroid': lambda x: list(x.dropna()),
    'perimetre': lambda x: list(x.dropna()),
    'surface': lambda x: list(x.dropna()),
}).reset_index()
gdf_now = gpd.GeoDataFrame(links_gdf_with_districts, geometry='geometry', crs=links_gdf_base.crs)
gdf_now = gdf_now.rename(columns={'c_ar': 'district', 'perimetre': 'district_perimeter', 'surface': 'district_surface'})
links_gdf_final = gdf_now.copy()

Processing subdirs:   0%|          | 0/100 [00:07<?, ?subdir/s]
Processing subdirs:   0%|          | 0/100 [00:01<?, ?subdir/s]

  districts['district_centroid'] = districts['geometry'].centroid
  if await self.run_code(code, result, async_=asy):
  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():


In [91]:
result_dic_mode_stats['Policy introduced in Arrondissement(s) d, 8, 11, 14, 17, 19'].shape

(6, 3)

In [68]:
districts.head()

Unnamed: 0,c_ar,surface,perimetre,geometry,district_centroid
0,1,1824613.0,6054.936862,"POLYGON ((2.32801 48.86992, 2.32997 48.86851, ...",POINT (2.33644 48.86256)
1,2,991153.7,4554.10436,"POLYGON ((2.35152 48.86443, 2.35095 48.86341, ...",POINT (2.34280 48.86828)
2,3,1170883.0,4519.263648,"POLYGON ((2.36383 48.86750, 2.36389 48.86747, ...",POINT (2.36000 48.86287)
3,4,1600586.0,5420.908434,"POLYGON ((2.36851 48.85573, 2.36900 48.85374, ...",POINT (2.35763 48.85434)
4,5,2539375.0,6239.195396,"POLYGON ((2.36443 48.84614, 2.36484 48.84584, ...",POINT (2.35071 48.84444)


In [71]:
# Convert district_centroid to a tensor of size (20, 2)
district_centroids = districts['district_centroid'].apply(lambda point: [point.x, point.y])
district_centroids_tensor = torch.tensor(district_centroids.tolist(), dtype=torch.float32)

# Ensure the tensor is of size (20, 2)
if district_centroids_tensor.size(0) != 20 or district_centroids_tensor.size(1) != 2:
    raise ValueError("The resulting tensor does not have the expected size of (20, 2)")
# Pad the tensor to size (20, 3, 2) by duplicating entries
district_centroids_tensor_padded = district_centroids_tensor.unsqueeze(1).expand(-1, 3, -1)

tensor([[ 2.3364, 48.8626],
        [ 2.3428, 48.8683],
        [ 2.3600, 48.8629],
        [ 2.3576, 48.8543],
        [ 2.3507, 48.8444],
        [ 2.3329, 48.8491],
        [ 2.3122, 48.8562],
        [ 2.3126, 48.8727],
        [ 2.3375, 48.8772],
        [ 2.3607, 48.8761],
        [ 2.3801, 48.8591],
        [ 2.4213, 48.8350],
        [ 2.3623, 48.8284],
        [ 2.3265, 48.8292],
        [ 2.2928, 48.8401],
        [ 2.2620, 48.8604],
        [ 2.3068, 48.8873],
        [ 2.3482, 48.8926],
        [ 2.3848, 48.8871],
        [ 2.4012, 48.8635]])
torch.Size([20, 3, 2])


In [4]:
def summarize_duplicate_edges(gdf):
    # Check if 'vol_car' exists and print its data type
    if 'vol_car' not in gdf.columns:
        print("'vol_car' column does not exist in the dataframe")
        return gdf

    # Create a unique identifier for each edge, regardless of direction
    gdf['edge_id'] = gdf.apply(lambda row: tuple(sorted([row['from_node'], row['to_node']])), axis=1)
    
    # Group by the edge_id
    grouped = gdf.groupby('edge_id')
    
    # Function to aggregate the data
    def aggregate_edges(group):
        # Sum the 'vol_car' column
        vol_car_sum = group['vol_car'].sum()
        
        # Take other attributes from the first entry
        first_entry = group.iloc[0]
        
        # Create a new row with combined data
        combined = first_entry.copy()
        combined['vol_car'] = vol_car_sum
        
        # If you want to keep track of the original directions, you can add this info
        combined['original_directions'] = list(group[['from_node', 'to_node']].itertuples(index=False, name=None))
        
        return combined
    
    # Apply the aggregation
    summarized_gdf = grouped.apply(aggregate_edges)
    
    # Reset the index and drop the temporary edge_id column
    summarized_gdf = summarized_gdf.reset_index(drop=True)
    summarized_gdf = summarized_gdf.drop(columns=['edge_id'])
    
    return summarized_gdf

# Apply the summarization to links_gdf_final
links_gdf_summarized = summarize_duplicate_edges(links_gdf_final)

# Print some information about the summarization
print(f"Original number of edges: {len(links_gdf_final)}")
print(f"Number of edges after summarization: {len(links_gdf_summarized)}")

# Check if there are any remaining duplicates
remaining_duplicates = find_duplicate_edges_in_gdf(links_gdf_summarized)
print(f"Number of remaining duplicate edges: {len(remaining_duplicates)}")

Original number of edges: 31216
Number of edges after summarization: 25309
Number of remaining duplicate edges: 0


In [6]:
links_gdf_final = links_gdf_summarized.copy()

In [7]:
max_district_length = max(links_gdf_final['district'].apply(len))
print(f"The maximum length of the list in 'district' in links_gdf_final is: {max_district_length}")

The maximum length of the list in 'district' in links_gdf_final is: 4


In [8]:
base_dir_sample_sim_input = '../../../../data/pop_1pm_simulations/idf_1pm/' 
homes_gdf, households_df, persons_df, activities_gdf, trips_df = get_dfs(base_dir=base_dir_sample_sim_input)

Loaded CSV file: idf_1pm_persons.csv into variable: persons_df
Loaded GPKG file: idf_1pm_commutes.gpkg into variable: commutes_gdf
Loaded CSV file: idf_1pm_households.csv into variable: households_df
Loaded CSV file: idf_1pm_trips.csv into variable: trips_df
Loaded CSV file: idf_1pm_activities.csv into variable: activities_df
Loaded CSV file: idf_1pm_vehicle_types.csv into variable: vehicle_types_df
Loaded GPKG file: idf_1pm_trips.gpkg into variable: trips_gdf
Loaded GPKG file: idf_1pm_activities.gpkg into variable: activities_gdf
Loaded CSV file: idf_1pm_vehicles.csv into variable: vehicles_df
Loaded GPKG file: idf_1pm_homes.gpkg into variable: homes_gdf


In [9]:
population_df = pd.read_csv("intermediate_results/population.csv")

sorted_population_df = population_df.sort_values(by="id")
sorted_persons_df = persons_df.sort_values(by="person_id")
merged_df = pd.merge(sorted_persons_df, sorted_population_df, left_on="person_id", right_on="id")
removed_some_columns = merged_df.copy()
removed_some_columns = removed_some_columns.drop(columns=['employed_y', 'hasPtSubscription', 'householdId', 'sex_y', 'htsPersonId', 'censusPersonId', 'hasLicense', 'id', 'age_y'])
updated_persons = removed_some_columns.copy()
persons_with_geospatial_information = homes_gdf.merge(updated_persons, on='household_id', how='right')

if not isinstance(persons_with_geospatial_information, gpd.GeoDataFrame):
    persons_with_geospatial_information = gpd.GeoDataFrame(persons_with_geospatial_information, geometry=gpd.points_from_xy(persons_with_geospatial_information.longitude, persons_with_geospatial_information.latitude), crs= links_gdf_final.crs)

utm_crs = 'EPSG:32631'  # UTM zone 31N

In [10]:
# # DEAL WITH TRIPS

# trips_with_socio = trips_df.merge(persons_with_geospatial_information[['person_id', 'socioprofessional_class']], on='person_id', how='left')
# trips_with_socio['start_point'] = trips_with_socio['geometry'].apply(lambda geom: extract_start_end_points(geom)[0])
# trips_with_socio['end_point'] = trips_with_socio['geometry'].apply(lambda geom: extract_start_end_points(geom)[1])

# trips_start_gdf, trips_end_gdf = get_start_and_end_gdf(trips_with_socio=trips_with_socio, crs=links_gdf_final.crs)

# # Create tensors for each combination of "preceding_purpose" and "following_purpose"
# unique_purposes = trips_with_socio['preceding_purpose'].unique()
# close_start_trips_tensor_dict = {}
# close_start_trips_dict = {}
# close_end_trips_tensor_dict = {}
# close_end_trips_dict = {}

# for preceding_purpose in tqdm(unique_purposes, desc="Processing preceding purposes", unit="purpose"):
#     for following_purpose in tqdm(unique_purposes, desc="Processing following purposes", unit="purpose"):
#         if preceding_purpose != following_purpose:
#             filtered_trips = trips_with_socio[(trips_with_socio['preceding_purpose'] == preceding_purpose) & (trips_with_socio['following_purpose'] == following_purpose)]
#             if not filtered_trips.empty:
#                 filtered_trips_start_gdf, filtered_trips_end_gdf = get_start_and_end_gdf(trips_with_socio=filtered_trips, crs=links_gdf_final.crs)
        
#                 tensor_key = f"{preceding_purpose}_{following_purpose}"
#                 string_trips_start = "trips_start_" + tensor_key
#                 string_trips_end = "trips_end_" + tensor_key
                
#                 close_start_trips, close_start_trips_tensor = get_close_trips_tensor(links_gdf_input=links_gdf_final, trips_gdf_input=filtered_trips_start_gdf, utm_crs=utm_crs, distance=50)        
#                 close_start_trips_tensor_dict[tensor_key] = close_start_trips_tensor
#                 close_start_trips_dict[tensor_key] = close_start_trips
#                 links_gdf_final[string_trips_start ] = close_start_trips
                
#                 close_end_trips, close_end_trips_tensor = get_close_trips_tensor(links_gdf_input=links_gdf_final, trips_gdf_input=filtered_trips_end_gdf, utm_crs=utm_crs, distance=50)
#                 close_end_trips_tensor_dict[tensor_key] = close_end_trips_tensor
#                 close_end_trips_dict[tensor_key] = close_end_trips
#                 links_gdf_final[string_trips_end] = close_end_trips

# for key, tensor in close_start_trips_tensor_dict.items():
#     print(f"Size of tensor for {key}: {tensor.size()}")

In [11]:
# # for key, tensor in close_end_trips_tensor_dict.items():
# #     print(f"Size of tensor for {key}: {tensor.size()}")
    
# for key, tensor in close_end_trips_dict.items():
#     print(f"Size of tensor for {key}: {len(tensor)}")

In [12]:
# DEAL WITH HOMES

links_gdf_final.crs = "EPSG:4326"

close_homes_count_normal = compute_close_homes(links_gdf_input = links_gdf_final, information_gdf_input = persons_with_geospatial_information, utm_crs = utm_crs)
links_gdf_final['close_homes_count'] = close_homes_count_normal
close_homes_tensor = process_close_count_to_tensor(close_homes_count_normal)

Processing rows: 25309row [01:04, 389.50row/s]


In [13]:
# # DEAL WITH ACTIVITIES

# activities_with_socio = activities_gdf.merge(persons_with_geospatial_information[['household_id', 'socioprofessional_class']], on='household_id', how='left')
# grouped_activities = activities_with_socio.groupby('purpose')
# activities_by_purpose = {purpose: group.reset_index(drop=True) for purpose, group in grouped_activities}
# activities_by_purpose_tensor = {}
# for purpose, activities in activities_by_purpose.items():
#     close_activities_count_purpose = f"close_activities_count_{purpose}"
#     close_activity_count = compute_close_homes(links_gdf_input=links_gdf_final, information_gdf_input=activities, utm_crs=utm_crs)
#     links_gdf_final[close_activities_count_purpose] = close_activity_count
#     activities_by_purpose_tensor[purpose] = process_close_count_to_tensor(close_activity_count)    

# Deal with districts

In [14]:
# DISTRICTS GENERAL 

links_gdf_final['districts_tuple'] = links_gdf_final['district'].apply(lambda x: tuple(x))
district_tuples = links_gdf_final.groupby('districts_tuple')

In [50]:
# # DISTRICT CENTROIDS

centroid_distances = np.array([
    process_centroid(geom_list)
    for geom_list in links_gdf_final['district_centroid']
])

# Process the centroids
centroid_distance_with_coordinates = np.array([
    extract_point_coordinates(geom_list)
    for geom_list in centroid_distances
])
district_centroids_tensor = torch.tensor(centroid_distance_with_coordinates, dtype=torch.float)

# # FIND DISTRICT POLYGON

In [52]:
district_centroids_tensor.shape

torch.Size([25309, 3, 2])

In [16]:
# # DISTRICT HOME COUNTS 

# district_2_home_counts, distribution_counts = compute_district_2_information_counts(district_information_counts=district_tuples, column_to_filter_for = 'close_homes_count')
# district_home_counts_tensor = compute_district_2_information_tensor(district_2_information_counts=district_2_home_counts, distribution_counts=distribution_counts, gdf_input=links_gdf_final)

In [17]:
# # DISTRICT ACTIVITIES COUNTS

# activity_2_district_tensor = {}
# for purpose in activities_by_purpose_tensor.keys():
#     district_2_activity_counts, distribution_counts_activity = compute_district_2_information_counts(district_information_counts=district_tuples, column_to_filter_for='close_activities_count_' + purpose)
#     district_activitiy_counts_tensor = compute_district_2_information_tensor(district_2_information_counts=district_2_activity_counts, distribution_counts=distribution_counts_activity, gdf_input=links_gdf_final)
#     activity_2_district_tensor[purpose] = district_activitiy_counts_tensor

In [18]:
# indices = torch.nonzero(~torch.isnan(activity_2_district_tensor['education'][:, 1, 0])).squeeze()
# indices

In [19]:
# # DISTRICT TRIPS START AND END

# unique_purposes = trips_with_socio['preceding_purpose'].unique()
# district_close_start_trips_tensor_dict = {}
# district_close_end_trips_tensor_dict = {}

# for preceding_purpose in tqdm(unique_purposes, desc="Processing preceding purposes", unit="purpose"):
#     for following_purpose in tqdm(unique_purposes, desc="Processing following purposes", unit="purpose"):
#         if preceding_purpose != following_purpose:
#             filtered_trips = trips_with_socio[(trips_with_socio['preceding_purpose'] == preceding_purpose) & (trips_with_socio['following_purpose'] == following_purpose)]
#             if not filtered_trips.empty:
#                 tensor_key = f"{preceding_purpose}_{following_purpose}"
#                 print(tensor_key)

#                 string_trips_start_purpose = "trips_start_" + tensor_key
#                 string_trips_end_purpose = "trips_start_" + tensor_key
                
#                 district_2_start_trips, start_trip_distributions = compute_district_2_information_counts(district_information_counts=district_tuples, column_to_filter_for = string_trips_start_purpose)
#                 district_trip_starts_tensor = compute_district_2_information_tensor(district_2_information_counts=district_2_start_trips, distribution_counts=start_trip_distributions, gdf_input=links_gdf_final)
#                 district_close_start_trips_tensor_dict[string_trips_start_purpose] = district_trip_starts_tensor
                
#                 district_2_end_trips, end_trip_distributions = compute_district_2_information_counts(district_information_counts=district_tuples, column_to_filter_for = string_trips_end_purpose)
#                 district_trip_end_tensor = compute_district_2_information_tensor(district_2_information_counts=district_2_end_trips, distribution_counts=end_trip_distributions, gdf_input=links_gdf_final)
#                 district_close_end_trips_tensor_dict[string_trips_end_purpose] = district_trip_end_tensor

In [72]:
# PROCESS LINK GEOMETRIES

edge_midpoints = np.array([((geom.coords[0][0] + geom.coords[-1][0]) / 2, 
                                    (geom.coords[0][1] + geom.coords[-1][1]) / 2) 
                                for geom in links_gdf_final.geometry])

nodes = pd.concat([links_gdf_final['from_node'], links_gdf_final['to_node']]).unique()
node_to_idx = {node: idx for idx, node in enumerate(nodes)}
links_gdf_final['from_idx'] = links_gdf_final['from_node'].map(node_to_idx)
links_gdf_final['to_idx'] = links_gdf_final['to_node'].map(node_to_idx)
edges_base = links_gdf_final[['from_idx', 'to_idx']].values
edge_midpoint_tensor = torch.tensor(edge_midpoints, dtype=torch.float)

# Initialize start and end points
start_points = np.array([geom.coords[0] for geom in links_gdf_final.geometry])
end_points = np.array([geom.coords[-1] for geom in links_gdf_final.geometry])

# Convert to tensors
edge_start_point_tensor = torch.tensor(start_points, dtype=torch.float)
edge_end_point_tensor = torch.tensor(end_points, dtype=torch.float)

edge_start_end_tensor = torch.stack((edge_start_point_tensor, edge_end_point_tensor), dim=1)

stacked_edge_geometries_tensor = torch.stack([edge_start_point_tensor, edge_end_point_tensor, edge_midpoint_tensor], dim=1)

print(stacked_edge_geometries_tensor.shape)
print(edge_start_point_tensor.shape)
print(edge_start_point_tensor.to_dense().shape)

torch.Size([25309, 3, 2])
torch.Size([25309, 2])
torch.Size([25309, 2])


## Analyze results and plot

In [21]:
# pio.analyze_geodataframes(result_dic=result_dic, consider_only_highway_edges=True)

In [22]:
# pio.analyze_geodataframes(result_dic=result_dic, consider_only_highway_edges=False)

In [23]:
# # Stack all entries of trips in one tensor
# close_start_trips_tensors = [tensor.to_dense() for tensor in close_start_trips_tensor_dict.values()]
# stacked_close_start_trips_tensor = torch.cat(close_start_trips_tensors, dim=1)

# close_end_trips_tensors = [tensor.to_dense() for tensor in close_end_trips_tensor_dict.values()]
# stacked_close_end_trips_tensor = torch.cat(close_end_trips_tensors, dim=1)

# district_start_trips_tensors = [tensor.to_dense() for tensor in district_close_start_trips_tensor_dict.values()]
# stacked_district_start_trips_tensor = torch.cat(district_start_trips_tensors, dim=1)

# district_end_trips_tensors = [tensor.to_dense() for tensor in district_close_end_trips_tensor_dict.values()]
# stacked_district_end_trips_tensor = torch.cat(district_end_trips_tensors, dim=1)

In [82]:
def aggregate_district_information(links_gdf, tensors_edge_information):
    
    # Assuming tensors_edge_information is a list of tensors
    vol_base_case = tensors_edge_information[0]  # Adjust index if needed
    capacities_base = tensors_edge_information[1]  # Assuming capacities_new is the second tensor
    capacities_new = tensors_edge_information[2]  # Assuming capacities_new is the second tensor
    capacity_reduction = tensors_edge_information[3]  # Assuming capacity_reduction is the third tensor, etc. 
    freespeed_base = tensors_edge_information[4]
    freespeed = tensors_edge_information[5]
    highway = tensors_edge_information[6]
    length = tensors_edge_information[7]
    allowed_modes = tensors_edge_information[8]
    
    district_info = {}
            
    modes_str = ""
    for idx, row in links_gdf.iterrows():
        districts = row['district']
        modes = row['modes']
        modes_str += modes + ","
        for district in districts:
            if district not in district_info:
                district_info[district] = {
                    'vol_base_case': 0,
                    'capacity_base': 0,
                    'capacity_new': 0,
                    'capacity_reduction': 0,
                    'freespeed_base_sum': 0,
                    'freespeed_base_count': 0,
                    'freespeed_sum': 0,
                    'freespeed_count': 0,
                    'highway_sum': 0,
                    'highway_count': 0,
                    'length': 0,
                    'edge_count': 0,
                }
            
            if "car" in modes:
                district_info[district]['capacity_base'] += capacities_base[idx].item()
                district_info[district]['capacity_new'] += capacities_new[idx].item()
                district_info[district]['capacity_reduction'] += capacity_reduction[idx].item()
                district_info[district]['freespeed_sum'] += freespeed[idx].item()
                district_info[district]['freespeed_base_sum'] += freespeed_base[idx].item()
                district_info[district]['freespeed_base_count'] += 1
                district_info[district]['freespeed_count'] += 1
            else:
                district_info[district]['capacity_base'] += 0
                district_info[district]['capacity_new'] += 0
                district_info[district]['capacity_reduction'] += 0
                district_info[district]['freespeed_sum'] += 0
                district_info[district]['freespeed_base_sum'] += 0
            
            district_info[district]['capacity_base'] += capacities_base[idx].item()  # Assuming capacity is the second tensor
            district_info[district]['capacity_new'] += capacities_new[idx].item()
            district_info[district]['capacity_reduction'] += capacity_reduction[idx].item()
    
            district_info[district]['length'] += length[idx].item()

            highway_value = highway_mapping.get(row['highway'], -1)
            district_info[district]['highway_sum'] += highway_value
            district_info[district]['highway_count'] += 1
            district_info[district]['edge_count'] += 1


    # Convert allowed_modes to a list for each district
    for district in district_info:
        district_info[district]['freespeed_base'] = district_info[district]['freespeed_base_sum'] / district_info[district]['freespeed_base_count']
        district_info[district]['freespeed'] = district_info[district]['freespeed_sum'] / district_info[district]['freespeed_count']
        district_info[district]['highway'] = district_info[district]['highway_sum'] / district_info[district]['highway_count']
        district_info[district]['allowed_modes'] = encode_modes_string(modes_str)
    
    # Sort districts by their identifiers
    districts = sorted(district_info.keys())
    
    vol_base_case_tensor = torch.tensor([district_info[d]['vol_base_case'] for d in districts])
    capacity_base_tensor = torch.tensor([district_info[d]['capacity_base'] for d in districts])
    capacity_new_tensor = torch.tensor([district_info[d]['capacity_new'] for d in districts])
    capacity_reduction_tensor = torch.tensor([district_info[d]['capacity_reduction'] for d in districts])
    
    length_tensor = torch.tensor([district_info[d]['length'] for d in districts])
    edge_count_tensor = torch.tensor([district_info[d]['edge_count'] for d in districts])
    highway_tensor = torch.tensor([district_info[d]['highway'] for d in districts])
    freespeed_base_tensor = torch.tensor([district_info[d]['freespeed_base'] for d in districts])
    freespeed_tensor = torch.tensor([district_info[d]['freespeed'] for d in districts])
    allowed_modes_tensor = torch.stack([district_info[d]['allowed_modes'] for d in districts])

    return {
        'districts': districts,
        'vol_base_case': vol_base_case_tensor,
        'capacity_base': capacity_base_tensor,
        'capacity_new': capacity_new_tensor,
        'capacity_reduction': capacity_reduction_tensor,
        'length': length_tensor,
        'highway': highway_tensor,
        'freespeed_base': freespeed_base_tensor,
        'freespeed': freespeed_tensor,
        'allowed_modes': allowed_modes_tensor,
        'edge_count': edge_count_tensor,
    }

In [103]:
# Pad the tensors of shape (20,) to shape (20, 6)
def pad_tensor(tensor, target_shape):
    if tensor.dim() < len(target_shape):
        return tensor.unsqueeze(1).repeat(1, target_shape[1])
    return tensor

def compute_stacked_edge_and_district_tensors(vol_base_case, capacity_base_case, length, freespeed_base_case, allowed_modes, gdf, capacities_new, capacity_reduction, highway, freespeed):
    edge_tensors = [
                torch.tensor(vol_base_case), 
                torch.tensor(capacity_base_case), 
                torch.tensor(capacities_new), 
                torch.tensor(capacity_reduction), 
                torch.tensor(freespeed_base_case), 
                torch.tensor(freespeed), 
                torch.tensor(highway), 
                torch.tensor(length), 
                torch.tensor(allowed_modes)
            ]
    district_info = aggregate_district_information(gdf, edge_tensors)
    district_tensors = [
                district_info['vol_base_case'],
                district_info['capacity_base'],
                district_info['capacity_new'],
                district_info['capacity_reduction'],
                district_info['freespeed_base'],
                district_info['freespeed'],
                district_info['highway'],
                district_info['length'],
                district_info['allowed_modes']
            ]
    target_shape = district_info['allowed_modes'].shape # this shape is the only one that is not of shape (20,) but of shape (20, 6)
    stacked_tensors_edge_information = torch.stack(
                [pad_tensor(tensor, target_shape) for tensor in edge_tensors], 
                dim=1
            )
    stacked_district_tensors = torch.stack(
                [pad_tensor(tensor, target_shape) for tensor in district_tensors], 
                dim=1
            )
    
    return district_info,stacked_tensors_edge_information,stacked_district_tensors


def compute_node_attributes(district_info, linegraph_data):
    num_edge_nodes = linegraph_data.num_nodes
    num_district_nodes = len(district_info['districts'])
    existing_feature_dim1 = linegraph_data.x.size(1) if linegraph_data.x is not None else 9
    existing_feature_dim2 = linegraph_data.x.size(2) if linegraph_data.x is not None else 6 
    node_type_feature = torch.zeros((num_edge_nodes + num_district_nodes, 1, existing_feature_dim2), dtype=torch.long)
    node_type_feature[num_edge_nodes:, :, :] = 1
    return num_edge_nodes,num_district_nodes,existing_feature_dim1,existing_feature_dim2,node_type_feature

def compute_edge_attributes(district_info, linegraph_data):
    district_node_offset = linegraph_data.num_nodes
    edge_to_district_edges = []
    for idx, row in links_gdf_final.iterrows():
        for district in row['district']:
            district_idx = district_info['districts'].index(district) + district_node_offset
            edge_to_district_edges.append([idx, district_idx])
            edge_to_district_edges.append([district_idx, idx])  # Add reverse edge for undirected graph  # TODO is one way enough ? 
            
    edge_to_district_index = torch.tensor(edge_to_district_edges, dtype=torch.long).t()
    linegraph_data.edge_index = torch.cat([linegraph_data.edge_index, edge_to_district_index], dim=1)
    edge_to_district_index = torch.tensor(edge_to_district_edges, dtype=torch.long).t()
    edge_to_district_attr = torch.ones((edge_to_district_index.shape[1], 1), dtype=torch.long)
    return edge_to_district_index,edge_to_district_attr

def compute_target_tensor(vol_base_case, gdf, district_info):
    edge_car_volume_difference = gdf['vol_car'].values - vol_base_case
    district_car_volume_difference = []
    for district in district_info['districts']:
        district_edges = gdf[gdf['district'].apply(lambda x: district in x)]
        district_volume_diff = district_edges['vol_car'].sum() - district_edges['vol_car_base_case'].sum()
        district_car_volume_difference.append(district_volume_diff)
    district_car_volume_difference = torch.tensor(district_car_volume_difference, dtype=torch.float).unsqueeze(1)
    target_values = torch.cat([torch.tensor(edge_car_volume_difference, dtype=torch.float).unsqueeze(1), district_car_volume_difference], dim=0)
    return target_values


def combine_stacked_tensors(vol_base_case, capacity_base_case, length, freespeed_base_case, allowed_modes, gdf, capacities_new, capacity_reduction, highway, freespeed):
    district_info, stacked_tensors_edge_information, stacked_district_tensors = compute_stacked_edge_and_district_tensors(vol_base_case, capacity_base_case, length, freespeed_base_case, allowed_modes, gdf, capacities_new, capacity_reduction, highway, freespeed)
    combined_tensor = torch.cat((stacked_tensors_edge_information, stacked_district_tensors), dim=0)
    return district_info,combined_tensor

def get_basic_edge_attributes(capacity_base_case, gdf):
    capacities_new = np.where(gdf['modes'].str.contains('car'), gdf['capacity'], 0)
    capacity_reduction = capacities_new - capacity_base_case
    highway = gdf['highway'].apply(lambda x: highway_mapping.get(x, -1)).values
    freespeed = np.where(gdf['modes'].str.contains('car'), gdf['freespeed'], 0)
    return capacities_new,capacity_reduction,highway,freespeed

def prepare_gdf(df):
    gdf = links_gdf_final[['link', 'district', 'geometry']].merge(df, on='link', how='left')
    gdf = gpd.GeoDataFrame(gdf, geometry='geometry')
    gdf.crs = links_gdf_final.crs
    return gdf


In [110]:
def process_result_dic(result_dic, result_dic_mode_stats):
    datalist = []
    linegraph_transformation = LineGraph()
    
    vol_base_case = links_gdf_final['vol_car'].values
    capacity_base_case = np.where(links_gdf_final['modes'].str.contains('car'), links_gdf_final['capacity'], 0)
    length = links_gdf_final['length'].values
    freespeed_base_case = links_gdf_final['freespeed'].values
    allowed_modes = encode_modes(links_gdf_final)
    close_homes = close_homes_tensor.to_dense()
    
    edge_index = torch.tensor(edges_base, dtype=torch.long).t().contiguous()
    x = torch.zeros((len(nodes), 1), dtype=torch.float)
    data = Data(edge_index=edge_index, x=x)
    
    for key, df in tqdm(result_dic.items(), desc="Processing result_dic", unit="dataframe"):    
        if isinstance(df, pd.DataFrame) and key != "base_network_no_policies":
            gdf = prepare_gdf(df)
            capacities_new, capacity_reduction, highway, freespeed = get_basic_edge_attributes(capacity_base_case, gdf)
            district_info, combined_tensor = combine_stacked_tensors(vol_base_case, capacity_base_case, length, freespeed_base_case, allowed_modes, gdf, capacities_new, capacity_reduction, highway, freespeed)
            
            linegraph_data = linegraph_transformation(data)
            linegraph_data.x = combined_tensor
        
            # add edge attributes: 1 if edge to district, 0 if edge to edge
            edge_to_district_index, edge_to_district_attr = compute_edge_attributes(district_info, linegraph_data)
            if linegraph_data.edge_attr is None:
                linegraph_data.edge_attr = torch.zeros((linegraph_data.edge_index.shape[1] - edge_to_district_index.shape[1], 1), dtype=torch.long)
            linegraph_data.edge_attr = torch.cat([linegraph_data.edge_attr, edge_to_district_attr], dim=0)

            # add node attributes: 1 if district, 0 if edge
            num_edge_nodes, num_district_nodes, existing_feature_dim1, existing_feature_dim2, node_type_feature = compute_node_attributes(district_info, linegraph_data)
            if linegraph_data.x is None:
                linegraph_data.x = torch.zeros((num_edge_nodes + num_district_nodes, existing_feature_dim1, existing_feature_dim2), dtype=torch.float)
            linegraph_data.x = torch.cat([linegraph_data.x, node_type_feature], dim=1)
            
            linegraph_data.num_nodes = num_edge_nodes + num_district_nodes
            linegraph_data.pos = torch.cat([stacked_edge_geometries_tensor, district_centroids_tensor_padded], dim=0)
            linegraph_data.y = compute_target_tensor(vol_base_case, gdf, district_info)
                        
            df_mode_stats = result_dic_mode_stats.get(key)
            if df_mode_stats is not None:
                numeric_cols = df_mode_stats.select_dtypes(include=[np.number]).columns
                mode_stats_numeric = df_mode_stats[numeric_cols].astype(float)
                mode_stats_tensor = torch.tensor(mode_stats_numeric.values, dtype=torch.float)
                linegraph_data.mode_stats = mode_stats_tensor
            
            if linegraph_data.validate(raise_on_error=True):
                datalist.append(linegraph_data)
            else:
                print("Invalid line graph data")
            break
    return datalist

# Call the function
data_processed = process_result_dic(result_dic=result_dic_output_links, result_dic_mode_stats=result_dic_mode_stats)

  torch.tensor(allowed_modes)
Processing result_dic:   1%|▏         | 1/79 [00:01<02:15,  1.74s/dataframe]


In [None]:
# tensors_districts_information = []
            
            # tensors_with_sociographic_information = [close_homes, 
            #     activities_home, activities_work, activities_education, activities_shop, activities_leisure, activities_other,
            #     stacked_close_start_trips_tensor, stacked_close_end_trips_tensor]
                
            # tensors_districts_with_sociographic_information = [
            #     district_home_counts,
            #     district_activities_home, district_activities_work, district_activities_education, district_activities_shop, district_activities_leisure, district_activities_other,
            #     stacked_district_start_trips_tensor, stacked_district_end_trips_tensor]
            
            # linegraph_x_districts = torch.tensor(np.column_stack(tensors_districts_information), dtype=torch.float)
            # stack linegraph_x and linegraph_x_districts
            
            
            # linegraph_pos = torch.tensor(np.column_stack(edge_midpoint_t), dtype=torch.float)

            # Print shapes for debugging
            # for i, t in enumerate(tensors_edge_information):
            #     print(f"Shape of tensor {i}: {t.shape}")

            # linegraph_data.x = linegraph_x
            # linegraph_data.pos = linegraph_pos
            # linegraph_data.y = target_values
            
            # df_mode_stats = result_dic_mode_stats.get(key)
            # if df_mode_stats is not None:
            #     numeric_cols = df_mode_stats.select_dtypes(include=[np.number]).columns
            #     mode_stats_numeric = df_mode_stats[numeric_cols].astype(float)
            #     mode_stats_tensor = torch.tensor(mode_stats_numeric.values, dtype=torch.float)
            #     linegraph_data.mode_stats = mode_stats_tensor
            # if linegraph_data.validate(raise_on_error=True):
            #     datalist.append(linegraph_data)
            # else:
            #     print("Invalid line graph data")
            # print("LG DATA EDGE INDEx")
            # for lg_data in datalist:
            #     print(lg_data.edge_index.shape)
            #     print(lg_data.edge_index)
                
            # print(lg_data.edge_index.shape for lg_data in datalist)

            # print(lg_data.edge_index for lg_data in datalist)

In [100]:
data_processed[0]['x']

tensor([[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         ...,
         [2.0037e+01, 2.0037e+01, 2.0037e+01, 2.0037e+01, 2.0037e+01,
          2.0037e+01],
         [0.0000e+00, 0.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00]],

        [[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         ...,
         [9.7760e+00, 9.7760e+00, 9.7760e+00, 9.7760e+00, 9.7760e+00,
          9.7760e+00],
         [0.0000e+00, 0.0000e+00, 1.0000

## Save for further processing with GNN

In [None]:
torch.save(data_processed, result_path)

In [None]:
# plt.figure(figsize=(10, 6))
# plt.scatter(persons_with_homes.geometry.x, persons_with_homes.geometry.y, s=1, color='blue', alpha=0.5)
# plt.scatter(persons_with_home_within_linear_ring.geometry.x, persons_with_home_within_linear_ring.geometry.y, s=1, color='red', alpha=0.5)
# plt.title('Locations of Persons with Homes')
# plt.xlabel('Longitude')
# plt.ylabel('Latitude')
# plt.show()

# from shapely.geometry import LineString
# from shapely.geometry import MultiPolygon
# import matplotlib.pyplot as plt

# # Create a LineString
# line = LineString([(10, 10), (20, 10)])

# # Create a buffer around the line
# buffered_line = line.buffer(2, cap_style="round")

# # Plot the original line and the buffered area
# plt.figure(figsize=(8, 6))
# x, y = line.xy
# plt.plot(x, y, color='blue', label='Original Line')
# if isinstance(buffered_line, MultiPolygon):
#     for polygon in buffered_line:
#         x, y = polygon.exterior.xy
#         plt.fill(x, y, alpha=0.5, color='lightblue', label='Buffered Area')
# else:
#     x, y = buffered_line.exterior.xy
#     plt.fill(x, y, alpha=0.5, color='lightblue', label='Buffered Area')

# plt.title('Line with Buffered Area')
# plt.xlabel('X-axis')
# plt.ylabel('Y-axis')
# plt.legend()
# plt.grid()
# plt.axis('equal')
# plt.show()


# def check_trips_equivalence(close_trips_start, close_trips_end):
#     """
#     Check if close_trips_start and close_trips_end are equivalent.
    
#     Args:
#     close_trips_start (list): List of tuples for start trips
#     close_trips_end (list): List of tuples for end trips
    
#     Returns:
#     bool: True if equivalent, False otherwise
#     """
#     if len(close_trips_start) != len(close_trips_end):
#         print("Lists have different lengths.")
#         return False
    
#     differences = []
#     for i, (start, end) in enumerate(zip(close_trips_start, close_trips_end)):
#         if start != end:
#             differences.append((i, start, end))
    
#     if not differences:
#         print("The lists are identical.")
#         return True
#     else:
#         print(f"Found {len(differences)} differences:")
#         for diff in differences[:10]:  # Print first 10 differences
#             print(f"Index {diff[0]}: Start {diff[1]}, End {diff[2]}")
#         if len(differences) > 10:
#             print(f"... and {len(differences) - 10} more differences.")
#         return False

# # Usage
# are_equivalent = check_trips_equivalence(close_trips_start, close_trips_end)
# print(f"Are the trip lists equivalent? {are_equivalent}")