In [21]:
import math
import numpy as np
import wandb
import pickle
import os
import shapely.wkt as wkt
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
from torch_geometric.transforms import LineGraph

import gzip
import xml.etree.ElementTree as ET

import torch
import torch_geometric
from torch_geometric.data import Data

import processing_io as pio
import sys
import os
import joblib
import json

import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from shapely.geometry import Point, LineString, box
from matplotlib.colors import TwoSlopeNorm

from shapely.ops import unary_union
from mpl_toolkits.axes_grid1 import make_axes_locatable
from torch_geometric.data import Data, Batch
import torch
from torch_geometric.data import Data
import alphashape
from matplotlib.lines import Line2D

from shapely.geometry import Polygon
from torch.utils.data import DataLoader, Dataset, Subset


# District polygons. `get_relevant_area_to_plot` reads this module-level name and
# selects rows through the 'c_ar' column.
districts = gpd.read_file("../../data/visualisation/districts_paris.geojson")

# Make the parent directory importable; the project modules imported right below
# (gnn_io, gnn_architectures, help_functions) presumably live there.
scripts_path = os.path.abspath(os.path.join('..'))
if scripts_path not in sys.path:
    sys.path.append(scripts_path)

import gnn_io as gio
import gnn_architectures as garch
import help_functions as hf

import copy

def replace_invalid_values(tensor):
    """Overwrite every NaN, +inf and -inf entry of ``tensor`` with 0.

    The input is modified in place and the very same object is returned.
    """
    positive_inf, negative_inf = float('inf'), float('-inf')
    # NaN is the only value that compares unequal to itself.
    invalid = (tensor != tensor) | (tensor == positive_inf) | (tensor == negative_inf)
    tensor[invalid] = 0
    return tensor

def plot_combined_output(gdf_input: gpd.GeoDataFrame, column_to_plot: str, font: str = 'Times New Roman', 
                         save_it: bool = False, number_to_plot: int = 0,
                         zone_to_plot:str= "this_zone",
                         is_predicted: bool = False, alpha:int=100, 
                         use_fixed_norm:bool=True, 
                         fixed_norm_max: int= 10, normalized_y:bool=False, known_districts:bool=False, buffer: float = 0.0005, districts_of_interest: list =[1, 2, 3, 4]):
    """Plot one column of the street network on a diverging colour scale.

    The input frame is copied, cropped with ``filter_for_geographic_section`` and
    drawn together with the outline returned by ``get_relevant_area_to_plot``.

    Args:
        gdf_input: Edge GeoDataFrame; it is copied and never modified here.
        column_to_plot: Name of the column used for colouring.
        font: Font family for axis labels, tick labels, legend and colour bar.
        save_it: If True, the figure is also written below ``results/``.
        number_to_plot: When not None the file name starts with ``n_<number>``;
            when None, ``zone_to_plot`` is used instead.
        zone_to_plot: File-name identifier used when ``number_to_plot`` is None.
        is_predicted: Selects the ``predicted`` / ``actual`` file-name suffix.
        alpha: Forwarded to ``get_relevant_area_to_plot`` (alpha-shape parameter).
        use_fixed_norm: Use a symmetric colour range of +/- ``fixed_norm_max``
            instead of the column's min / median / max.
        fixed_norm_max: Half-width of the fixed colour range.
        normalized_y: Not used by this function; kept for call compatibility.
        known_districts: If True the outline is built from the districts listed
            in ``districts_of_interest``; otherwise from the edges whose
            capacity was reduced.
        buffer: Buffer distance applied to the district polygons.
        districts_of_interest: District identifiers ('c_ar' values) to outline.
    """
    # call with known_districts if call with 0 or 1

    gdf = gdf_input.copy()
    gdf, x_min, y_min, x_max, y_max = filter_for_geographic_section(gdf)
    # gdf = gdf[gdf["og_highway"].isin([1])]

    fig, ax = plt.subplots(1, 1, figsize=(15, 15))
    norm = get_norm(column_to_plot=column_to_plot, use_fixed_norm=use_fixed_norm, fixed_norm_max=fixed_norm_max, gdf=gdf)
    relevant_area_to_plot = get_relevant_area_to_plot(alpha, known_districts, buffer, districts_of_interest, gdf, ax, column_to_plot, norm, "og_highway")
    relevant_area_to_plot.plot(ax=ax, edgecolor='black', linewidth=2, facecolor='None', zorder=2)

    cbar = plotting(font, x_min, y_min, x_max, y_max, fig, ax, norm)

    cbar.set_label('Car volume: Difference to base case (%)', fontname=font, fontsize=15)
    if save_it:
        p = "predicted" if is_predicted else "actual"
        identifier = "n_" + str(number_to_plot) if number_to_plot is not None else zone_to_plot
        # savefig does not create missing directories; make sure the target exists
        # so a fresh checkout does not fail with FileNotFoundError.
        os.makedirs("results", exist_ok=True)
        plt.savefig("results/" + identifier + "_" + p, bbox_inches='tight')
    plt.show()

def get_norm(column_to_plot, use_fixed_norm, fixed_norm_max, gdf):
    """Build the diverging colour normalisation used for the network plots.

    With ``use_fixed_norm`` the range is symmetric around 0
    (``-fixed_norm_max`` .. ``fixed_norm_max``). Otherwise the range spans the
    minimum and maximum of ``gdf[column_to_plot]`` and is centred on its median.
    """
    if not use_fixed_norm:
        values = gdf[column_to_plot]
        return TwoSlopeNorm(vmin=values.min(), vcenter=values.median(), vmax=values.max())
    return TwoSlopeNorm(vmin=-fixed_norm_max, vcenter=0, vmax=fixed_norm_max)
    
# def plot_difference_output(gdf_input: gpd.GeoDataFrame, column1: str, column2: str, diff_column: str = 'difference', font: str = 'Times New Roman', save_it: bool = False, number_to_plot: int = 0,
#                            zone_to_plot:str= "this_zone", alpha:int=100, 
#                          use_fixed_norm:bool=True, 
#                          fixed_norm_max: int= 10, normalized_y: bool=False, known_districts:bool=False, buffer: float = 0.0005, districts_of_interest: list =[1, 2, 3, 4]):
#     gdf = gdf_input.copy()
#     gdf[diff_column] = gdf[column1] - gdf[column2]
#     column_to_plot = diff_column

#     gdf, x_min, y_min, x_max, y_max = filter_for_geographic_section(gdf)

#     fig, ax = plt.subplots(1, 1, figsize=(15, 15))    
#     norm = get_norm(column_to_plot=column_to_plot, use_fixed_norm=use_fixed_norm, fixed_norm_max=fixed_norm_max, gdf=gdf)
#     relevant_area_to_plot = get_relevant_area_to_plot(alpha, known_districts, buffer, districts_of_interest, gdf, ax, column_to_plot, norm, "og_highway")
#     relevant_area_to_plot.plot(ax=ax, edgecolor='black', linewidth=2, facecolor='None', zorder=2)

#     cbar = plotting(font, x_min, y_min, x_max, y_max, fig, ax, norm)
#     cbar.set_label('Difference between predicted and actual (%)', fontname=font, fontsize=15)
#     if save_it:
#         identifier = "n_" + str(number_to_plot) if number_to_plot is not None else zone_to_plot
#         plt.savefig("results/" + identifier  + "_difference", bbox_inches='tight')
    # plt.show()

def filter_for_geographic_section(gdf):
    """Crop ``gdf`` to a bounding box derived from its own extent.

    The lower-left corner of the total bounds is shifted by 0.05 (in the units
    of the frame's coordinates) in both x and y; the upper-right corner is kept.
    Only rows whose geometry intersects that box are returned.

    Returns:
        Tuple ``(filtered_gdf, x_min, y_min, x_max, y_max)``.
    """
    west, south, x_max, y_max = gdf.total_bounds
    x_min = west + 0.05
    y_min = south + 0.05
    section = box(x_min, y_min, x_max, y_max)

    # Keep only the rows that touch the section.
    inside_section = gdf.intersects(section)
    return gdf[inside_section], x_min, y_min, x_max, y_max

def plotting(font, x_min, y_min, x_max, y_max, fig, ax, norm):
    """Apply the shared figure styling and add the colour bar.

    Sets axis limits and labels through pyplot, restyles the tick labels of
    ``ax``, adds the two-entry custom legend, repositions ``ax`` and draws a
    'coolwarm' colour bar for ``norm`` in a manually placed axes on ``fig``.

    Returns:
        The created colour bar.
    """
    text_size = 15

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xlabel("Longitude", fontname=font, fontsize=text_size)
    plt.ylabel("Latitude", fontname=font, fontsize=text_size)

    # Tick labels: the per-label size set in the loop overrides labelsize=10.
    ax.tick_params(axis='both', which='major', labelsize=10)
    for tick_label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
        tick_label.set_fontname(font)
        tick_label.set_fontsize(text_size)

    # Hand-built legend entries.
    network_handle = Line2D([0], [0], color='grey', lw=4, label='Street network')
    outline_handle = Line2D([0], [0], color='black', lw=2, label='Capacity was decreased in this section')
    ax.legend(handles=[network_handle, outline_handle], prop={'family': font, 'size': text_size})

    ax.set_position([0.1, 0.1, 0.75, 0.75])
    colorbar_axes = fig.add_axes([0.87, 0.22, 0.03, 0.5])  # manual colour bar placement

    # A mappable without data is enough to render the colour scale.
    mappable = plt.cm.ScalarMappable(cmap='coolwarm', norm=norm)
    mappable._A = []
    cbar = plt.colorbar(mappable, cax=colorbar_axes)

    # Colour bar fonts.
    cbar.ax.tick_params(labelsize=text_size)
    for tick_label in cbar.ax.get_yticklabels():
        tick_label.set_fontname(font)
    bar_label = cbar.ax.yaxis.label
    bar_label.set_fontname(font)
    bar_label.set_size(text_size)
    return cbar

def get_relevant_area_to_plot(alpha, known_districts, buffer, districts_of_interest, gdf, ax, column_to_plot, norm, highway_column):
    """Return the outline of the area in which the policy was applied.

    Args:
        alpha: Alpha parameter for ``alphashape.alphashape`` (only used when
            ``known_districts`` is False).
        known_districts: Selects how the outline is derived, see below.
        buffer: Buffer distance applied to the district polygons before their
            outer boundary is taken (only used when ``known_districts`` is True).
        districts_of_interest: 'c_ar' values of the districts to outline.
        gdf: Edge GeoDataFrame. It is not modified; helper columns are added to
            a private copy.
        ax: Matplotlib axes the network is drawn on (``known_districts`` only).
        column_to_plot: Column used to colour the network.
        norm: Colour normalisation passed to the plot calls.
        highway_column: Column whose values are mapped to line widths through
            ``get_linewidth``.

    Returns:
        A ``GeoSeries`` in the CRS of ``gdf``. With ``known_districts`` it holds
        the boundary of the union of the buffered target districts, and the
        network itself is drawn on ``ax`` as a side effect. Otherwise it holds
        the alpha shape of all vertices of edges whose ``og_capacity_reduction``
        (rounded to 3 decimals) exceeds 1e-3 in absolute value; nothing is
        drawn in that case.
    """
    # The caller usually passes a filtered slice of another frame. Writing helper
    # columns onto it triggers SettingWithCopyWarning and leaks scratch columns,
    # so all column assignments below go to a private copy.
    gdf = gdf.copy()

    if known_districts:
        # Map the road class to a line width and split thin from thick lines so
        # that the thick ones can be drawn on top.
        gdf['linewidth'] = gdf[highway_column].apply(get_linewidth)
        large_lines = gdf[gdf['linewidth'] > 1]
        small_lines = gdf[gdf['linewidth'] == 1]

        # `districts` is the module-level GeoDataFrame loaded at the top of the notebook.
        target_districts = districts[districts['c_ar'].isin(districts_of_interest)]
        # NOTE: the former row-wise 'intersects_target_districts' computation was
        # removed; its result was never read and it cost one intersects() call
        # per edge and district.

        # Plot small lines first
        small_lines.plot(column=column_to_plot, cmap='coolwarm', linewidth=small_lines['linewidth'], ax=ax, legend=False,
                        norm=norm, label="Street network", zorder=1)

        # Plot large lines after
        large_lines.plot(column=column_to_plot, cmap='coolwarm', linewidth=large_lines['linewidth'], ax=ax, legend=False,
                        norm=norm, label="Street network", zorder=2)

        buffered_target_districts = target_districts.copy()
        buffered_target_districts['geometry'] = buffered_target_districts.buffer(buffer)
        if buffered_target_districts.crs != gdf.crs:
            buffered_target_districts = buffered_target_districts.to_crs(gdf.crs)
        outer_boundary = unary_union(buffered_target_districts.geometry).boundary
        relevant_area_to_plot = gpd.GeoSeries(outer_boundary, crs=gdf.crs)

    else:
        gdf['og_capacity_reduction_rounded'] = gdf['og_capacity_reduction'].round(decimals=3)
        tolerance = 1e-3
        edges_with_capacity_reduction = gdf[np.abs(gdf['og_capacity_reduction_rounded']) > tolerance]
        # Collect every vertex of the affected edges as input for the alpha shape.
        coords = [(x, y) for geom in edges_with_capacity_reduction.geometry for x, y in zip(geom.xy[0], geom.xy[1])]
        alpha_shape = alphashape.alphashape(coords, alpha)
        relevant_area_to_plot = gpd.GeoSeries([alpha_shape], crs=gdf.crs)
    return relevant_area_to_plot

def get_linewidth(value):
    """Map a highway code to the line width used when plotting an edge.

    Codes 0 and 1 give width 5, code 2 gives 3, code 3 gives 2; every other
    value falls back to width 1.
    """
    width_by_codes = ((5, (0, 1)), (3, (2,)), (2, (3,)))
    for width, codes in width_by_codes:
        if value in codes:
            return width
    return 1
    

In [22]:
# Directory of the training run that is evaluated in this notebook.
# NOTE(review): absolute, machine-specific path. The folder name appears to encode
# the hyperparameters set below - keep both in sync when switching runs.
run_path = '/home/enatterer/Development/gnn_predicting_effects_of_traffic_policies/data/runs_optimized/pnc_local_[256]_pnc_global_[512_256]_hidden_layer_str_[512_512_256_128_64]_dropout_0.3_use_dropout_False/'
# Architecture hyperparameters; passed unchanged to garch.MyGnn in the next cell.
point_net_conv_layer_structure_local_mlp = [256]
point_net_conv_layer_structure_global_mlp = [512,256]
gat_conv_layer_structure = [512,512,256,128,64]
dropout = 0.3
use_dropout = False 
in_channels = 6 
out_channels = 1 

# Districts ('c_ar' values) in which the policy was applied, and the label used in
# output file names.
districts_of_interest = [1, 2, 3, 4]
zone_to_plot = "zone_1"
# `test_data` / `base_case` first hold the file path and are then rebound to the
# GeoDataFrame read from it.
test_data = "../../data/test_data/gdf_pop_1pm_policy_in_1_2_3_4.geojson"
test_data = gpd.read_file(test_data)
base_case = "../../data/test_data/gdf_basecase_mean_pop_1pm.geojson"
base_case = gpd.read_file(base_case)    
    
model_path = run_path +  'trained_model/model.pth'
data_created_during_training = run_path + 'data_created_during_training/'
# Feature dimensions kept by gio.cut_dimensions further down.
indices_of_datasets_to_use = [0, 1, 3, 4]

# Scalers saved by the training run. joblib.load unpickles its input, so only
# load files from a trusted run directory.
scaler_x = joblib.load(data_created_during_training + 'x_scaler.pkl')
scaler_pos = joblib.load(data_created_during_training + 'pos_scaler.pkl')

In [23]:
# Initialize the model with the same architecture hyperparameters as the training run.
model = garch.MyGnn(in_channels=in_channels, out_channels=out_channels,
                    point_net_conv_layer_structure_local_mlp=point_net_conv_layer_structure_local_mlp,
                    point_net_conv_layer_structure_global_mlp=point_net_conv_layer_structure_global_mlp,
                    gat_conv_layer_structure=gat_conv_layer_structure,
                    dropout=dropout,
                    use_dropout=use_dropout)

# Load the model state dictionary.
# map_location='cpu' lets a checkpoint that was saved from a GPU be restored on a
# machine without CUDA; the model is moved to the selected device in a later cell.
model.load_state_dict(torch.load(model_path, map_location='cpu'))

Initializing PointNetConv(local_nn=Sequential(
  (0): Linear(in_features=6, out_features=256, bias=True)
  (1): ReLU()
), global_nn=Sequential(
  (0): Linear(in_features=256, out_features=512, bias=True)
  (1): Linear(in_features=512, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=512, bias=True)
  (4): ReLU()
))
Initializing 0.weight with kaiming_normal
Initializing 0.bias with zeros
Initializing 0.weight with kaiming_normal
Initializing 0.bias with zeros
Initializing 1.weight with kaiming_normal
Initializing 1.bias with zeros
Initializing 3.weight with kaiming_normal
Initializing 3.bias with zeros
Initializing Linear(in_features=6, out_features=256, bias=True)
Initializing Linear(in_features=256, out_features=512, bias=True)
Initializing Linear(in_features=512, out_features=256, bias=True)
Initializing Linear(in_features=256, out_features=512, bias=True)
Initializing GATConv(512, 512, heads=1)
Initializing GATConv(512, 256, heads=1)
Initializin

<All keys matched successfully>

In [24]:
# Build the model input for this scenario from the base-case and policy GeoDataFrames.
# The helper lives in processing_io; presumably it returns a line-graph data object - TODO confirm.
test_input_linegraph = pio.create_test_data_object(base_case=base_case, test_data = test_data) # check this function if there have been changes in the features of the data

In [25]:
test_data_list = [test_input_linegraph] # we do it for just one test data object, for now.
# Keep only the feature dimensions listed in indices_of_datasets_to_use.
dataset_only_relevant_dimensions = gio.cut_dimensions(dataset=test_data_list, indices_of_dimensions_to_keep=indices_of_datasets_to_use)
# NOTE(review): Subset normally takes a sequence of indices, here it gets the int 0.
# Only `test_subset.dataset` is read later in this notebook, so the wrapper looks
# redundant - confirm and simplify.
test_subset = Subset(dataset_only_relevant_dimensions[0], 0)

In [26]:
def normalize_one_dataset_given_scaler(dataset_input, x_scalar_list = None, pos_scalar=None):
    """Scale the features and positions of one data object with fitted scalers.

    The node features are scaled by ``normalize_x_values_given_scaler``; the
    ``pos`` attribute is replaced by a float tensor holding
    ``pos_scalar.transform`` of the old positions. The object is changed in
    place and returned.
    """
    normalized = normalize_x_values_given_scaler(dataset_input, x_scalar_list)
    scaled_positions = pos_scalar.transform(normalized.pos.numpy())
    normalized.pos = torch.tensor(scaled_positions, dtype=torch.float)
    return normalized

def normalize_x_values_given_scaler(dataset, x_scaler_list):
    """Scale the first four feature columns of ``dataset.x`` in place.

    Column ``k`` is cleaned with ``replace_invalid_values`` and then transformed
    by ``x_scaler_list[k]``. The (mutated) ``dataset`` is returned.
    """
    for feature_idx in range(4):
        feature_scaler = x_scaler_list[feature_idx]
        # Scalers are fed 2-D input, hence the (N, 1) column.
        column = dataset.x[:, feature_idx].reshape(-1, 1)
        cleaned_column = replace_invalid_values(column)
        scaled_column = feature_scaler.transform(cleaned_column.numpy())
        dataset.x[:, feature_idx] = torch.tensor(scaled_column, dtype=torch.float).squeeze()
    return dataset

In [27]:
# Scale features and positions with the scalers saved by the training run.
# The underlying data object is modified in place (see normalize_x_values_given_scaler).
dataset_normalized = normalize_one_dataset_given_scaler(dataset_input=test_subset.dataset, x_scalar_list=scaler_x, pos_scalar=scaler_pos)
# Wrap the normalised data in a test dataloader (helper from gnn_io).
test_dl = gio.create_dataloader(dataset=dataset_normalized, is_train=False, batch_size=8, train_ratio=0, is_test=True)

Total dataset length: 5


In [28]:
def compute_r2_torch_with_mean_targets(mean_targets, preds, targets):
    """Return the R^2 score of ``preds`` relative to a given reference mean.

    R^2 = 1 - SS_res / SS_tot, where SS_tot is measured around ``mean_targets``
    (supplied by the caller) rather than around the mean of ``targets``.
    """
    deviation_from_mean = targets - mean_targets
    residual = targets - preds
    unexplained_share = torch.sum(residual ** 2) / torch.sum(deviation_from_mean ** 2)
    return 1 - unexplained_share

def validate_one_model(model, data, loss_func, device):
    """Evaluate ``model`` on a single data object.

    NOTE: the next cell of this notebook defines ``validate_one_model`` again;
    that later definition is the one in effect when the evaluation runs.

    Returns:
        Tuple ``(val_loss, r_squared, targets, predicted, baseline_loss)``.
        ``val_loss`` is a Python float; ``baseline_loss`` is the loss of
        predicting the mean target everywhere and is returned as produced by
        ``loss_func``.
    """
    model.eval()
    with torch.inference_mode():
        # The moved feature tensor is not used afterwards; the model receives `data` itself.
        _unused_features = data.x.to(device)
        targets = data.y.to(device)
        predicted = model(data.to(device))
        # print(predicted.shape)
        val_loss = loss_func(predicted, targets).item()

    actual_vals = torch.cat([targets])
    predicted_vals = torch.cat([predicted])

    mean_targets = torch.mean(targets)
    r_squared = compute_r2_torch_with_mean_targets(mean_targets=mean_targets, preds=predicted_vals, targets=actual_vals)
    constant_prediction = torch.full_like(predicted_vals, mean_targets)
    baseline_loss = loss_func(targets, constant_prediction)
    return val_loss, r_squared, targets, predicted, baseline_loss


# loss_fct= torch.nn.MSELoss()
# test_loss, r_squared, actual_vals, predictions, baseline_loss = validate_one_model(model, dataset_normalized, loss_fct, device)


In [29]:
def validate_one_model(model, data, loss_func, device):
    """Evaluate ``model`` on a single data object.

    Runs one forward pass in inference mode and compares the prediction with
    ``data.y``.

    Returns:
        Tuple ``(val_loss, r_squared, targets, predicted, baseline_loss)``.
        ``val_loss`` is a Python float; ``r_squared`` comes from
        ``compute_r2_torch``; ``baseline_loss`` is the loss of predicting the
        mean target everywhere and is returned as produced by ``loss_func``.
    """
    model.eval()
    with torch.inference_mode():
        # The moved feature tensor is not used afterwards; the model receives `data` itself.
        _unused_features = data.x.to(device)
        targets = data.y.to(device)
        predicted = model(data.to(device))
        # print(predicted.shape)
        val_loss = loss_func(predicted, targets).item()

    actual_vals = torch.cat([targets])
    predicted_vals = torch.cat([predicted])
    r_squared = compute_r2_torch(preds=predicted_vals, targets=actual_vals)

    mean_targets = torch.mean(targets)
    constant_prediction = torch.full_like(predicted_vals, mean_targets)
    baseline_loss = loss_func(targets, constant_prediction)
    return val_loss, r_squared, targets, predicted, baseline_loss

def compute_r2_torch(preds, targets):
    """Compute the R^2 score of ``preds`` against ``targets`` with PyTorch.

    R^2 = 1 - SS_res / SS_tot with SS_tot taken around the mean of ``targets``.
    The shape of ``targets`` is printed as a side effect.
    """
    print(targets.shape)
    deviation_from_mean = targets - torch.mean(targets)
    residual = targets - preds
    unexplained_share = torch.sum(residual ** 2) / torch.sum(deviation_from_mean ** 2)
    return 1 - unexplained_share

# Mean squared error over all edges.
loss_fct = torch.nn.MSELoss()
# Element-wise squared errors (no averaging). The former `reduce=None` is only the
# default value of a deprecated argument and still produced the mean, so taking
# torch.var of the result later operated on a single scalar.
loss_fct_aux = torch.nn.MSELoss(reduction='none')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# Metrics over the whole test network.
test_loss, r_squared, actual_vals, predictions, baseline_loss = validate_one_model(model, dataset_normalized, loss_fct, device)
print(f'Test Loss: {test_loss}')
print(f'r_squared: {r_squared}')
print(f'Baseline Loss: {baseline_loss}')

torch.Size([31216, 1])
Test Loss: 1.0067453384399414
r_squared: -0.8707122802734375
Baseline Loss: 0.5381614565849304


In [30]:
def data_to_geodataframe(data, original_gdf, predicted_values):
    """Combine model input, targets and predictions into one GeoDataFrame.

    Row ``k`` of the result pairs row ``k`` of ``data.x`` / ``data.y`` /
    ``predicted_values`` with row ``k`` of ``original_gdf`` (node ids and
    geometry), so all inputs must share the same edge order.

    Args:
        data: Object with tensor attributes ``x`` and ``y``.
        original_gdf: Frame providing 'from_node', 'to_node' and 'geometry'.
        predicted_values: Tensor or array of predictions.

    Returns:
        GeoDataFrame with the node ids, the first four feature columns, the
        actual and predicted targets, and the geometry.
    """
    node_features = data.x.cpu().numpy()
    target_values = data.y.cpu().numpy()
    if isinstance(predicted_values, torch.Tensor):
        predicted_values = predicted_values.cpu().numpy()

    # Column names follow the feature order of data.x - assumed, TODO confirm
    # against the code that builds the features.
    feature_names = ('vol_base_case', 'capacity_base_case', 'capacity_reduction', 'highway')

    columns = {
        'from_node': original_gdf["from_node"].values,
        'to_node': original_gdf["to_node"].values,
    }
    for position, feature_name in enumerate(feature_names):
        columns[feature_name] = node_features[:, position]
    columns['vol_car_change_actual'] = target_values.squeeze()
    columns['vol_car_change_predicted'] = predicted_values.squeeze()

    edge_df = pd.DataFrame(columns)
    # Reuse the geometries of the source frame.
    edge_df['geometry'] = original_gdf["geometry"].values
    return gpd.GeoDataFrame(edge_df, geometry='geometry')


# Join the normalised inputs, targets and predictions with the scenario geometries.
# Note: `gdf` is rebound further down with hf.data_to_geodataframe.
gdf = data_to_geodataframe(data=dataset_normalized, original_gdf=test_data, predicted_values=predictions)


In [31]:
# Add columns in original (unscaled) units via the help_functions helper; the
# 'og_*' columns used below presumably come from this call - TODO confirm.
gdf_with_og_values = hf.map_to_original_values(input_gdf=gdf, scaler_x=scaler_x)

In [32]:
# indices_roads_with_highway_primary_sec_tertiary = gdf_with_og_values[gdf_with_og_values['og_highway'].isin([1,2,3])].index
# indices_roads_with_highway_primary_ = gdf_with_og_values[gdf_with_og_values['og_highway'].isin([1])].index
# indices_roads_with_highway_sec = gdf_with_og_values[gdf_with_og_values['og_highway'].isin([2])].index
# indices_roads_with_highway_tertiary = gdf_with_og_values[gdf_with_og_values['og_highway'].isin([3])].index

# indices_roads_with_highway_not_primary_sec_tertiary = gdf_with_og_values[~gdf_with_og_values['og_highway'].isin([1, 2, 3])].index

# gdf_with_og_values['og_capacity_reduction_rounded'] = gdf_with_og_values['og_capacity_reduction'].round(decimals=3)
# tolerance = 1e-3
# indices_roads_with_cap_reduction = gdf_with_og_values[gdf_with_og_values['og_capacity_reduction_rounded'] < -1e-3].index
# indices_roads_with_no_cap_reduction = gdf_with_og_values[gdf_with_og_values['og_capacity_reduction_rounded'] >= -1e-3].index

# indices_roads_with_highway_primary_sec_tertiary_and_cap_reduction = gdf_with_og_values[
#     (gdf_with_og_values['og_highway'].isin([1, 2, 3])) & 
#     (gdf_with_og_values['og_capacity_reduction_rounded'] < -1e-3)
# ].index
# indices_roads_with_highway_primary_sec_tertiary_and_not_cap_reduction = gdf_with_og_values[
#     (gdf_with_og_values['og_highway'].isin([1, 2, 3])) & 
#     (gdf_with_og_values['og_capacity_reduction_rounded'] >= -1e-3)
# ].index


# # indices_to_filter_for = indices_roads_with_highway_primary_
# # filtered_actual = actual_vals[indices_to_filter_for]
# # filtered_actual_mean = torch.mean(filtered_actual)
# # filtered_predicted = predictions[indices_to_filter_for]


In [33]:
# Flag every edge whose geometry intersects at least one of the selected districts.
districts = gpd.read_file("../../data/visualisation/districts_paris.geojson")
target_districts = districts.loc[districts['c_ar'].isin(districts_of_interest)]
gdf_with_og_values['intersects_target_districts'] = gdf_with_og_values['geometry'].apply(
    lambda edge_geometry: target_districts.intersects(edge_geometry).any()
)

In [34]:
# Metrics restricted to the edges that intersect the selected districts.
indices_this_zone = gdf_with_og_values[gdf_with_og_values['intersects_target_districts']].index
# # overlap = indices_this_zone.intersection(indices_roads_with_highway_primary_)

indices_to_filter_for = indices_this_zone
filtered_actual = actual_vals[indices_to_filter_for]
filtered_actual_mean = torch.mean(filtered_actual)
filtered_predicted = predictions[indices_to_filter_for]

mse_filtered = loss_fct(filtered_actual, filtered_predicted)
baseline_filtered = loss_fct(filtered_actual, torch.full_like(filtered_actual, filtered_actual_mean))
# Variance of the per-edge squared deviations from the zone mean. Computed directly
# so that it does not depend on how `loss_fct_aux` was configured: with a reducing
# loss the input to torch.var is a single scalar, which only yields a warning.
variance = torch.var((filtered_actual - filtered_actual_mean) ** 2)
# NOTE: this rebinds `r_squared`, which held the whole-network score until here.
r_squared = compute_r2_torch(preds=filtered_predicted, targets=filtered_actual)
print(baseline_filtered)
print(mse_filtered)
print(r_squared)

torch.Size([1820, 1])
tensor(2.4753, device='cuda:0')
tensor(1.6242, device='cuda:0')
tensor(0.3439, device='cuda:0')


  variance = torch.var(loss_fct_aux(filtered_actual, torch.full_like(filtered_actual, filtered_actual_mean)))


In [35]:
# print(loss_fct(predictions, actual_vals))
# mean_value = torch.mean(actual_vals)
# print(mean_value)
# print(loss_fct(predictions, torch.full_like(predictions, mean_value)))

In [36]:
# NOTE(review): `i` is not used by any visible code; `fixed_norm_max` is only
# referenced by the commented-out plot calls below.
i = 0
fixed_norm_max = 3
# Rebuild the frames with the help_functions variant of data_to_geodataframe; this
# rebinds `gdf` and `gdf_with_og_values` from the earlier cells.
gdf = hf.data_to_geodataframe(data=test_dl.dataset, original_gdf=test_data, predicted_values=predictions)
gdf_with_og_values = hf.map_to_original_values(input_gdf=gdf, scaler_x =scaler_x)
# plot_combined_output(gdf_input=gdf_with_og_values, column_to_plot="og_vol_car_change_predicted", 
#                         save_it=True, number_to_plot=None, zone_to_plot = zone_to_plot, is_predicted=True, alpha=0, use_fixed_norm=True, 
#                         fixed_norm_max = fixed_norm_max,
#                         known_districts = True, buffer = 0.0005, districts_of_interest=districts_of_interest)
# plot_combined_output(gdf_input=gdf_with_og_values, column_to_plot="og_vol_car_change_actual", save_it=True, 
#                         number_to_plot=None, zone_to_plot = zone_to_plot,is_predicted=False,alpha=10,use_fixed_norm=True, 
#                         fixed_norm_max = fixed_norm_max,
#                         known_districts = True, buffer = 0.0005, districts_of_interest=districts_of_interest)

In [37]:
# Element-wise squared errors (no averaging), so torch.var sees one value per edge.
loss_fct_aux = torch.nn.MSELoss(reduction='none')
# actual_mean = torch.mean(actual_vals)
variance = torch.var(loss_fct_aux(filtered_actual, torch.full_like(filtered_actual, filtered_actual_mean)))
# NOTE(review): `test_loss` and `baseline_loss` were computed on the whole network,
# whereas `r_squared` was last assigned in the zone-filtered cell and `variance`
# uses the zone-filtered targets - the four numbers do not describe the same edges.
print(f'Test Loss: {test_loss}')
print(f'Baseline Loss: {baseline_loss}')
print(f'r_squared: {r_squared}')
print(f'variance: {variance}')

Test Loss: 1.0067453384399414
Baseline Loss: 0.5381614565849304
r_squared: 0.3438594341278076
variance: 50.607879638671875


In [38]:
# Reproject to a metric CRS so that geometry lengths are in metres.
# NOTE(review): EPSG:32633 is UTM zone 33N. The district file suggests the network
# is Paris, which lies in zone 31N (EPSG:32631); lengths measured this far from the
# central meridian are inflated by roughly 1 % - confirm and switch zones if so.
gdf_in_meters = gdf_with_og_values.to_crs("EPSG:32633")

tolerance = 1e-3
# .copy() detaches the selection from gdf_in_meters, so adding the 'length' column
# below no longer raises SettingWithCopyWarning.
gdf_with_capacity_reduction = gdf_in_meters[abs(gdf_in_meters['og_capacity_reduction']) > tolerance].copy()
gdf_with_capacity_reduction['length'] = gdf_with_capacity_reduction.length
# Total length of all edges with a capacity change, in kilometres.
total_length = gdf_with_capacity_reduction['length'].sum()/1000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [39]:
# Summed length of the edges with a capacity change (metres / 1000).
total_length

37.03149543114152

In [40]:
# Number of edges with a capacity change.
len(gdf_with_capacity_reduction)

596