## Purpose of this notebook
1. Distinguish influx and outflux edges.
2. Merge edges that can be considered a closed flow (i.e. no junction in between) into segments.
3. Search for possible OD pairs (i.e. pairs of influx & outflux edges).
4. Then build the OD filter dataframe.

In [None]:
# Import modules
import pandas as pd
import numpy as np

In [None]:
# Path to base edge information in csv format.
str_path_df_edge_base = "munich_motorway_v3/df_edge_base.csv"

In [None]:
# Read edge info as dataframe.
df_edge_base = pd.read_csv(str_path_df_edge_base, index_col= 0)
df_edge_base.head()

In [None]:
# Check number of edges.
nr_tot_edge = df_edge_base.shape[0]
nr_tot_edge

In [None]:
# Distinct node ids used as edge start ("from") and as edge destination ("to").
lst_edge_from_unique = list(df_edge_base["edge_from"].unique())
lst_edge_to_unique = list(df_edge_base["edge_to"].unique())

# Report the number of distinct start nodes, then of distinct destination nodes.
print(len(lst_edge_from_unique), len(lst_edge_to_unique), sep="\n")

In [None]:
# A list of junction nodes is needed.
# Junction nodes appear multiple times in the "edge_from" or "edge_to" column,
# so count the occurrences of every node id in each column.
se_count_nodes_from = df_edge_base["edge_from"].value_counts()  # Series: occurrences per node in the "from" column.
se_count_nodes_to = df_edge_base["edge_to"].value_counts()      # Series: occurrences per node in the "to" column.

In [None]:
# Check!
se_count_nodes_from

In [None]:
# Check!
se_count_nodes_to

In [None]:
# Node ids that appear more than once in the "from" column.
lst_jct_node_from_id = se_count_nodes_from.index[
    (se_count_nodes_from > 1).to_numpy()
].tolist()

print(lst_jct_node_from_id)

In [None]:
# Node ids that appear more than once in the "to" column.
lst_jct_node_to_id = se_count_nodes_to.index[
    (se_count_nodes_to > 1).to_numpy()
].tolist()

print(lst_jct_node_to_id)

In [None]:
# Union of the junction node ids found in the "from" and the "to" column (duplicates removed).
lst_jct_node_id = list({*lst_jct_node_from_id, *lst_jct_node_to_id})
print(lst_jct_node_id)

In [None]:
# Check number of junctional nodes.
print(len(lst_jct_node_from_id))
print(len(lst_jct_node_to_id))
print(len(lst_jct_node_id))

# There could be some junctional nodes that are both "from" and "to".

In [None]:
# Define new column for edge types: Influx, Outflux, Junctional and Interim.
# The column starts out as all-NaN but with object dtype: it is filled with string labels
# later on, and writing strings into a float64 column is deprecated in pandas.
df_edge_base["edge_type"] = pd.Series(np.nan, index=df_edge_base.index, dtype="object")
df_edge_base.head()

In [None]:
# An influx edge starts at a node that never appears as a destination ("to") node.
# "isin" marks the opposite case (start node IS a destination somewhere), so negate it.
mask_influx_inverse = df_edge_base["edge_from"].isin(lst_edge_to_unique)
mask_influx = ~mask_influx_inverse

# Number of influx edges (count of True entries in the mask).
nr_influx = int(mask_influx.sum())
nr_influx

In [None]:
# An outflux edge ends at a node that never appears as a start ("from") node.
# "isin" marks the opposite case (end node IS a start somewhere), so negate it.
mask_outflux_inverse = df_edge_base["edge_to"].isin(lst_edge_from_unique)
mask_outflux = ~mask_outflux_inverse

# Number of outflux edges (count of True entries in the mask).
nr_outflux = int(mask_outflux.sum())
nr_outflux

In [None]:
# Check whether any edge is classified as both influx and outflux.
# There should be no "True" entry.
(mask_influx & mask_outflux).value_counts()

In [None]:
# Junctional edges are the edges adjacent to junction nodes.
# First filter: edges whose "from" node is a junction node.
mask_jct_with_from = df_edge_base["edge_from"].isin(lst_jct_node_id)
# Count via sum(): value_counts()[True] raises KeyError when no edge matches.
nr_jct_edge_with_from = mask_jct_with_from.sum()
nr_jct_edge_with_from

In [None]:
# Second filter: edges whose "to" node is a junction node.
mask_jct_with_to = df_edge_base["edge_to"].isin(lst_jct_node_id)
# Count via sum(): value_counts()[True] raises KeyError when no edge matches.
nr_jct_edge_with_to = mask_jct_with_to.sum()
nr_jct_edge_with_to

In [None]:
# Combine masks for edges adjacent to junction nodes.
# Some edges touch a junction node on both their "from" and their "to" side;
# the OR merges all junctional cases without double counting.
mask_jct_raw = (mask_jct_with_from | mask_jct_with_to)
# Count via sum(): value_counts()[True] raises KeyError when no edge matches.
nr_jct_edge_raw = mask_jct_raw.sum()
nr_jct_edge_raw

In [None]:
# Influx & outflux edges are removed from the junctional edges for later use.
# In some cases the raw junctional edges include influx or outflux edges.
mask_jct = (mask_jct_raw & ~(mask_influx | mask_outflux))
# Count via sum(): value_counts()[True] raises KeyError when no edge matches.
nr_jct_edge = mask_jct.sum()
nr_jct_edge

In [None]:
# Mask for interim edges: everything that is neither influx, outflux nor junctional.
mask_interim_inverse = (mask_influx | mask_outflux | mask_jct)
mask_interim = ~mask_interim_inverse
# Count via sum(): value_counts()[True] raises KeyError when no edge matches.
nr_interim = mask_interim.sum()
nr_interim

In [None]:
# Check if sum of influx, outflux, junctional and interim edges coincides with total number of edges.
(nr_influx + nr_outflux + nr_jct_edge + nr_interim) == nr_tot_edge

In [None]:
# Write the edge type label of every mask into the "edge_type" column.
# The assignment order is kept: influx, outflux, interim, junctional.
for str_edge_type_tmp, mask_edge_type_tmp in (
    ("influx", mask_influx),
    ("outflux", mask_outflux),
    ("interim", mask_interim),
    ("junctional", mask_jct),
):
    df_edge_base.loc[mask_edge_type_tmp, "edge_type"] = str_edge_type_tmp

In [None]:
# Check new column "edge_type"
df_edge_base.head()

In [None]:
# Check again edge type has been properly set.
# No NaN!!!
df_edge_base.edge_type.value_counts()

In [None]:
# Import adjacency list array. This is produced by using original dataframe before merging segments.
arr_adjList = np.load("munich_motorway_v3/arr_adj_multi_proc.npy")

In [None]:
# Make dataframe from adjacency list.
df_adjList = pd.DataFrame(arr_adjList, columns=["stEdge", "desEdge"])
df_adjList.head()

In [None]:
# Function to return the edge connected in forward direction.
# Accepts an edge id and returns the id of the connected edge.
def connected_edge_forward (in_df_adjList:pd.DataFrame, in_df_edge_base:pd.DataFrame , in_str_edge_id:str):
    """Return the id of the single edge that follows ``in_str_edge_id``.

    The edge id is translated to its index label in ``in_df_edge_base``; that
    label is looked up in the "stEdge" column of ``in_df_adjList`` and the
    matching "desEdge" entries are the forward neighbours.

    Returns the string "NotConnect" when there is no forward neighbour,
    "NotSingle" when there is more than one, otherwise the "edge_id" of the
    only neighbour.
    """
    # Index label of the first row whose edge_id matches the requested edge.
    is_requested_edge = (in_df_edge_base["edge_id"] == in_str_edge_id).to_numpy()
    idx_requested_edge = in_df_edge_base.index[is_requested_edge][0]

    # Forward direction: requested edge is the start ("stEdge"), neighbours are "desEdge".
    is_outgoing = in_df_adjList["stEdge"] == idx_requested_edge
    lst_idx_successor = in_df_adjList.loc[is_outgoing, "desEdge"].tolist()

    if not lst_idx_successor:
        return "NotConnect"
    if len(lst_idx_successor) > 1:
        return "NotSingle"

    # Exactly one neighbour: translate its index label back to an edge id.
    return in_df_edge_base.at[lst_idx_successor[0], "edge_id"]

# Function to return the edge connected in backward direction.
# Accepts an edge id and returns the id of the connected edge.
def connected_edge_backward (in_df_adjList:pd.DataFrame, in_df_edge_base:pd.DataFrame , in_str_edge_id:str):
    """Return the id of the single edge that precedes ``in_str_edge_id``.

    The edge id is translated to its index label in ``in_df_edge_base``; that
    label is looked up in the "desEdge" column of ``in_df_adjList`` and the
    matching "stEdge" entries are the backward neighbours.

    Returns the string "NotConnect" when there is no backward neighbour,
    "NotSingle" when there is more than one, otherwise the "edge_id" of the
    only neighbour.
    """
    # Index label of the first row whose edge_id matches the requested edge.
    is_requested_edge = (in_df_edge_base["edge_id"] == in_str_edge_id).to_numpy()
    idx_requested_edge = in_df_edge_base.index[is_requested_edge][0]

    # Backward direction: requested edge is the destination ("desEdge"), neighbours are "stEdge".
    is_incoming = in_df_adjList["desEdge"] == idx_requested_edge
    lst_idx_predecessor = in_df_adjList.loc[is_incoming, "stEdge"].tolist()

    if not lst_idx_predecessor:
        return "NotConnect"
    if len(lst_idx_predecessor) > 1:
        return "NotSingle"

    # Exactly one neighbour: translate its index label back to an edge id.
    return in_df_edge_base.at[lst_idx_predecessor[0], "edge_id"]

In [None]:
# Custom exceptions used while tracing segments.
class notSingleConnected(Exception):
    """Signals that an edge has more than one connected edge in the search direction."""

    def __init__(self):
        Exception.__init__(self, "Interim edge is connected more than one edge.")


class notConnected(Exception):
    """Signals that an edge has no connected edge in the search direction."""

    def __init__(self):
        Exception.__init__(self, "Interim edge is not connected.")

In [None]:
# Build "segments": chains of edges traced forward and backward from an interim edge
# until a junctional edge (included in the segment) or an influx/outflux edge
# (excluded from the segment) is reached.

# List of interim edge ids.
lst_interim_edge_id = df_edge_base.loc[mask_interim, "edge_id"].values.tolist()
lst_interim_edge_id_tmp = lst_interim_edge_id.copy()
# List of junctional edge ids.
lst_jct_edge_id = df_edge_base.loc[mask_jct, "edge_id"].values.tolist()
# List of influx/outflux edge ids.
lst_in_out_edge_id = df_edge_base.loc[(mask_influx | mask_outflux), "edge_id"].values.tolist()

# Set copies for O(1) membership tests inside the search loops.
set_jct_edge_id = set(lst_jct_edge_id)
set_in_out_edge_id = set(lst_in_out_edge_id)

# Empty list for collection of interim "segments".
lst_collect_interim_seg = []

try:

    # Segment list update loop. Runs until every interim edge belongs to a segment;
    # with no interim edges at all the loop is skipped (no pop() from an empty list).
    while lst_interim_edge_id_tmp:

        str_id_interim_init = lst_interim_edge_id_tmp.pop()     # Pick one interim edge id that the "segment" contains.
        str_id_interim_fw = str_id_interim_init                 # Cursor for forward search loop.
        str_id_interim_bw = str_id_interim_init                 # Cursor for backward search loop.

        lst_interim_seg = [str_id_interim_init]                 # Interim + terminal edges of the "segment", in flow order.

        while True:                     # Forward search loop.

            # Get connected edge in flow direction.
            str_id_interim_fw = connected_edge_forward(
                in_df_adjList= df_adjList,
                in_df_edge_base= df_edge_base,
                in_str_edge_id= str_id_interim_fw
            )
            # Sentinel strings from the helper are turned into exceptions.
            if str_id_interim_fw == "NotSingle":
                raise notSingleConnected
            elif str_id_interim_fw == "NotConnect":
                raise notConnected

            if str_id_interim_fw in set_in_out_edge_id:         # An influx/outflux edge ends the search
                break                                           # and is NOT included in the "segment".

            lst_interim_seg.append(str_id_interim_fw)           # Append it to the end of the "segment".

            if str_id_interim_fw in set_jct_edge_id:            # A junctional edge is included in the "segment"
                break                                           # and then ends the search.

        while True:                     # Backward search loop.

            # Get connected edge in inverse-flow direction.
            str_id_interim_bw = connected_edge_backward(
                in_df_adjList= df_adjList,
                in_df_edge_base= df_edge_base,
                in_str_edge_id= str_id_interim_bw
            )
            # Sentinel strings from the helper are turned into exceptions.
            if str_id_interim_bw == "NotSingle":
                raise notSingleConnected
            elif str_id_interim_bw == "NotConnect":
                raise notConnected

            if str_id_interim_bw in set_in_out_edge_id:         # An influx/outflux edge ends the search
                break                                           # and is NOT included in the "segment".

            lst_interim_seg.insert(0, str_id_interim_bw)        # Insert it at the front of the "segment".

            if str_id_interim_bw in set_jct_edge_id:            # A junctional edge is included in the "segment"
                break                                           # and then ends the search.

        lst_collect_interim_seg.append(lst_interim_seg)         # The "segment" is complete: append it to the collector list.

        # Remove interim edges that are already used for this "segment". Then go to next iteration.
        set_interim_seg_tmp = set(lst_interim_seg)
        lst_interim_edge_id_tmp = [i for i in lst_interim_edge_id_tmp if i not in set_interim_seg_tmp]

# On either exception the search stops; segments collected so far are kept.
except notSingleConnected as e:
    print("User exception: " + str(e))

except notConnected as e:
    print("User exception: " + str(e))

In [None]:
# List contains lists containing edge_idx for "segement".
print(lst_collect_interim_seg)

In [None]:
# Number of created segments.
len(lst_collect_interim_seg)

In [None]:
# Function to convert a list of edge ids to a single string.
def list2str (in_lst_str_edges:list) -> str:
    """Join the given strings into one string, separated by " & "."""
    return " & ".join(in_lst_str_edges)

# Function to convert a joined edge-id string back to a list.
def str2list (in_str_edges:str) -> list:
    """Split the given string at every " & " separator and return the parts."""
    return in_str_edges.split(sep=" & ")

In [None]:
# Define new edge info dataframe with segmented edges: every edge that belongs to a
# segment is dropped and one row per segment is appended instead.
# The new column "seg_edges" holds the member edge ids of a segment joined into one string.
df_edge_segmented = df_edge_base[["edge_id", "edge_from", "edge_to", "edge_type"]].copy()
df_edge_segmented["seg_edges"] = str()

# Numbering of segmented edges ("segment_1", "segment_2", ...).
idx_seg = 0

# Segment rows and segmented edge ids are gathered first, so the dataframe is
# filtered and concatenated once instead of once per segment.
lst_dic_seg_rows = []
set_seg_edge_id = set()

for lst_seg_edges in lst_collect_interim_seg:

    # Start & End edges of segment.
    str_stEdgeId_seg_tmp = lst_seg_edges[0]
    str_endEdgeId_seg_tmp = lst_seg_edges[-1]
    # Start node of the first edge & End node of the last edge.
    str_stNodeId_seg_tmp = df_edge_base[df_edge_base.edge_id == str_stEdgeId_seg_tmp]["edge_from"].values[0]
    str_endNodeId_seg_tmp = df_edge_base[df_edge_base.edge_id == str_endEdgeId_seg_tmp]["edge_to"].values[0]

    # Row describing the new segment.
    idx_seg += 1
    lst_dic_seg_rows.append({
        "edge_id" : f"segment_{idx_seg}",
        "edge_from" : str_stNodeId_seg_tmp,
        "edge_to" : str_endNodeId_seg_tmp,
        "edge_type" : "segment",
        "seg_edges" : list2str(lst_seg_edges)
    })
    set_seg_edge_id.update(lst_seg_edges)

# Without segments the dataframe (and its original index) stays untouched.
if lst_dic_seg_rows:
    df_seg_all = pd.DataFrame(lst_dic_seg_rows)
    # Mask for segmented edges; their rows are removed.
    mask_seg_tmp = df_edge_segmented["edge_id"].isin(list(set_seg_edge_id))
    # By ignoring index, original index relations will be lost !!!!
    df_edge_segmented = pd.concat(
        [df_edge_segmented[~mask_seg_tmp], df_seg_all],
        ignore_index= True,
        sort= False
    )

In [None]:
# Check segmented data frame.
df_edge_segmented

In [None]:
# Check data types.
df_edge_segmented.dtypes

In [None]:
# Check number of each edge type.
df_edge_segmented.edge_type.value_counts()

In [None]:
# Compare with original edge type configuration. Before segmentation.
df_edge_base.edge_type.value_counts()

In [None]:
# The connectivity dictionary holds the possible (influx --> outflux) pairs.
# As a first step, collect the df_edge_base INDEX labels of all influx edges.
lst_idx_influx_edges_connect = df_edge_base.index[
    (df_edge_base["edge_type"] == "influx").to_numpy()
].tolist()
print(len(lst_idx_influx_edges_connect))                # Number of influx edges.
print(len(lst_idx_influx_edges_connect) == nr_influx)   # True when the count still matches the original one.
print(lst_idx_influx_edges_connect)                     # The list contains INDEX labels of df_edge_base, not edge ids !!!!

In [None]:
# Function to return a list of connected edge idxs in forward direction.
# Accepts a list of edge idxs and returns the list of forward connected edge idxs.
def connected_edge (in_df_adjList:pd.DataFrame, in_lst_idxStPoints:list) -> list:
    """Return the distinct edge idxs directly reachable from the given start edges.

    Args:
        in_df_adjList: adjacency table with the columns "stEdge" and "desEdge".
        in_lst_idxStPoints: edge idxs to start from.

    Returns:
        The "desEdge" values of all rows whose "stEdge" is one of the start
        idxs, without duplicates. Start idxs with no outgoing connection
        contribute nothing; an empty input gives an empty list.
    """
    # Membership is tested against the VALUES of "stEdge" (isin). The `in` operator
    # on a Series tests its index labels instead, which wrongly skipped start edges
    # whose idx was not also a row label of the adjacency table.
    mask_from_start = in_df_adjList["stEdge"].isin(in_lst_idxStPoints)

    # unique() drops duplicated edge idxs.
    return in_df_adjList.loc[mask_from_start, "desEdge"].unique().tolist()

# Function to return the edge idxs that are outflux edges.
def fil_outflux_edge (in_df_edge_base:pd.DataFrame, in_lst_connected_edges:list) -> list:
    """Keep only those of the given edge idxs whose "edge_type" is "outflux".

    The idxs are used as index labels of ``in_df_edge_base``. The result
    contains no duplicates.
    """
    lst_idx_outflux = [
        idx_edge
        for idx_edge in in_lst_connected_edges
        if in_df_edge_base.loc[idx_edge, "edge_type"] == "outflux"
    ]
    # Round trip through a set removes duplicated edge idxs.
    return list(set(lst_idx_outflux))

In [None]:
# Define a list to collect reachable destination edges (outflux) for each influx edge.
# Entry k belongs to the k-th idx of lst_idx_influx_edges_connect.
lst_collect_idxDesPoints = []

# Step-wise forward search for each influx edge.
for idx_stPoint in lst_idx_influx_edges_connect:

    # Initialization for each influx edge.
    lst_idxStPoints_tmp1 = [idx_stPoint]    # Start points: the frontier of the current step, initially the influx edge.
    set_idxDesPoints_tmp1 = set()           # Destination points: reachable idxs of outflux edges.
    set_connected_hist_tmp1 = set()         # Edges found so far; a set keeps the "already seen" test O(1).

    # Runs until a step produces no new edges to continue from.
    while lst_idxStPoints_tmp1:

        # Find all forward connected edges. Only newly found edges are kept for this step.
        lst_connected_tmp2 = [
            i for i in connected_edge(df_adjList, lst_idxStPoints_tmp1)
            if i not in set_connected_hist_tmp1
        ]
        # Update history of connected edges.
        set_connected_hist_tmp1.update(lst_connected_tmp2)

        # Outflux edges among the newly found connected edges.
        set_idxDes_tmp2 = set(fil_outflux_edge(df_edge_base, lst_connected_tmp2))
        # Add them to the reachable destinations.
        set_idxDesPoints_tmp1 |= set_idxDes_tmp2
        # The next start edges are the newly found edges without the destination edges.
        lst_idxStPoints_tmp1 = [i for i in lst_connected_tmp2 if i not in set_idxDes_tmp2]

    # Append list of reachable destinations (outflux idxs) to collecting list.
    lst_idxDesPoints_tmp1 = list(set_idxDesPoints_tmp1)
    lst_collect_idxDesPoints.append(lst_idxDesPoints_tmp1)

In [None]:
# Organise the collected reachable destinations (outflux idxs) per influx edge
# as a dictionary: " influx edge id : [reachable outflux edge ids], ...."

# List of influx edge ids. Not INDEX nr!!!
lst_id_influx_edges_connect = [
    df_edge_base.at[idx_influx, "edge_id"] for idx_influx in lst_idx_influx_edges_connect
]

# Pair every influx edge id with its reachable outflux idxs, translated to edge ids.
dic_reachable_in_out = {
    id_edge_influx: [df_edge_base.at[idx_des, "edge_id"] for idx_des in lst_idx_des]
    for id_edge_influx, lst_idx_des in zip(lst_id_influx_edges_connect, lst_collect_idxDesPoints)
}

# Check it!
print(len(dic_reachable_in_out))
print(dic_reachable_in_out)

In [None]:
# An ordered categorical copy of "edge_type" defines the sort order of the edge types.
lst_edge_type_order = ["influx", "outflux", "junctional", "segment"]
df_edge_segmented["edge_type_cat"] = pd.Categorical(
    df_edge_segmented["edge_type"], categories= lst_edge_type_order, ordered= True
)
df_edge_segmented.loc[:, ["edge_type", "edge_type_cat"]]

In [None]:
# Sort the segmented edge info dataframe by edge type and renumber the rows.
# Influx --> outflux --> junctional --> segment
df_edge_seg_ordered = df_edge_segmented.sort_values(by= "edge_type_cat").reset_index(drop= True)
df_edge_seg_ordered

In [None]:
# Store ordered segemented edge dataframe for future purpose.
df_edge_seg_ordered.to_pickle("munich_motorway_v3/df_edge_seg_ordered.pkl")

In [None]:
# OD filter dataframe used for OD value creation.
# An element is 1 only for a reachable influx-outflux pair; everything else stays 0.

# Ids of the ordered-segmented edges label both the rows and the columns.
lst_ordered_edge_id = df_edge_seg_ordered["edge_id"].tolist()
len_od = len(lst_ordered_edge_id)

# Square all-zero dataframe, one row and one column per edge id.
arr_od_zros = np.zeros(shape= (len_od, len_od))
df_od_fil = pd.DataFrame(data= arr_od_zros, index= lst_ordered_edge_id, columns= lst_ordered_edge_id)

# Flatten the dictionary into (influx id, outflux id) pairs and set 1 for each pair.
lst_od_pair = [
    (id_influx, id_outflux)
    for id_influx, lst_id_outflux in dic_reachable_in_out.items()
    for id_outflux in lst_id_outflux
]
for id_influx, id_outflux in lst_od_pair:
    df_od_fil.at[id_influx, id_outflux] = 1

In [None]:
# Let's check it!

import plotly.express as px

fig = px.imshow(df_od_fil.values)
fig.show()

In [None]:
# Number of reachable pairs.
df_od_fil.sum().sum()

In [None]:
# Save OD filter 
df_od_fil.to_pickle("munich_motorway_v3/df_od_fil.pkl")

In [None]:
# Safely stored? Reload the pickled OD filter and compare it with the one in memory.
# DataFrame.equals compares the labels and every element; the built-in all() applied
# to a DataFrame only iterates over the column labels and never inspects the values.
df_tmp = pd.read_pickle("munich_motorway_v3/df_od_fil.pkl")
df_od_fil.equals(df_tmp)