In [11]:
import pickle
import networkx as nx
import pandas as pd
import numpy as np
import time

In [12]:
with open(r'graph_centrality_codes/nodes_edges_weighted.pickle', 'rb') as handle:
    B_matrix_weighted, node_coordinates_weighted = pickle.load(handle)
    
with open(r'graph_centrality_codes/distance.pickle', 'rb') as handle:
    distance_array = pickle.load(handle)  
    
#distance_array (1st col - distance (m), 2nd col - time (s), 3rd col - time in traffic (s))

In [13]:
origin_destination_path_df = pd.read_pickle('origin_destination_path_df.pkl')

In [15]:
# Initialize array of 0s and add as a column to B_matrix_weighted

zero_column = np.zeros((B_matrix_weighted.shape[0], 1))
B_matrix_weighted_array = np.hstack((B_matrix_weighted, zero_column))
B_matrix_weighted_array = B_matrix_weighted_array.astype(int)

  B_matrix_weighted_array = B_matrix_weighted_array.astype(int)


In [16]:
print(B_matrix_weighted_array.shape)

(70983, 7)


In [17]:
B_matrix_weighted_df = pd.DataFrame(B_matrix_weighted_array)
B_matrix_weighted_df

Unnamed: 0,0,1,2,3,4,5,6
0,31017,30686,640120,684,24,55,0
1,40651,40669,3976160,340,28,23,0
2,15202,15358,1417450,642,36,322,0
3,15276,15427,1417420,642,36,261,0
4,15358,15473,1417460,294,36,83,0
...,...,...,...,...,...,...,...
70978,37860,37869,4728880,195,36,6,0
70979,37860,37862,4728870,195,36,1,0
70980,37856,37860,4728890,166,36,9,0
70981,37877,37850,309910,407,36,0,0


In [18]:
# Approach 1: Indexing the original B_matrix_weighted in array form (Issue: In theory this should be faster, and if you print individual values commented, it should be correct but it doesn't update the original array for some reason)

# Iterate over each pair of adjacent elements in the path array
start=time.time()

for index, row in origin_destination_path_df.iterrows():
    path = row['path']
    for i in range(len(path) - 1):
        # Find the indices of the path elements in the B array
        idx1 = np.where(B_matrix_weighted_array[:, 0] == path[i])[0]
        idx2 = np.where(B_matrix_weighted_array[:, 1] == path[i+1])[0]
        
        if len(idx1) > 0 and len(idx2) > 0:
            idx = np.intersect1d(idx1, idx2)
            # print(idx)
            if len(idx) > 0:
                B_matrix_weighted_array[idx, 6] = B_matrix_weighted_array[idx, 6] + row['S000_adjusted']
                # print(row['S000_adjusted'])

end=time.time()

print(end-start)

In [20]:
print(len(B_matrix_weighted_array[B_matrix_weighted_array[:, 6] != 0]))

array([[   794,    769, 708400,    343,     40,     16,      2]])

In [21]:
# Approach 2: Indexing the original B_matrix_weighted in dataframe form (This would be slower than the first approach but is working)

# This iterates over each row of the origin_destination_path_df dataframe and for each row, it iterates over each pair of adjacent nodes in the path column.
# For each pair of nodes, it checks if the nodes are present in the first and second columns (or reversed) of the B_matrix_weighted_df dataframe.
# If the nodes are present, it adds the S000_adjusted value to the 6th column of the B_matrix_weighted_df dataframe.

start=time.time()

for index, row in origin_destination_path_df.iterrows():
    for i in range(len(row['path']) - 1):
        if (row['path'][i] in B_matrix_weighted_df[0] and row['path'][i + 1] in B_matrix_weighted_df[1]):
            B_matrix_weighted_df.loc[(B_matrix_weighted_df[0] == row['path'][i]) & (B_matrix_weighted_df[1] == row['path'][i + 1]), 6] += row['S000_adjusted']
        elif (row['path'][i] in B_matrix_weighted_df[1] and row['path'][i + 1] in B_matrix_weighted_df[0]):
            B_matrix_weighted_df.loc[(B_matrix_weighted_df[1] == row['path'][i]) & (B_matrix_weighted_df[0] == row['path'][i + 1]), 6] += row['S000_adjusted']

end=time.time()

print(end-start)

  B_matrix_weighted_df.loc[(B_matrix_weighted_df[0] == row['path'][i]) & (B_matrix_weighted_df[1] == row['path'][i + 1]), 6] += row['S000_adjusted']


In [23]:
print(len(B_matrix_weighted_df[B_matrix_weighted_df[6] != 0]))

Unnamed: 0,0,1,2,3,4,5,6
148,28933,28934,3940600,620,60,73,2.000000
209,397,403,4332600,321,80,17,14.625000
273,28802,28805,3933200,370,60,56,2.533333
274,28805,28806,3933200,370,60,0,2.533333
297,34,41,4238000,812,36,49,0.166667
...,...,...,...,...,...,...,...
70009,6,5,1372300,317,40,16,9.833333
70289,19,23,1009800,340,40,18,21.300000
70833,30544,30262,6809460,533,47,2,2.000000
70834,30622,30544,6809430,255,47,1,2.000000


In [161]:
# Show where the 6th column in B_matrix_weighted_df is not 0

print('Number of node pairs updated in B_matrix_weighted: ' + str(len(B_matrix_weighted_df[B_matrix_weighted_df[6] != 0])))

Number of node pairs updated in B_matrix_weighted: 709


In [164]:
# Convert B_matrix_weighted_df back to a numpy array

B_matrix_weighted_updated = B_matrix_weighted_df.to_numpy()

# Export B_matrix_weighted_updated as a pickle file

with open('B_matrix_weighted_updated.pickle', 'wb') as handle:
    pickle.dump(B_matrix_weighted_updated, handle, protocol=pickle.HIGHEST_PROTOCOL)