In [1]:
import numpy as np
import pandas as pd
import json

In [2]:
# Read the dataviz CSV from the data directory and preview its first rows.
data = pd.read_csv(filepath_or_buffer="../data/dataviz.csv")
data.head()

Unnamed: 0,plat,plng,dlat,dlng,t,duration,distance,road
0,46.517116,6.630342,46.508528,6.627598,11:15:00,132.0,1336.0,"[311463563, 2940544168, 311463564, 561463459, ..."
1,46.517116,6.630342,46.521429,6.656347,20:00:00,426.0,3702.0,"[311463563, 2940544168, 311463564, 561463459, ..."
2,46.517116,6.630342,46.558792,6.680104,18:15:00,707.0,8354.0,"[311463563, 2940544168, 311463564, 561463459, ..."
3,46.520649,6.635173,46.51959,6.634718,18:45:00,163.0,1521.0,"[2223091605, 3786724165, 3786724164, 302530573..."
4,46.510731,6.630553,46.51595,6.657525,19:00:00,437.0,3818.0,"[3516999773, 3817926827, 567951313, 567951312,..."


In [105]:
# Load the node lookup; later cells index it as nodes_dict[node_id]['lat'] / ['lon'].
with open('../data/nodes_dict.json') as json_file:
    nodes_dict = json.loads(json_file.read())

In [134]:
# Compute number of passages on each edge
nonvalid_nodes = set()


def update_path_heat(edges_heat, path_nodes, nodes_dict):
    """Add one passage to every edge traversed by ``path_nodes``.

    Nodes that are not keys of ``nodes_dict`` are skipped and recorded in the
    module-level ``nonvalid_nodes`` set; the valid nodes on either side of a
    skipped node are then treated as directly connected.

    The path is walked in a single iterative pass. The previous recursive
    version used one stack frame and one list copy per node, which could raise
    ``RecursionError`` on long paths and cost quadratic time. It also failed to
    record an invalid node that ended a path with no valid node before it;
    every invalid node is now recorded.

    Parameters
    ----------
    edges_heat : dict
        Maps an edge key ``"<a>-<b>"`` (the two node ids sorted, joined by
        ``'-'``) to its passage count. Updated in place.
    path_nodes : iterable of str
        Ordered node ids of one path. Ids must be strings because they are
        joined into the edge key.
    nodes_dict : dict or other container
        Only membership (``node in nodes_dict``) is used here.

    Returns
    -------
    dict
        The same ``edges_heat`` object that was passed in.
    """
    previous = None  # most recent valid node, i.e. the start of the next edge
    for node in path_nodes:
        if node not in nodes_dict:
            nonvalid_nodes.add(node)
            continue
        if previous is not None:
            # Sorting makes the key direction-independent: a->b and b->a
            # are counted on the same edge.
            dic_key = '-'.join(sorted((previous, node)))
            edges_heat[dic_key] = edges_heat.get(dic_key, 0) + 1
        previous = node
    return edges_heat

# Accumulate the passage count of every edge over all paths.
# NOTE(review): paths_nodes is defined outside the visible cells; presumably a
# pandas Series (or dict) mapping an index to a list of node-id strings - confirm.
edges_heat = {}
nonvalid_nodes = set()  # reset so that re-running this cell does not keep stale ids
# .items() replaces Series.iteritems(), which was removed in pandas 2.0.
for i, path_nodes in paths_nodes.items():
    edges_heat = update_path_heat(edges_heat, path_nodes, nodes_dict)

len(edges_heat)

12660

In [136]:
# Build a dataframe that has a heat value for each edge (pair of nodes)
heat_df = pd.DataFrame(list(edges_heat.items()), columns=['edge', 'heat'])

# Edge keys have the form "<node1>-<node2>". Split them with the vectorized
# string accessor rather than a Python-level row-wise apply; this also works
# when the frame is empty.
edge_nodes = heat_df['edge'].str.split('-')
heat_df['node1'] = edge_nodes.str[0]
heat_df['node2'] = edge_nodes.str[1]

# Keep only the useful columns, hottest edges first, with a fresh 0..n-1 index.
heat_df = (
    heat_df[['node1', 'node2', 'heat']]
    .sort_values(['heat'], ascending=False)
    .reset_index(drop=True)
)
heat_df.head()

Unnamed: 0,node1,node2,heat
0,2657469978,3786724226,995
1,3787835432,4601360744,995
2,3786724226,4601360744,995
3,2223091605,3786724165,995
4,2223091603,3025305734,995


In [137]:
# Sanity check: one row per edge key, three columns (node1, node2, heat).
heat_df.shape

(12660, 3)

In [138]:
# Filter out edges whose endpoints are not both known node ids.
# A vectorized membership mask replaces the row-wise apply and the temporary
# 'valid' column.
known_nodes = list(nodes_dict)
both_known = heat_df['node1'].isin(known_nodes) & heat_df['node2'].isin(known_nodes)

# .copy() so that later column assignments act on an independent frame
# instead of a slice of the unfiltered one.
heat_df = heat_df[both_known].copy()
heat_df.shape

(12660, 3)

In [139]:
# Attach the latitude & longitude of both endpoints of every edge
for suffix in ('1', '2'):
    node_col = 'node' + suffix
    for coord in ('lat', 'lon'):
        # Defaults bind the current loop values into the lambda.
        heat_df[coord + suffix] = heat_df.apply(
            lambda row, node_col=node_col, coord=coord: nodes_dict[row[node_col]][coord],
            axis=1,
        )

# Put each node id next to its coordinates, with the heat value last.
column_order = ['node1', 'lat1', 'lon1', 'node2', 'lat2', 'lon2', 'heat']
heat_df = heat_df[column_order]
print(heat_df.shape)
heat_df.head()

(12660, 7)


Unnamed: 0,node1,lat1,lon1,node2,lat2,lon2,heat
0,2657469978,46.5209304,6.6358808,3786724226,46.5208706,6.6357914,995
1,3787835432,46.5207605,6.6356271,4601360744,46.5208475,6.6357465,995
2,3786724226,46.5208706,6.6357914,4601360744,46.5208475,6.6357465,995
3,2223091605,46.5206503,6.6351191,3786724165,46.5206473,6.6352327,995
4,2223091603,46.5206507,6.635443,3025305734,46.5206458,6.6353659,995


In [123]:
# Save the edge heat table to CSV; index=False leaves the DataFrame index out of the file.
heat_df.to_csv("../data/heatmap_data.csv", index=False)