In [1]:
import pandas as pd
import numpy as np
from collections import Counter

In [12]:
def _collapse_reverse_links(edge_data):
    """Return a new frame in which reverse-direction duplicates are removed.

    Two rows belong to the same node pair when they connect the same two
    nodes, regardless of direction. A row whose start_node_id is greater than
    its end_node_id is removed only when the same pair also has a row in
    ascending direction (start_node_id <= end_node_id), so no node pair ever
    disappears from the result.

    Rows of pairs that occur more than once are placed first, followed by rows
    of pairs that occur exactly once; the original row order is kept inside
    each group. The index is reset and link_id is renumbered 0..n-1.
    The input frame is not modified.
    """
    starts = edge_data['start_node_id'].tolist()
    ends = edge_data['end_node_id'].tolist()
    descending = [u > v for u, v in zip(starts, ends)]

    # Canonical key of an unordered pair: the smaller node id comes first.
    pair_keys = [(v, u) if flipped else (u, v)
                 for u, v, flipped in zip(starts, ends, descending)]
    pair_counts = Counter(pair_keys)
    # Pairs that already own at least one row in ascending direction.
    ascending_pairs = {key for key, flipped in zip(pair_keys, descending) if not flipped}

    keep = np.array(
        [(not flipped) or (key not in ascending_pairs)
         for key, flipped in zip(pair_keys, descending)],
        dtype=bool,
    )
    repeated = np.array([pair_counts[key] > 1 for key in pair_keys], dtype=bool)

    # Positional order of the output: repeated pairs first, then singletons.
    order = np.concatenate([np.flatnonzero(keep & repeated),
                            np.flatnonzero(keep & ~repeated)])
    collapsed = edge_data.iloc[order].reset_index(drop=True)
    collapsed['link_id'] = np.arange(len(collapsed))
    return collapsed


def sortoutedges(edge_data, undirected = True):
    """Convert a raw link table into the processed edge table.

    Parameters
    ----------
    edge_data : pandas.DataFrame
        Must contain the columns start_node_id, end_node_id, start_osmid,
        end_osmid, length, maxmph and lanes. When ``undirected`` is false it
        must also contain link_id. The frame is never modified.
    undirected : bool, default True
        If true, a link running from the larger to the smaller node id is
        dropped whenever the opposite-direction link exists, and link_id is
        renumbered from 0. If false, every row and its link_id are kept.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: uniqueid, u, v, osmid_u, osmid_v, edge_length,
        speed_mph, lanes. edge_length is rounded with np.round and cast to int.

    Raises
    ------
    KeyError
        If a required column is missing.
    ValueError
        If length contains missing values (the integer cast fails).
    """
    edges = _collapse_reverse_links(edge_data) if undirected else edge_data

    selected = edges[['link_id', 'start_node_id', 'end_node_id', 'start_osmid',
                      'end_osmid', 'length', 'maxmph', 'lanes']]
    renamed = selected.rename(columns={
        'maxmph': 'speed_mph',
        'length': 'edge_length',
        'start_node_id': 'u',
        'end_node_id': 'v',
        'start_osmid': 'osmid_u',
        'end_osmid': 'osmid_v',
        'link_id': 'uniqueid',
    })
    # NOTE(review): the source osmid values are replaced by the constants 1 and 2,
    # exactly as before; presumably placeholders for the consumer of this file - confirm.
    return renamed.assign(
        osmid_u=1,
        osmid_v=2,
        edge_length=lambda frame: np.round(frame['edge_length'], 0).astype('int'),
    )


In [13]:
# Read the raw link table and write the processed edge table next to it.
path = './berkeley_links.csv'
edge_data = pd.read_csv(path)
# undirected = False keeps every row; sortoutedges then reads the link_id
# column from the CSV itself, so the file must provide it.
edge_data_new = sortoutedges(edge_data, undirected = False)
# index = False leaves the DataFrame row index out of the written file.
edge_data_new.to_csv('./berkeley_edges_processed.csv', index = False)

In [6]:
def sortoutnodes(node_data):
    """Convert a raw node table into the processed node table.

    Parameters
    ----------
    node_data : pandas.DataFrame
        Must contain the columns node_id, lon and lat. The frame is not
        modified; all derived columns are built on a copy.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: osmid, x, y, ref, highway, index, where x is lon,
        y is lat and index is node_id. The row index of the input is kept.

    Raises
    ------
    KeyError
        If node_id, lon or lat is missing.
    """
    # NOTE(review): osmid is the constant 1 and ref/highway are the literal
    # string 'NA' for every row, as before; presumably placeholders - confirm.
    enriched = node_data.assign(
        index=node_data['node_id'],
        x=node_data['lon'],
        y=node_data['lat'],
        osmid=1,
        ref='NA',
        highway='NA',
    )
    return enriched[['osmid', 'x', 'y', 'ref', 'highway', 'index']]

In [7]:
# Read the raw node table and write the processed node table next to it.
# Note that this rebinds the notebook-level name `path`.
path = './berkeley_nodes.csv'
node_data = pd.read_csv(path)
node_data_new = sortoutnodes(node_data)
# index = False leaves the DataFrame row index out of the written file.
node_data_new.to_csv('./berkeley_nodes_processed.csv', index = False)
