In [1]:
import xml.etree.ElementTree as ET
from pylab import *
import networkx as nx
import pickle

In [2]:
def store(data, name):
    """Pickle ``data`` to ``../0_map_data_processed/<name>.p``.

    Parameters
    ----------
    data : object
        Any picklable object.
    name : str
        Base file name, without the directory or the ``.p`` extension.

    The file is opened in binary write mode, so an existing file with the
    same name is overwritten. The highest pickle protocol available to the
    running interpreter is used.
    """
    target = '../0_map_data_processed/' + name + '.p'
    with open(target, 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Parsing the OSM file using iterparse

In [3]:
def getxy(lat,lon):
    """Convert a latitude/longitude pair in degrees to an ``[x, y]`` list.

    Both angles are converted to radians, and the x and y components of the
    matching point on a sphere of radius 6378000 are returned. The third
    (z) component, R*sin(lat), is not computed.

    NOTE(review): because z is left out, the planar distance between two
    returned points under-counts their north-south separation. Confirm this
    is acceptable wherever these coordinates are used to measure distance.
    """
    earth_radius = 6378000             # radius of the earth (presumably metres)
    lat_rad = lat*pi/180
    lon_rad = lon*pi/180
    # Distance of the point from the polar axis; shared by both components.
    axis_dist = earth_radius*cos(lat_rad)
    return [axis_dist*cos(lon_rad), axis_dist*sin(lon_rad)]

def node_attributes(elem):
    """Return ``(id, lat, lon)`` read from the attributes of a node element.

    The id is returned exactly as stored in the ``id`` attribute, while
    ``lat`` and ``lon`` are converted to ``float``.
    """
    latitude, longitude = (float(elem.get(key)) for key in ('lat', 'lon'))
    return elem.get('id'), latitude, longitude

def way_attributes(elem):
    """Return ``(id, path)`` for a way element.

    ``path`` is the list of ``ref`` attribute values of the element's
    ``nd`` children, in document order. Children with any other tag are
    ignored.
    """
    refs = [child.get('ref') for child in elem if child.tag == "nd"]
    return elem.get('id'), refs

def checkroad(way):
    """Return True when ``way`` has a ``tag`` child whose ``k`` is "highway".

    Parameters
    ----------
    way : iterable of elements
        Typically a ``way`` element; its direct children are inspected.

    Returns
    -------
    bool
        True as soon as one matching child is found, False otherwise
        (including when ``way`` has no children).
    """
    # .get() is used so that a <tag> without a "k" attribute is treated as
    # "not a highway tag" instead of raising KeyError.
    return any(
        child.tag == 'tag' and child.attrib.get('k') == "highway"
        for child in way
    )
        
def distance(x1,y1,x2,y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2)."""
    dx = x2 - x1
    dy = y2 - y1
    return sqrt(dx**2 + dy**2)
    
    
def parser(source):
    """Stream an OSM XML file and extract the road segments it contains.

    Parameters
    ----------
    source : str or file object
        Passed unchanged to ``ET.iterparse``.

    Returns
    -------
    node_data : dict
        ``{node_id: [lat, lon]}`` for every ``node`` element in the file.
    node : dict
        ``{node_id: [x, y]}`` for the same nodes, as computed by ``getxy``.
    useful_nodes : dict
        ``{node_id: True}`` for every node that is an endpoint of at least
        one emitted edge.
    G : list
        One ``(node_id_a, node_id_b, d)`` tuple for each pair of consecutive
        nodes along every way accepted by ``checkroad``; ``d`` is
        ``distance`` applied to the two nodes' x/y coordinates.

    Notes
    -----
    Coordinates are looked up while the way is being read, so a node must
    appear in the file before any way that references it (presumably always
    true for standard OSM XML exports). A segment whose endpoint has not been
    seen is skipped rather than raising ``KeyError``.
    """
    useful_nodes = {}
    node = {}               # {'node_ID1' : [x1,y1], 'node_ID2' : [x2,y2],.... }
    node_data = {}          # same as node but with lat and lon instead of x,y
    G = []                  # a list of edges - [(node1,node2,dist12),(node3,node4,dist34), ...]

    for _event, elem in ET.iterparse(source):
        if elem.tag == "node":
            ID,lat,lon = node_attributes(elem)
            node[ID] = getxy(lat,lon)
            node_data[ID] = [lat,lon]
            elem.clear()

        elif elem.tag == "way":
            if checkroad(elem):
                _way_id, path = way_attributes(elem)
                for start, end in zip(path, path[1:]):
                    # A way may reference a node that is not present in the
                    # file; skip that segment.
                    if start not in node or end not in node:
                        continue
                    [x1,y1] = node[start]
                    [x2,y2] = node[end]
                    d = distance(x1,y1,x2,y2)
                    G.append((start,end,d)) # a list of tuples
                    useful_nodes[start] = True
                    useful_nodes[end] = True
            # Release the children of every way, road or not, so that
            # non-road ways do not stay in memory while streaming.
            elem.clear()

    return node_data,node,useful_nodes,G

In [4]:
# Parse the raw OSM file. `node_data` maps node ID -> [lat, lon], `nodes` maps
# node ID -> [x, y], `useful_nodes` marks the endpoints of road segments and
# `G` is the list of (node, node, distance) edge tuples.
node_data,nodes,useful_nodes,G = parser('../0_map_data_raw/bangalore0')
print('Done')

Done


In [5]:
# Report how many nodes were read from the file and how many edge tuples
# were generated from the road ways.
print("No of nodes - " , len(node_data))
print("No of edges - " , len(G))

No of nodes -  3104016
No of edges -  505146


In [6]:
# %%time
# Load the (node, node, distance) tuples into an undirected NetworkX graph,
# using the distance as the edge weight. nx.Graph stores at most one edge per
# node pair, so a pair that occurs more than once in G ends up as one edge.
M = nx.Graph()
M.add_weighted_edges_from(G)

In [7]:
# Sanity check: the graph's node count should equal the number of
# road-segment endpoints collected by the parser.
print(len(M.nodes()),len(M.edges()))
print(len(useful_nodes))

445653 505083
445653


## Extracting only the useful nodes from the list of edges obtained

In [8]:
# Remove every parsed node that is not an endpoint of a road segment, keeping
# the x/y and lat/lon dictionaries in step with each other.
all_nodes = list(nodes.keys())   # snapshot of the keys: the dicts shrink inside the loop
for ID in all_nodes:
    if ID in useful_nodes:
        continue
    del nodes[ID]
    del node_data[ID]

## Storing all the data for use from now on

In [9]:
# Pickle the edge list and the two node dictionaries (x/y and lat/lon) into
# ../0_map_data_processed/ as edges_list.p, node_xy.p and node_ll.p.
store(G, 'edges_list')
store(nodes, 'node_xy')
store(node_data, 'node_ll')

In [10]:
# Both node dictionaries should have the same number of entries.
print(len(nodes), len(node_data))

445653 445653
