In [1]:
%matplotlib qt
import xml.etree.ElementTree as ET
from pylab import *
import networkx as nx
import pickle
import numpy as np

In [2]:
def store(data, name):
    with open('../0_map_data_processed/'+name+'.p','wb') as f:
        pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)

# Parsing the osm file using iterparse

In [9]:
def getxy(lat,lon):
    # gets X,Y xoordinates using latitude and longitude
    R = 6378000             # Radius of earth
    latr = lat*pi/180
    lonr = lon*pi/180
    x = R*cos(latr)*cos(lonr)
    y = R*cos(latr)*sin(lonr)
    return [x,y]

def node_attributes(elem):
    # Given a node element, gets its attributes like ID,latitude,longitude
    ID = elem.get('id')
    lat = float(elem.get('lat'))
    lon = float(elem.get('lon'))
    return ID,lat,lon

def way_attributes(elem):
    path = []
    ID = elem.get('id')
    for nd in elem:
        if nd.tag == "nd":
            path.append(nd.get('ref'))
    return ID,path  

def checkway(way,check):
    for nd in way:
        if(nd.tag == 'tag'):
            if(nd.attrib['k'] == check):
                return True
                break
    return False
        
def distance(x1,y1,x2,y2):
    return sqrt((x2-x1)**2 + (y2-y1)**2)
    
    
def parser(source):
    
    useful_nodes = {}
    node = {}               # {'node_ID1' : [x1,y1], 'node_ID2' : [x2,y2],.... }
    node_data = {}          # same as node but with lat and lon instead of x,y
    way = {}                # {'way_ID1' : [nodes 1], 'way_ID2' : [nodes 2],.... }
    G = []        # a list of edges - [(node1,node2,dist12),(node3,node4,dist34), ...]
    buildings = []
        
    for event, elem in ET.iterparse(source):
        if elem.tag == "node":
            ID,lat,lon = node_attributes(elem)
            node[ID] = getxy(lat,lon)
            node_data[ID] = [lat,lon]
            elem.clear()
        
        if elem.tag == "way":
            if(checkway(elem, "highway")):
                ID,path = way_attributes(elem)
                for i in range(len(path)-1):
                    [x1,y1] = node[path[i]]
                    [x2,y2] = node[path[i+1]]
                    d = distance(x1,y1,x2,y2)
                    G.append((path[i],path[i+1],d)) # a list of tuples
                    useful_nodes[path[i]] = True
                    useful_nodes[path[i+1]] = True
                way[ID] = path
                elem.clear()
            if(checkway(elem, "building")):
                ID,path = way_attributes(elem)
                buildings.append([])
                for i in range(len(path)-1):
                    [x1,y1] = node[path[i]]
                    [x2,y2] = node[path[i+1]]
                    buildings[-1].append((path[i],path[i+1])) # a list of tuples
                elem.clear()
        
    return node_data,node,useful_nodes,G,buildings

In [18]:
node_data,nodes,useful_nodes,G,buildings = parser('../0_map_data_raw/bangalore0')
print('Done')

Done


In [19]:
print("No of nodes - " , len(node_data))
print("No of edges - " , len(G))
print("No of buildings - ", len(buildings))

No of nodes -  3104016
No of edges -  505146
No of buildings -  587396


In [12]:
# %%time
M = nx.Graph()
M.add_weighted_edges_from(G)

In [22]:
print(len(M.nodes()),len(M.edges()))
print(len(useful_nodes))

445653 505083
445653


In [20]:
def poly_area(x,y):
    correction = x[-1] * y[0] - y[-1]* x[0]
    main_area = np.dot(x[:-1], y[1:]) - np.dot(y[:-1], x[1:])
    return 0.5*np.abs(main_area + correction)

In [25]:
for x,y,d in G:
    x1,y1 = nodes[x]
    x2,y2 = nodes[y]
#     plot([-x1,-x2],[-y1,-y2],'-r',linewidth=0.5)
building_detail=[]    
for building in buildings:
    x_array=[]
    y_array=[]
    for x,y in building:
        x1,y1 = nodes[x]
        x2,y2 = nodes[y]
        x_array.extend((x1,x2))
        y_array.extend((y1,y2))
#         plot([-x1,-x2],[-y1,-y2],'-b',linewidth=1)
    x_mid=sum(x_array)/len(x_array)
    y_mid=sum(y_array)/len(y_array)
    area =poly_area(x_array,y_array)
    building_detail.append([x_mid,y_mid,area])    
# show()

In [31]:
print("building boundaries")
print(buildings[:5])
print("building details")
print(building_detail[:5])
print('no of buildings',len(building_detail))

building boundaries
[[('254438362', '254438363'), ('254438363', '254438364'), ('254438364', '254438365'), ('254438365', '254438362')], [('263870609', '263870628'), ('263870628', '263870646'), ('263870646', '263870666'), ('263870666', '263870609')], [('263871052', '263871073'), ('263871073', '263871078'), ('263871078', '263871083'), ('263871083', '263871052')], [('264111556', '264111557'), ('264111557', '264111558'), ('264111558', '264111559'), ('264111559', '264111556')], [('264112138', '264112139'), ('264112139', '264112140'), ('264112140', '264112141'), ('264112141', '264112138')]]
building details
[[1337177.7083392432, 6069845.451986134, 2146.015625], [1327990.8131399327, 6072924.47516153, 187.90625], [1327915.6947610066, 6072939.764428042, 351.328125], [1328019.3683700706, 6072907.2775828, 173.40234375], [1327976.8200176554, 6072916.4149402, 154.5859375]]
no of buildings 587396


## Extracting only Useful nodes from list of edges obtained

In [28]:
all_nodes = list(nodes.keys())
for ID in all_nodes:
    if(ID not in useful_nodes):
        del nodes[ID]
        del node_data[ID]

## Storing all the data for use from now on

In [29]:
print(len(nodes), len(node_data))

445653 445653


In [30]:
store(G, 'edges_list')
store(nodes, 'node_xy')
store(node_data, 'node_ll')
store(buildings, 'building_boundaries')
store(building_detail, 'building_details')