In [1]:
import networkx as nx
import zipfile
import xml.etree.ElementTree as ET
from pylab import *
from math import radians, cos, sin, asin, sqrt, atan2 

In [22]:
%%time

R = 6371000             # Radius of earth in metres; read as a global by haversine_dist / haversine_dist1
    
def getxy(lat,lon):
    """Map a latitude/longitude pair (degrees) to an [x, y] list.

    The values are the x and y components of the point on a sphere of
    radius 6371000; the z component (radius*sin(lat)) is not computed, so
    anything measured between two returned pairs ignores differences in z.
    """
    earth_radius = 6371000
    lat_rad = lat*pi/180
    lon_rad = lon*pi/180
    # Factor shared by both components: distance of the point from the polar axis.
    axis_dist = earth_radius*cos(lat_rad)
    return [axis_dist*cos(lon_rad), axis_dist*sin(lon_rad)]


def node_attributes(elem):
    """Read the 'id', 'lat' and 'lon' attributes of a node element.

    Returns a tuple (id, lat, lon): id is the attribute value as returned by
    ``elem.get``, lat and lon are converted with ``float``.
    """
    node_id = elem.get('id')
    lat, lon = (float(elem.get(key)) for key in ('lat', 'lon'))
    return node_id, lat, lon

def way_attributes(elem):
    """Return (id, refs) for a way element.

    refs holds the 'ref' attribute of every direct child whose tag is "nd",
    in document order; children with any other tag are ignored.
    """
    refs = [child.get('ref') for child in elem if child.tag == "nd"]
    return elem.get('id'), refs

def distance(x1,y1,x2,y2):
    """Euclidean distance between the points (x1, y1) and (x2, y2)."""
    dx = x2-x1
    dy = y2-y1
    return sqrt(dx**2 + dy**2)

def haversine_dist(lat1, lon1, lat2, lon2, radius=None):
    """Great-circle distance between two points, haversine formula (atan2 form).

    Parameters
    ----------
    lat1, lon1, lat2, lon2 : float
        Coordinates of the two points, in degrees.
    radius : float, optional
        Radius of the sphere; the result is in the same unit. When omitted
        the module-level ``R`` is used.

    Returns
    -------
    float
        ``2 * radius * atan2(sqrt(a), sqrt(1 - a))`` where ``a`` is the
        haversine of the central angle.
    """
    if radius is None:
        radius = R
    p = pi/180
    half_dlat = ((lat2-lat1)*p)/2
    half_dlon = ((lon2-lon1)*p)/2
    a = (sin(half_dlat)**2)  + (cos(lat1*p) * cos(lat2*p) * (sin(half_dlon)**2))
    # Rounding can leave ``a`` marginally outside [0, 1] (e.g. for nearly
    # antipodal points), which would make sqrt(1 - a) raise ValueError.
    # Argument order keeps NaN inputs propagating as NaN.
    a = min(max(a, 0.0), 1.0)
    return 2 * radius * atan2(sqrt(a),sqrt(1-a))
    
def haversine_dist1(lat1, lon1, lat2, lon2, radius=None):
    """Great-circle distance between two points, haversine formula (cosine/asin form).

    The haversine term is built from ``(1 - cos(x)) / 2`` rather than
    ``sin(x / 2) ** 2``. Subtracting a cosine that is close to 1 loses
    precision for very small separations, so results can differ slightly
    from ``haversine_dist`` on short segments.

    Parameters
    ----------
    lat1, lon1, lat2, lon2 : float
        Coordinates of the two points, in degrees.
    radius : float, optional
        Radius of the sphere; the result is in the same unit. When omitted
        the module-level ``R`` is used.

    Returns
    -------
    float
        ``2 * radius * asin(sqrt(a))``.
    """
    if radius is None:
        radius = R
    p = pi/180
    a = 0.5 - cos((lat2-lat1)*p)/2 + cos(lat1*p) * cos(lat2*p) * (1-cos((lon2-lon1)*p))/2
    # Rounding can leave ``a`` marginally outside [0, 1], which would make
    # sqrt/asin raise ValueError. Argument order keeps NaN propagating.
    a = min(max(a, 0.0), 1.0)
    return 2 * radius * asin(sqrt(a))
    

def parser_xy_dist(source):
    """Stream-parse an XML source into nodes, ways and an edge list with planar lengths.

    Each "node" element is stored as [lat, lon] and, internally, as the
    [x, y] pair from ``getxy``. For each "way" element every pair of
    consecutive node references becomes an edge whose length is
    ``distance`` between the two [x, y] pairs. A reference to a node that has
    not been seen yet raises KeyError.

    Returns (node_data, way, G):
        node_data -- {'node_ID': [lat, lon], ...}
        way       -- {'way_ID': [node IDs], ...}
        G         -- [(node_a, node_b, length), ...]
    """
    xy_by_id = {}           # {'node_ID': [x, y], ...}
    node_data = {}
    way = {}
    G = []

    for _, elem in ET.iterparse(source):
        tag = elem.tag
        if tag == "node":
            node_id, lat, lon = node_attributes(elem)
            xy_by_id[node_id] = getxy(lat, lon)
            node_data[node_id] = [lat, lon]
            elem.clear()    # contents already copied out
        elif tag == "way":
            way_id, refs = way_attributes(elem)
            # Walk consecutive reference pairs along the way.
            for start, end in zip(refs, refs[1:]):
                x1, y1 = xy_by_id[start]
                x2, y2 = xy_by_id[end]
                G.append((start, end, distance(x1, y1, x2, y2)))
            way[way_id] = refs
            elem.clear()

    return node_data, way, G

    
def parser_haversine(source, dist=None):
    """Stream-parse an XML source into nodes, ways and an edge list with great-circle lengths.

    Each "node" element is stored as [lat, lon]. For each "way" element
    every pair of consecutive node references becomes an edge whose length is
    ``dist(lat1, lon1, lat2, lon2)``. A reference to a node that has not been
    seen yet raises KeyError.

    Parameters
    ----------
    source : filename or file object
        Passed straight to ``ET.iterparse``.
    dist : callable, optional
        Distance function taking (lat1, lon1, lat2, lon2). Defaults to
        ``haversine_dist1``; pass ``haversine_dist`` to compare the two
        formulas without editing this function.

    Returns
    -------
    (node_data, way, G)
        node_data -- {'node_ID': [lat, lon], ...}
        way       -- {'way_ID': [node IDs], ...}
        G         -- [(node_a, node_b, length), ...]
    """
    if dist is None:
        dist = haversine_dist1

    node_data = {}          # {'node_ID1' : [lat1,lon1], 'node_ID2' : [lat2,lon2],.... }
    way = {}                # {'way_ID1' : [nodes 1], 'way_ID2' : [nodes 2],.... }
    G = []                  # a list of edges - [(node1,node2,dist12),(node3,node4,dist34), ...]

    for _, elem in ET.iterparse(source):
        if elem.tag == "node":
            ID, lat, lon = node_attributes(elem)
            node_data[ID] = [lat, lon]
            elem.clear()    # contents already copied out
        elif elem.tag == "way":
            ID, path = way_attributes(elem)
            # Walk consecutive reference pairs along the way.
            for start, end in zip(path, path[1:]):
                lat1, lon1 = node_data[start]
                lat2, lon2 = node_data[end]
                G.append((start, end, dist(lat1, lon1, lat2, lon2)))
            way[ID] = path
            elem.clear()

    return node_data, way, G
    

Wall time: 0 ns


# Main()

In [23]:
%%time
# Parse every member of the archive. Each pass rebinds node_data, way and G,
# so only the result for the last member is left afterwards.
with zipfile.ZipFile('map.zip') as z:
    for a in z.namelist():
        with z.open(a) as f:
            node_data, way, G = parser_haversine(f)

Wall time: 31.3 s


# for haversine

In [18]:
# Show the second edge tuple in G and the number of edge tuples collected.
G[1], len(G)

(('431610691', '431610692', 181.63306848583315), 1840778)

In [14]:
%%time
# Load the (u, v, length) tuples into an undirected graph. nx.Graph keeps a
# single edge per node pair, so pairs that occur more than once in G collapse.
M = nx.Graph()
M.add_weighted_edges_from(G)
print(M.number_of_nodes(), M.number_of_edges())

# With weight= given, Graph.size is the sum of that attribute over all edges.
p = M.size(weight='weight')
print("IF WE CONSIDER SUM OF EDGE WEIGHTS")
print("Toal length of road in km = ", p/1000)

1802819 1833780
IF WE CONSIDER SUM OF EDGE WEIGHTS
Toal length of road in km =  28923.104345403364
Wall time: 8.44 s


In [15]:
# Add up the third field (the length) of every tuple in G -- repeated node
# pairs included -- and show the total divided by 1000.
d = 0
for i in G:
    d += i[2]
d/1000

29271.813747008833

# for haversine1

In [19]:
# Show the second edge tuple in G and the number of edge tuples collected.
G[1], len(G)

(('431610691', '431610692', 181.63306848583315), 1840778)

In [20]:
%%time
# Load the (u, v, length) tuples into an undirected graph. nx.Graph keeps a
# single edge per node pair, so pairs that occur more than once in G collapse.
M = nx.Graph()
M.add_weighted_edges_from(G)
print(M.number_of_nodes(), M.number_of_edges())

# With weight= given, Graph.size is the sum of that attribute over all edges.
p = M.size(weight='weight')
print("IF WE CONSIDER SUM OF EDGE WEIGHTS")
print("Toal length of road in km = ", p/1000)

1802819 1833780
IF WE CONSIDER SUM OF EDGE WEIGHTS
Toal length of road in km =  28923.07130067917
Wall time: 7.31 s


In [21]:
# Add up the third field (the length) of every tuple in G -- repeated node
# pairs included -- and show the total divided by 1000.
d = 0
for i in G:
    d += i[2]
d/1000

29271.780456257642

# for xy dist

In [5]:
# Show the second edge tuple in G and the number of edge tuples collected.
G[1], len(G)

(('431610691', '431610692', 181.3593280944386), 1840778)

In [7]:
%%time
# Load the (u, v, length) tuples into an undirected graph. nx.Graph keeps a
# single edge per node pair, so pairs that occur more than once in G collapse.
M = nx.Graph()
M.add_weighted_edges_from(G)
print(M.number_of_nodes(), M.number_of_edges())

# With weight= given, Graph.size is the sum of that attribute over all edges.
p = M.size(weight='weight')
print("IF WE CONSIDER SUM OF EDGE WEIGHTS")
print("Toal length of road in km = ", p/1000)

1802819 1833780
IF WE CONSIDER SUM OF EDGE WEIGHTS
Toal length of road in km =  18811.62367065609
Wall time: 8.13 s


In [8]:
# Add up the third field (the length) of every tuple in G, repeated node
# pairs included; the total is left in d.
d = 0
for i in G:
    d += i[2]

In [9]:
# Show the accumulated total d divided by 1000.
d/1000

19047.66712596479

# TRY

In [None]:
# Take the first five entries (in insertion order) of each mapping, plus the
# first five edge tuples, for a quick visual check.
m = dict(list(node_data.items())[:5])
n = dict(list(way.items())[:5])
print("top 5 elemets in node_data - \n", m, "\n")
print("top 5 elemets in way - \n", n, "\n")
print("top 5 elemets in G -  \n", G[:5], "\n")

In [None]:


def from_zip(Zipfile):
    """Open the zip archive ``Zipfile`` and run ``parser`` on each member in turn.

    NOTE(review): no function named ``parser`` is defined in the visible part
    of this notebook (only ``parser_xy_dist`` and ``parser_haversine``), so
    this raises NameError unless ``parser`` is bound elsewhere -- confirm
    which parser is intended.
    """
    with zipfile.ZipFile(Zipfile) as z:
        # NOTE(review): the loop variable ``map`` shadows the built-in ``map`` inside this function.
        for map in z.namelist():
            with z.open(map) as f:
                #print(map)
                # In the lines visible here the results are only bound to
                # local names, and each member overwrites the previous one.
                node_data,way,G = parser(f)