In [1]:
import xml.etree.ElementTree as ET
from pylab import *
import networkx as nx

# Parsing the osm file using iterparse

In [2]:
%%time

def getxy(lat,lon):
    R = 6378000             # Radius of earth
    latr = lat*pi/180
    lonr = lon*pi/180
    x = R*cos(latr)*cos(lonr)
    y = R*cos(latr)*sin(lonr)
    return [x,y]

def parser(source):
    node = {}               # {'node_ID1' : [x1,y1], 'node_ID2' : [x2,y2],.... }
    node_data = {}          # same as node but with lat and lon instead of x,y
    way = {}                # {'way_ID1' : [nodes 1], 'way_ID2' : [nodes 2],.... }
    G = []                  # a list of edges - [(node1,node2,dist12),(node3,node4,dist34), ...]
    
    for event, elem in ET.iterparse(source):
        if elem.tag == "node":
            ID = elem.get('id')
            lat = float(elem.get('lat'))
            lon = float(elem.get('lon'))
            node[ID] = getxy(lat,lon)
            node_data[ID] = [lat,lon]
            elem.clear()
        if elem.tag == "way":
            path = []
            ID = elem.get('id')
            for nd in elem:
                if nd.tag == "nd":
                    path.append(nd.get('ref'))
            way[ID] = path
            for i in range(len(path)-1):
                [x1,y1] = node[path[i]]
                [x2,y2] = node[path[i+1]]
                dst = sqrt((x2-x1)**2 + (y2-y1)**2)
                G.append((path[i],path[i+1],dst)) # a list of tuples
            elem.clear()
        
    return node_data,way,G

Wall time: 0 ns


In [2]:
%%time

def getxy(lat,lon):
    # gets X,Y xoordinates using latitude and longitude
    R = 6378000             # Radius of earth
    latr = lat*pi/180
    lonr = lon*pi/180
    x = R*cos(latr)*cos(lonr)
    y = R*cos(latr)*sin(lonr)
    return [x,y]

def node_attributes(elem):
    # Given a node element, gets its attributes like ID,latitude,longitude
    ID = elem.get('id')
    lat = float(elem.get('lat'))
    lon = float(elem.get('lon'))
    return ID,lat,lon

def way_attributes(elem):
    path = []
    ID = elem.get('id')
    for nd in elem:
        if nd.tag == "nd":
            path.append(nd.get('ref'))
    return ID,path  
        
def distance(x1,y1,x2,y2):
    return sqrt((x2-x1)**2 + (y2-y1)**2)
    
    
def parser(source):
    
    node = {}               # {'node_ID1' : [x1,y1], 'node_ID2' : [x2,y2],.... }
    node_data = {}          # same as node but with lat and lon instead of x,y
    way = {}                # {'way_ID1' : [nodes 1], 'way_ID2' : [nodes 2],.... }
    G = []                  # a list of edges - [(node1,node2,dist12),(node3,node4,dist34), ...]
        
    for event, elem in ET.iterparse(source):
        if elem.tag == "node":
            ID,lat,lon = node_attributes(elem)
            node[ID] = getxy(lat,lon)
            node_data[ID] = [lat,lon]
            elem.clear()
        
        if elem.tag == "way":
            ID,path = way_attributes(elem)
            for i in range(len(path)-1):
                [x1,y1] = node[path[i]]
                [x2,y2] = node[path[i+1]]
                d = distance(x1,y1,x2,y2)
                G.append((path[i],path[i+1],d)) # a list of tuples
            way[ID] = path
            elem.clear()
        
    return node_data,way,G

Wall time: 0 ns


In [3]:
%%time
node_data,way,G = parser('map_bangalore')

Wall time: 47.9 s


In [4]:
print("No of nodes - " , len(node_data))
print("No of edges - " , len(G))

No of nodes -  1825940
No of edges -  1840778


In [5]:
m = {k: node_data[k] for k in list(node_data)[:5]}
n = {k: way[k] for k in list(way)[:5]}
print("top 5 elemets in node_data - \n", m, "\n")
print("top 5 elemets in way - \n", n, "\n")
print("top 5 elemets in G -  \n", G[:5], "\n")

top 5 elemets in node_data - 
 {'17327095': [12.9105419, 77.5987208], '17327106': [12.9171592, 77.5858319], '17327139': [12.9349712, 77.624083], '17327141': [12.9384931, 77.6291682], '17327147': [12.9541534, 77.6411955]} 

top 5 elemets in way - 
 {'4354938': ['3897890813', '431610691', '431610692', '431610693', '431610695'], '4354952': ['26529612', '26529613', '26529615', '26529616', '26529620', '26529621', '26529622', '26529623', '26529624', '26529625', '26529626', '26529627', '26529628', '26529629', '26529630', '26529631', '26529634', '26529635', '26529636', '26529637', '26529638', '26529639', '26529640', '26529641', '26529642', '26529643', '26529644', '4209231254', '3309435420', '4209231271', '3309435421', '3309435422', '3309435423', '3309435424'], '4354953': ['246397923', '6834406255', '3343265828', '3400262500', '4562270100', '334484150'], '4361247': ['26583024', '26583025', '1224316306', '1224316325', '1224316340', '1224316316', '3771990924', '26583026', '1853174107', '26583027'

# Total length of roads

In [6]:
path_dist = 0
for i in range(len(G)):
    path_dist += G[i][2]
print("IF WE CONSIDER SUM OF ALL PATH DISTANCES, THEN")
print("Toal length of road in metres = ", path_dist)
print("which is = ", path_dist/1000, "km")

IF WE CONSIDER SUM OF ALL PATH DISTANCES, THEN
Toal length of road in metres =  19068595.342864975
which is =  19068.595342864974 km


In [7]:
%%time
M = nx.Graph()
M.add_weighted_edges_from(G)

Wall time: 6.59 s


In [8]:
len(M.nodes()),len(M.edges())

(1802819, 1833780)

In [9]:
p = M.size(weight='weight')
print("IF WE CONSIDER SUM OF EDGE WEIGHTS")
print("Toal length of road in km = ", p/1000)

IF WE CONSIDER SUM OF EDGE WEIGHTS
Toal length of road in km =  18832.29253985947


Both are wrong. Only, Finding Total_path_length through a graph algorithm would yield proper result 

In [13]:
labels = nx.get_edge_attributes(M,'weight')
labels    

{('3897890813', '431610691'): 322.09039262684814,
 ('3897890813', '3897890814'): 17.949417023532348,
 ('431610691', '431610692'): 181.5585927774593,
 ('431610692', '431610693'): 154.70047215171942,
 ('431610693', '431610695'): 193.99559471368426,
 ('431610693', '1808901703'): 57.84086215329978,
 ('431610695', '3359027605'): 60.58346698722461,
 ('431610695', '431610697'): 30.534829168646556,
 ('26529612', '26529613'): 4.632946762322695,
 ('26529613', '26529615'): 8.347070547422895,
 ('26529615', '26529616'): 24.517309356036293,
 ('26529616', '26529620'): 145.62994753191614,
 ('26529620', '26529621'): 36.645358107665636,
 ('26529621', '26529622'): 29.51511220136987,
 ('26529622', '26529623'): 27.77552505738836,
 ('26529623', '26529624'): 44.04143700037127,
 ('26529624', '26529625'): 50.2086971320513,
 ('26529625', '26529626'): 22.255455308845868,
 ('26529626', '26529627'): 16.51920596082031,
 ('26529627', '26529628'): 19.462044198329455,
 ('26529628', '26529629'): 6.995690633733733,
 ('2

In [26]:
list1 = []
for i in labels:
    list1.append((i,labels[i]))
len(list1)

1833780

In [22]:
with open("edge_with_weight.txt","w") as f:
    for i in list1:
        f.write('%s \n' %str(i))

# Edges 

In [None]:
with open("edges.txt","w") as f:
    for i in M.edges():
        f.write('%s\n' % str(i))

In [14]:
G8 = [i[0:2] for i in G]
SG8 = list(set(G8))

In [15]:
Gd = {}
for i in SG8:
    if i[0] not in Gd:
        Gd[i[0]] = []
    Gd[i[0]].append(i[1])
len(Gd)    

1795054

In [16]:
count = 0
for i in Gd:
    for j in Gd[i]:
        if j in Gd:
            for k in Gd[j]:
                if k == i:
                    count += 1 
c = count/2
c

3394.0

In [17]:
s = len(G8)-len(SG8)
s

3604

In [18]:
ME = list(M.edges)

In [19]:
len(G)-c-s, len(ME)

(1833780.0, 1833780)

In [20]:
print(len(G8)-len(SG8))
print(len(G),len(ME),len(SG8),len(G)-len(ME),len(SG8)-len(ME)) 

3604
1840778 1833780 1837174 6998 3394


# TRY

In [84]:
import csv

with open("out.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(G)

In [87]:
with open("no_weights.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(G8)