In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

OSMFILE = "sample1_4.osm"
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)


expected = ["Road", "Cross"]

# UPDATE THIS VARIABLE
mapping = {"Rd.": "Road",
           "Rd'": "Road",
           "Rd": "Road",
           "Roa": "Road",
           "St": "Street",
           "st": "Street",
           "rd": "Road",
           "Naga": "Nagar",
           "crs": "Cross",
           "blk":"Block",
           "cmplx": "Complex",
           "Jct": "Junction"
           
           }


def audit_street_type(street_types, street_name):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        if street_type not in expected :
            street_types[street_type].add(street_name)


def is_street_name(elem):
    return (elem.attrib['k'] == "addr:street")


def audit(osmfile):
    osm_file = open(osmfile, "r")
    street_types = defaultdict(set)
    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:

        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                if is_street_name(tag):
                    audit_street_type(street_types, tag.attrib['v'])
                    root.clear()
    osm_file.close()
    return street_types



def check_comma(name):
    if name[-1]==',':
        name= name[:-1]
        pass
    if name[0:2]==', ':
        name= name[2:]
        pass
    if name[0]==',':
        name=name[1:]
        pass
    if name[-1]=='.':
        name= name[:-1]
    return name
    

    
    

def update_name(name, mapping):
    
    name= check_comma(name)
    name_list = name.split(" ")
    new_name_list = []
    for word in name_list:
        if len(word)== 0:
            break
        if word not in mapping.keys():
            if word[-1]==',':
                word_new= word[:-1]
                if word_new not in mapping.keys():
                    new_name_list.append(word_new+',')
                else:
                    new_name_list.append(mapping[word_new]+ ',')
            else:
                new_name_list.append(word)
        else:
            new_name_list.append(mapping[word])
    name = " ".join(new_name_list)
    return name

def test():
    st_types = audit(OSMFILE)
    for key in st_types:
        if len(st_types[key])> 3:
            
            for name in st_types[key]:
                better_name = update_name(name, mapping)
                print name, "=>", better_name  
        

if __name__ == '__main__':
    test()

In [4]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

OSM_FILE = "bengaluru_india.osm"

def update_postcode(postcode):
    if len(postcode)<6:
        return postcode
    if postcode[3]==' ':
        postcode=postcode[0:3]+ postcode[4:7]
        pass
    if postcode[-1]==',':
        postcode=postcode[:-1]
        pass
    if postcode[0:2]=='- ':
        postcode=postcode[2:]
        pass
    if postcode[-1]=='"':
        postcode=postcode[:-1]
        pass
    
    if postcode[-1]=='h':
        postcode=postcode[:-1]
        pass
    if postcode[-1]=='p':
        postcode=postcode[:-1]
        pass
    else:
        return postcode
    return postcode
    
        
def audit(osmfile):
    osm_file = open(osmfile, "r")
    post_types = []
    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:
        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                    if  tag.attrib['k']=='addr:postcode':
                        if len(tag.attrib['v']) != 6:
                            if tag.attrib['v'] not in post_types:
                                post_types.append(tag.attrib['v'])
                                root.clear()
                                
    osm_file.close()
    for post in post_types:
        newpost= update_postcode(post)
        print post, "=>", newpost
audit(OSM_FILE)     

560 001 => 560001
560 036 => 560036
560 052 => 560052
560003  => 560003 
560 068 => 560068
560 100 => 560100
5600091 => 5600091
560 064 => 560064
5600041 => 5600041
560 020 => 560020
5600011 => 5600011
560 025 => 560025
560 037 => 560037
79 => 79
56007 => 56007
56003 => 56003
560067, => 560067
Bengaluru => Bengaluru
560010, => 560010
560086, => 560086
560075, => 560075
560032, => 560032
5600109 => 5600109
560040p => 560040
5600043 => 5600043
5600037 => 5600037
560 078 => 560078
56066 => 56066
5600 => 5600
560043, => 560043
560068, => 560068
560076, => 560076
560072, => 560072
- 560094, => 560094
- 560068 => 560068
- 560011 => 560011
560027" => 560027
- 560027 => 560027
5560034 => 5560034
560100, => 560100
560080" => 560080
560100" => 560100
560070" => 560070
56006 => 56006
56077 => 56077
- 560076 => 560076
- 560095 => 560095
- 560001 => 560001
560001ph => 560001
- 560051 => 560051
560008, => 560008
- 560034 => 560034
56005 => 56005
560 077 => 560077
560 080 => 560080
560 002 => 560002


In [3]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

OSM_FILE = "bengaluru_india.osm"

        
def audit(osmfile):
    osm_file = open(osmfile, "r")
    post_types = []
    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:
        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                    if  tag.attrib['k']=='addr:city':
                        print tag.attrib['v']
                        root.clear()
                                
    osm_file.close()
    
audit(OSM_FILE)  

Bangalore
Bangalore
Bangalore
Bangalore
Bangalore
Bangalore
Bangalore
Bangalore
BTM Layout, Bangalore
BTM Layout, Bangalore
Belandur, Bangalore
Belandur, Bangalore
Bangalore
Bangalore
Bangalore
Bangalore
Bangalore
Bangalore
Bangalore
Marathhalli
Marathhalli
Gandhi Nagar, Bangalore
Gandhi Nagar, Bangalore
Bengaluru
Bengaluru
Babusabpalya, Hennur
Babusabpalya, Hennur
Kalyan Nagar, Bangalore
Kalyan Nagar, Bangalore
Bangalore
Bangalore
Marathahalli, Bangalore
Marathahalli, Bangalore
Whitefield, Bangalore
Whitefield, Bangalore
Kundalahalli, Bangalore
Kundalahalli, Bangalore
Hoodi, Mahadevapura, Bangalore
Hoodi, Mahadevapura, Bangalore
Hoodi, Mahadevapura, Bangalore
Hoodi, Mahadevapura, Bangalore
Hoodi, Mahadevapura, Bangalore
Hoodi, Mahadevapura, Bangalore
Marathahalli, Bangalore
Marathahalli, Bangalore
Basaveshwara Nagar, Bangalore
Basaveshwara Nagar, Bangalore
Basaveshwara Nagar, Bangalore
Basaveshwara Nagar, Bangalore
Basaveshwara Nagar, Bangalore
Basaveshwara Nagar, Bangalore
Basaveshwa

In [5]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

OSM_FILE = "bengaluru_india.osm"

        
def audit(osmfile):
    osm_file = open(osmfile, "r")
    post_types = []
    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:
        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                    if  tag.attrib['k']=='addr:city':
                        print tag.attrib['k'],tag.attrib['v']     
                        
                        root.clear()
        
                                
    osm_file.close()
    
audit(OSM_FILE)  

addr:street Bannerghatta Road
addr:street Bannerghatta Road
addr:street Bannerghatta Road
addr:street Bannerghatta Road
addr:city Bangalore
addr:street 27th Main
addr:city Bangalore
addr:street 27th Main
addr:street Outer Ring Road
addr:street Outer Ring Road
addr:street Outer Ring Road
addr:street Outer Ring Road
addr:street Sarjapur Road
addr:street Sarjapur Road
addr:city Bangalore
addr:street 27th Main
addr:city Bangalore
addr:street 27th Main
addr:city Bangalore
addr:street Velankani Drive Opposite BHEL Side Gate, Velankanni Drive, Phase 1, Electronic City
addr:city Bangalore
addr:street Velankani Drive Opposite BHEL Side Gate, Velankanni Drive, Phase 1, Electronic City
addr:street Sarjapur Road
addr:street Sarjapur Road
addr:street Sarjapur Road
addr:street Sarjapur Road
addr:street Sarjapur Road
addr:street Sarjapur Road
addr:street Sarjapur Road
addr:street Sarjapur Road
addr:street 2nd Main Road
addr:street 2nd Main Road
addr:street Elephant Cave Road
addr:street Elephant Cave

In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

OSM_FILE = "bengaluru_india.osm"

        
def audit(osmfile):
    osm_file = open(osmfile, "r")
    post_types = []
    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:
        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                    if  tag.attrib['k']=='addr:full':
                        print tag.attrib['k'],tag.attrib['v']
                    
                        root.clear()
                        
                                
    osm_file.close()
    
audit(OSM_FILE)  

addr:full Devasandra Circle
addr:full Devasandra Circle
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:full HIG Colony, RMV Second Stage
addr:ful

In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

OSM_FILE = "bengaluru_india.osm"

        
def audit(osmfile):
    osm_file = open(osmfile, "r")
    post_types = []
    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:
        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                    if  tag.attrib['k']=='addr:street':
                        value = tag.attrib['v']
                        if value.find(',') != -1 :
                            print value 
                            root.clear()
                                      
    osm_file.close()
    
audit(OSM_FILE)  

Velankani Drive Opposite BHEL Side Gate, Velankanni Drive, Phase 1, Electronic City
Velankani Drive Opposite BHEL Side Gate, Velankanni Drive, Phase 1, Electronic City
2nd Stage, 16th Main
2nd Stage, 16th Main
Munekolalu Village, Varthur Hobli, Bangalore-Varthur Road
Munekolalu Village, Varthur Hobli, Bangalore-Varthur Road
2nd Cross, 6th Main Road
2nd Cross, 6th Main Road
Brookfield Road, Whitefield
Brookfield Road, Whitefield
1st Main Road, EPIP Area
1st Main Road, EPIP Area
B Block, AECS Layout
B Block, AECS Layout
Outer Ring Road (Near Shiva Ganga Layout), Mahadevapura
Outer Ring Road (Near Shiva Ganga Layout), Mahadevapura
ITPL Main Road, near Hoodi Circle
ITPL Main Road, near Hoodi Circle
Sampige Road, Malleshwaram
Sampige Road, Malleshwaram
1st Main Road, New BEL Road
1st Main Road, New BEL Road
11th Main Road, Opp Railway Station, Malleswaram
11th Main Road, Opp Railway Station, Malleswaram
Kaadubeesanahalli Rd, Devarabisanahalli,  Bellandur,
Kaadubeesanahalli Rd, Devarabisanah