In [1]:
#import csv
import pandas as pd
#import sqlite3
#import csv
#import codecs
#import pprint
#import re
#import xml.etree.cElementTree as ET
#import cerberus
#import schema

In [2]:
# Only the two tag tables are loaded in this cell; the nodes, ways and
# ways_nodes CSVs are not read here.
nodes_tags, ways_tags = (
    pd.read_csv(csv_name) for csv_name in ("nodes_tags.csv", "ways_tags.csv")
)

# Download the OpenStreetMap XML file to a local file using the overpass-api.de web API query form.

In [2]:
"""from data.py
This extracts fields from the OpenStreetMap XML file and stores the field values in the ways and nodes CSV files.
Tag keys that include ':' are parsed into a type and a key, and keys with problem characters (not a-z,:) are handled
separately.  The field values that are extracted are listed in the 'NODE_FIELDS', 'NODE_TAGS_FIELDS', 'WAY_FIELDS',
'WAY_NODES_FIELDS' and 'WAY_TAGS_FIELDS' lists.  The shaped output can be validated against the schema as well.

A small subset of the data was validated with the provided function; validation was then set to False for the larger data set.
"""

# Input OpenStreetMap XML extract that process_map is run on.
OSM_PATH = "Phx_metro.osm"

# Output CSV files opened for writing by process_map.
NODES_PATH = "nodes.csv"
NODE_TAGS_PATH = "nodes_tags.csv"
WAYS_PATH = "ways.csv"
WAY_NODES_PATH = "ways_nodes.csv"
WAY_TAGS_PATH = "ways_tags.csv"

# Matches a tag key that starts with lowercase letters/underscores, a colon,
# then more lowercase letters/underscores (e.g. 'addr:street').  The pattern
# is not anchored at the end, so keys with further ':' segments match too.
LOWER_COLON = re.compile(r'^([a-z]|_)+:([a-z]|_)+')
# Matches any character that shape_element treats as problematic in a tag key.
PROBLEMCHARS = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')

# Schema object from the schema module; default schema for validate_element.
SCHEMA = schema.schema

# Make sure the fields order in the csvs matches the column order in the sql table schema
NODE_FIELDS = ['id', 'lat', 'lon', 'user', 'uid', 'version', 'changeset', 'timestamp']
NODE_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_FIELDS = ['id', 'user', 'uid', 'version', 'changeset', 'timestamp']
WAY_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_NODES_FIELDS = ['id', 'node_id', 'position']


def _shape_tags(element, problem_chars, default_tag_type):
    """Build the secondary-tag dicts for the <tag> children of *element*.

    Each returned dict has the keys 'id', 'key', 'value' and 'type'.
    A key matching LOWER_COLON is split on its first ':' into type and key
    (so 'addr:street:name' becomes type 'addr', key 'street:name').  Any other
    key gets *default_tag_type*, unless it contains a character matched by
    *problem_chars*, in which case the tag is skipped entirely.
    """
    element_id = element.attrib['id']
    tags = []
    for child in element:
        if child.tag != 'tag':
            continue
        raw_key = child.attrib['k']
        if LOWER_COLON.search(raw_key):
            # Split only once so keys with any number of ':' keep a 'key' field.
            tag_type, _, tag_key = raw_key.partition(':')
        elif not problem_chars.search(raw_key):
            tag_type, tag_key = default_tag_type, raw_key
        else:
            # Problematic key: skip instead of emitting a row with only an id.
            continue
        tags.append({
            'id': element_id,
            'key': tag_key,
            'value': child.attrib['v'],
            'type': tag_type,
        })
    return tags


def shape_element(element, node_attr_fields=NODE_FIELDS, way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS, default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element with a ``tag``, an ``attrib`` mapping and
            iterable children.
        node_attr_fields: attribute names copied from a 'node' element.
        way_attr_fields: attribute names copied from a 'way' element.
        problem_chars: compiled regex; tag keys it matches are dropped.
        default_tag_type: 'type' given to tag keys without a type prefix.

    Returns:
        For a 'node': ``{'node': attribs, 'node_tags': [tag, ...]}``.
        For a 'way': ``{'way': attribs, 'way_nodes': [...], 'way_tags': [...]}``
        where each way_nodes entry has 'id', 'node_id' and a 0-based 'position'.
        For any other element: ``None``.

    Raises:
        KeyError: if a required attribute is missing from the element.
    """
    if element.tag == 'node':
        node_attribs = {field: element.attrib[field] for field in node_attr_fields}
        return {'node': node_attribs,
                'node_tags': _shape_tags(element, problem_chars, default_tag_type)}

    if element.tag == 'way':
        way_attribs = {field: element.attrib[field] for field in way_attr_fields}
        way_id = element.attrib['id']
        node_refs = (child for child in element if child.tag == 'nd')
        way_nodes = [
            {'id': way_id, 'node_id': nd.attrib['ref'], 'position': position}
            for position, nd in enumerate(node_refs)
        ]
        return {'way': way_attribs,
                'way_nodes': way_nodes,
                'way_tags': _shape_tags(element, problem_chars, default_tag_type)}

    return None


# ================================================== #
#               Helper Functions                     #
# ================================================== #
def get_element(osm_file, tags=('node', 'way', 'relation')):
    """Lazily yield every completed element of *osm_file* whose tag is in *tags*."""

    parse_events = ET.iterparse(osm_file, events=('start', 'end'))
    # The first event belongs to the document root; keep a handle on it.
    root = next(parse_events)[1]
    for kind, node in parse_events:
        if kind != 'end' or node.tag not in tags:
            continue
        yield node
        # Once the consumer is done with the element, empty the root.
        root.clear()


def validate_element(element, validator, schema=SCHEMA):
    """Raise an Exception if element does not match schema.

    Args:
        element: shaped element dict to check.
        validator: object exposing ``validate(element, schema)`` and an
            ``errors`` mapping (process_map passes a cerberus.Validator).
        schema: schema handed to ``validator.validate``.

    Raises:
        Exception: carrying the first failing field and its pretty-printed
            errors when validation does not return True.
    """
    if validator.validate(element, schema) is not True:
        # items() exists on both Python 2 and 3 dicts; iteritems() is 2-only.
        field, errors = next(iter(validator.errors.items()))
        message_string = "\nElement of type '{0}' has the following errors:\n{1}"
        error_string = pprint.pformat(errors)

        raise Exception(message_string.format(field, error_string))


class UnicodeDictWriter(csv.DictWriter, object):
    """Extend csv.DictWriter to handle Unicode input.

    On Python 2 every ``unicode`` value is UTF-8 encoded before it reaches
    the csv writer.  On Python 3 values are passed through unchanged, since
    the name ``unicode`` does not exist there.
    """

    @staticmethod
    def _encode(value):
        """Return *value* UTF-8 encoded if it is a Python 2 ``unicode`` string."""
        try:
            text_type = unicode  # noqa: F821 -- only defined on Python 2
        except NameError:
            # Python 3: nothing to encode.
            return value
        return value.encode('utf-8') if isinstance(value, text_type) else value

    def writerow(self, row):
        """Write one row dict, encoding text values where required."""
        # items() works on both Python 2 and 3; iteritems() is 2-only.
        super(UnicodeDictWriter, self).writerow(
            {k: self._encode(v) for k, v in row.items()}
        )

    def writerows(self, rows):
        """Write each row through writerow so the same encoding is applied."""
        for row in rows:
            self.writerow(row)


# ================================================== #
#               Main Function                        #
# ================================================== #
def process_map(file_in, validate):
    """Stream node and way elements from file_in into the five CSV files.

    When ``validate`` is True each shaped element is passed to
    validate_element before it is written.
    """

    with codecs.open(NODES_PATH, 'w') as nodes_out, \
         codecs.open(NODE_TAGS_PATH, 'w') as node_tags_out, \
         codecs.open(WAYS_PATH, 'w') as ways_out, \
         codecs.open(WAY_NODES_PATH, 'w') as way_nodes_out, \
         codecs.open(WAY_TAGS_PATH, 'w') as way_tags_out:

        node_csv = UnicodeDictWriter(nodes_out, NODE_FIELDS)
        node_tag_csv = UnicodeDictWriter(node_tags_out, NODE_TAGS_FIELDS)
        way_csv = UnicodeDictWriter(ways_out, WAY_FIELDS)
        way_node_csv = UnicodeDictWriter(way_nodes_out, WAY_NODES_FIELDS)
        way_tag_csv = UnicodeDictWriter(way_tags_out, WAY_TAGS_FIELDS)

        # Every file starts with its header row.
        for csv_writer in (node_csv, node_tag_csv, way_csv, way_node_csv, way_tag_csv):
            csv_writer.writeheader()

        checker = cerberus.Validator()

        for raw in get_element(file_in, tags=('node', 'way')):
            shaped = shape_element(raw)
            if not shaped:
                continue

            if validate is True:
                validate_element(shaped, checker)

            if raw.tag == 'node':
                node_csv.writerow(shaped['node'])
                node_tag_csv.writerows(shaped['node_tags'])
            elif raw.tag == 'way':
                way_csv.writerow(shaped['way'])
                way_node_csv.writerows(shaped['way_nodes'])
                way_tag_csv.writerows(shaped['way_tags'])


# Entry point: convert the OSM extract at OSM_PATH into the CSV files.
if __name__ == '__main__':
    # Note: Validation is ~ 10X slower. For the project consider using a small
    # sample of the map when validating.
    process_map(OSM_PATH, validate=False)

In [4]:
def db_table_from_csv(db_name, file_name, table_name, keys=(), key_params=()):
    '''(Re)create a table in a SQLite database and fill it from a CSV file.

    db_name: path of the SQLite database file
    file_name: name of csv file (UTF-8, with a header row)
    table_name: name of table to create in db_name; an existing table with
        that name is dropped first
    keys: columns of .csv file_name to include in db table
    key_params: parameters for data type for each field in table, one per key

    Raises ValueError when there are fewer key_params than keys.
    '''
    import io
    import sys

    if len(key_params) < len(keys):
        raise ValueError("key_params must supply a column definition for every key")

    def quote(identifier):
        # Identifiers cannot be bound as SQL parameters, so quote them instead.
        return '"' + identifier.replace('"', '""') + '"'

    table = quote(table_name)
    columns = [quote(key) for key in keys]
    # zip stops at the shorter sequence, so surplus key_params are ignored.
    schema_sql = ",".join(col + " " + param for col, param in zip(columns, key_params))
    column_sql = ",".join(columns)
    placeholder_sql = ",".join("?" for _ in keys)

    # Read the CSV before touching the database so a missing or malformed
    # file does not leave the existing table dropped.
    if sys.version_info[0] >= 3:
        with io.open(file_name, 'r', encoding='utf-8', newline='') as fin:
            to_db = [[row[key] for key in keys] for row in csv.DictReader(fin)]
    else:
        # Python 2 csv works on byte strings; decode each cell afterwards.
        with open(file_name, 'rb') as fin:
            to_db = [[row[key].decode("utf-8") for key in keys]
                     for row in csv.DictReader(fin)]

    con = sqlite3.connect(db_name)
    try:
        cur = con.cursor()
        cur.execute("DROP TABLE IF EXISTS " + table + ";")
        cur.execute("CREATE TABLE " + table + "(" + schema_sql + ");")
        # insert data into db according to keys
        cur.executemany(
            "INSERT INTO " + table + "(" + column_sql + ") VALUES (" + placeholder_sql + ");",
            to_db)
        con.commit()
    finally:
        # Close the connection even when one of the statements fails.
        con.close()
    return

In [5]:
DB_NAME = 'Phx_metro.db'

# Column names per table, in CSV header order.
NODE_FIELDS = ['id', 'lat', 'lon', 'user', 'uid', 'version', 'changeset', 'timestamp']
NODE_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_FIELDS = ['id', 'user', 'uid', 'version', 'changeset', 'timestamp']
WAY_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_NODES_FIELDS = ['id', 'node_id', 'position']

# SQL column definitions, positionally matched to the *_FIELDS lists above.
NODE_PARAMS = [
    'INTEGER PRIMARY KEY NOT NULL', 'REAL', 'REAL', 'TEXT',
    'INTEGER', 'TEXT', 'INTEGER', 'DATE',
]
NODE_TAGS_PARAMS = ['INTEGER NOT NULL', 'TEXT NOT NULL', 'TEXT NOT NULL', 'TEXT NOT NULL']
WAY_PARAMS = [
    'INTEGER NOT NULL', 'TEXT NOT NULL', 'INTEGER NOT NULL',
    'TEXT NOT NULL', 'INTEGER NOT NULL', 'TEXT NOT NULL',
]
WAY_NODES_PARAMS = ['INTEGER NOT NULL', 'INTEGER NOT NULL', 'INTEGER NOT NULL']
WAY_TAGS_PARAMS = ['INTEGER NOT NULL', 'TEXT NOT NULL', 'TEXT NOT NULL', 'TEXT NOT NULL']

# NOTE(review): the way tables are named 'way_tags' / 'way_nodes' while the
# node table is 'nodes_tags' and the CSV files are 'ways_*.csv' -- confirm
# that this naming is intended before relying on it in queries.
_TABLE_LOADS = [
    ('nodes.csv', 'nodes', NODE_FIELDS, NODE_PARAMS),
    ('nodes_tags.csv', 'nodes_tags', NODE_TAGS_FIELDS, NODE_TAGS_PARAMS),
    ('ways.csv', 'ways', WAY_FIELDS, WAY_PARAMS),
    ('ways_tags.csv', 'way_tags', WAY_TAGS_FIELDS, WAY_TAGS_PARAMS),
    ('ways_nodes.csv', 'way_nodes', WAY_NODES_FIELDS, WAY_NODES_PARAMS),
]
for _csv_name, _table, _fields, _params in _TABLE_LOADS:
    db_table_from_csv(DB_NAME, _csv_name, _table, _fields, _params)

#### Cities list from http://phoenix.about.com/od/govtcity/qt/cities-towns-maricopa-county.htm

In [4]:
# Reference spellings that city values are matched against.
cities = [
    'Apache Junction',
    'Avondale',
    'Buckeye',
    'Carefree',
    'Cave Creek',
    'Chandler',
    'El Mirage',
    'Fountain Hills',
    'Gila Bend',
    'Gilbert',
    'Glendale',
    'Goodyear',
    'Guadalupe',
    'Litchfield Park',
    'Mesa',
    'Paradise Valley',
    'Peoria',
    'Phoenix',
    'Queen Creek',
    'Scottsdale',
    'Surprise',
    'Tempe',
    'Tolleson',
    'Wickenburg',
    'Youngtown',
]

## As a measure, just check to see how close the word distance is among city/town names themselves.

In [35]:
# Compare every reference name with the names listed before it.
for idx, name in enumerate(cities):
    print(name, best_match(name, cities[:idx]))

('Apache Junction', ('', 0))
('Avondale', ('Apache Junction', 0.2608695652173913))
('Buckeye', ('Apache Junction', 0.18181818181818182))
('Carefree', ('Buckeye', 0.4))
('Cave Creek', ('Carefree', 0.6666666666666666))
('Chandler', ('Avondale', 0.625))
('El Mirage', ('Cave Creek', 0.42105263157894735))
('Fountain Hills', ('Avondale', 0.36363636363636365))
('Gila Bend', ('Avondale', 0.35294117647058826))
('Gilbert', ('Gila Bend', 0.625))
('Glendale', ('Avondale', 0.625))
('Goodyear', ('Gilbert', 0.4))
('Guadalupe', ('Avondale', 0.5882352941176471))
('Litchfield Park', ('Cave Creek', 0.4))
('Mesa', ('Glendale', 0.3333333333333333))
('Paradise Valley', ('Avondale', 0.43478260869565216))
('Peoria', ('El Mirage', 0.4))
('Phoenix', ('Peoria', 0.46153846153846156))
('Queen Creek', ('Cave Creek', 0.6666666666666666))
('Scottsdale', ('Avondale', 0.5555555555555556))
('Surprise', ('Peoria', 0.42857142857142855))
('Tempe', ('El Mirage', 0.42857142857142855))
('Tolleson', ('Phoenix', 0.4))
('Wickenb

In [5]:
def best_match(text, match_list, case = False):
    """Return the entry of match_list most similar to text, with its score.

    Similarity is ``difflib.SequenceMatcher(None, text, entry).ratio()``.

    Args:
        text: string to look up.
        match_list: iterable of candidate strings.
        case: when truthy the comparison is case-sensitive; any falsy value
            (including None) compares the lower-cased strings.

    Returns:
        ``(entry, score)`` for the first entry with the highest score, or
        ``('', 0)`` when match_list is empty or nothing scores above 0.
    """
    from difflib import SequenceMatcher

    needle = text if case else text.lower()
    top_entry, top_score = '', 0
    for candidate in match_list:
        comparable = candidate if case else candidate.lower()
        score = SequenceMatcher(None, needle, comparable).ratio()
        # Strict '>' keeps the earliest candidate on ties.
        if score > top_score:
            top_entry, top_score = candidate, score
    return (top_entry, top_score)

## Testing and implementing the best_match function on the csv data

In [29]:
# Report what each raw city value would be mapped to (threshold 0.6).
for city_name in city_data_vals:
    match, score = best_match(city_name, cities)
    if score > 0.6:
        message = "Match {} to {}".format(city_name, match)
    else:
        message = "No match found for {}".format(city_name)
    print(message)


Match El Mirage to El Mirage
Match Glemdale to Glendale
Match Phenix to Phoenix
No match found for Fort McDowell
Match Golbert to Gilbert
Match Carefree to Carefree
Match Litchfield Park to Litchfield Park
No match found for Morristown
Match Surprise to Surprise
Match Gilbert to Gilbert
No match found for Mayer
Match Goodyear to Goodyear
Match Chandler to Chandler
Match Msa to Mesa
No match found for Higley
No match found for Rio Verde
Match Phoenx to Phoenix
Match tempe to Tempe
Match Tempe to Tempe
No match found for Luke AFB, Waddell
No match found for Laveen Village
Match Peoria to Peoria
Match Apache Junction to Apache Junction
Match Scottsdale to Scottsdale
No match found for Sun City West
Match Queen Creek to Queen Creek
No match found for Sun Lakes
Match Avondale to Avondale
No match found for sun City West
Match Fountain Hills to Fountain Hills
No match found for Luke AFB
No match found for Laveen
Match tEMPE to Tempe
Match peoria to Peoria
Match Paradise Valley, AZ to Paradis

## Now to run the csv data and send out to files

In [8]:
# Replace each way 'city' value by its closest reference name; values that
# score 0.6 or less are collected instead of being changed.
unmatched = []
for i in range(len(ways_tags)):
    if ways_tags['key'][i] != 'city':
        continue
    match, score = best_match(ways_tags['value'][i], cities, case=False)
    if score > 0.6:
        ways_tags.loc[i, 'value'] = match
        continue
    unmatched.append(ways_tags['value'][i])

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
111000
112000
113000
114000
115000
116000
117000
118000
119000
120000
121000
122000
123000
124000
125000
126000
127000
128000
129000
130000
131000
132000
133000
134000
135000
136000
137000
138000
139000
140000
141000
142000
143000
144000
145000
146000
147000
148000
149000
150000
151000
152000
153000
154000
155000
156000
157000
158000


In [12]:
# Display the distinct city values collected in `unmatched`.
set(unmatched)

{'2036 N. Gilbert Rd.',
 'Gold Canyon',
 'Laveen',
 'Laveen Village',
 'Maricopa',
 'Riverside',
 'San Diego',
 'San Tan Valley',
 'Sun City',
 'Sun City West',
 'Tohono Oodham',
 'Wittmann'}

In [11]:
# Same clean-up for the node tags; note that `unmatched` is started afresh.
unmatched = []
for i in range(len(nodes_tags)):
    is_city_row = nodes_tags['key'][i] == 'city'
    if is_city_row:
        match, score = best_match(nodes_tags['value'][i], cities, case=False)
        if not score > 0.6:
            unmatched.append(nodes_tags['value'][i])
        else:
            nodes_tags.loc[i, 'value'] = match

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
111000
112000
113000
114000
115000
116000
117000
118000
119000
120000
121000
122000
123000
124000
125000
126000
127000
128000
129000
130000
131000
132000
133000
134000
135000
136000
137000
138000
139000
140000
141000
142000
143000
144000
145000
146000
147000
148000
149000
150000
151000
152000
153000
154000
155000
156000
157000
158000


In [26]:
# Display the way tag rows whose value is exactly "San Diego".
ways_tags[ways_tags['value'] == "San Diego"]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,id,key,value,type
198777,198777,198777,5618435,name_base,San Diego,tiger


# Now manipulate the 'Street' vs 'St' type inconsistencies

In [9]:
# Display the distinct 'street' values in the way tags to see which
# spellings and abbreviations need to be fixed.
set(ways_tags[ways_tags['key'] == 'street']['value'])

{'17054 W Post D',
 '19th Avenue',
 '22nd Place',
 '40th Street',
 '5810 Alameda Rd.',
 'Adobe Street',
 'Allred Avenue',
 'Apache Blvd.',
 'Banner Gateway Drive',
 'Boston Street',
 'Brown Road',
 'Des Moines Street',
 'E Golden Eagle Blvd',
 'E Lynx Place',
 'E Rittenhouse Road',
 'E Stanford Drive',
 'E Terrace Ave',
 'E Warner Rd',
 'E. Davenport Dr.',
 'E. Devonshire Ave.',
 'E. Divot Drive',
 'E. Eagle Crest Drive',
 'E. Elliot Rd',
 'E. Frontier St',
 'E. Longbow Parkway',
 'E. Pima Road',
 'East 10th Street',
 'East 12th Avenue',
 'East 16th Avenue',
 'East 17th Avenue',
 'East 18th Avenue',
 'East 1st Avenue',
 'East 1st Street',
 'East 29th Avenue',
 'East 2nd Street',
 'East 34th Avenue',
 'East 36th Avenue',
 'East 38th Avenue',
 'East 39th Avenue',
 'East 4th Avenue',
 'East 4th Street',
 'East 5th Street',
 'East 6th Place',
 'East 6th Street',
 'East 7th Street',
 'East 8th Street',
 'East 9th Place',
 'East Adams Street',
 'East Adobe Road',
 'East Adobe Street',
 'East

In [49]:
import string

# Each entry lists the canonical word first, followed by the abbreviations
# that conv_street rewrites to it.
conventions = [['West','W'],['North','N'],["South","S"],['East','E'],['Street',"St"],["Road","Rd"],["Avenue","Av","Ave"],
                            ["Place","Pl"],["Boulevard","Blvd"],["Trail","Tr"],["Place","Pl"],["Highway","Hwy","Hw","Hy"],
                            ["Parkway","Pkwy","Pw"]]
num_conventions = len(conventions)

# Lookup from every listed spelling to its canonical word.  Built once from
# `conventions`, so it must be rebuilt if that list is changed afterwards.
_canonical_word = {variant: names[0] for names in conventions for variant in names}


def conv_street(street_name):
    """Expand abbreviated direction and street-type words in street_name.

    The name is split on single spaces.  Each word is compared, with its
    punctuation removed, against the spellings in ``conventions``; a word that
    matches is replaced by the canonical word (which also drops its
    punctuation, e.g. 'Blvd.' becomes 'Boulevard').  Words that do not match
    are left untouched, and matching is case-sensitive.
    """
    words = street_name.split(" ")
    for i, word in enumerate(words):
        # Portable punctuation strip: str.translate(None, ...) only works on
        # Python 2 byte strings.
        bare = ''.join(ch for ch in word if ch not in string.punctuation)
        if bare in _canonical_word:
            words[i] = _canonical_word[bare]
    # piece the street string back together with the change
    return ' '.join(words)

def conv_df(df):
    """Apply conv_street to the 'value' of every row whose 'key' is 'street'.

    Args:
        df: pandas DataFrame with 'key' and 'value' columns.

    Returns:
        The same DataFrame object; it is modified in place.
    """
    # Select the street rows with a boolean mask instead of walking the frame
    # with integer labels, so any index works and non-street rows are not
    # visited one at a time.
    street_rows = df['key'] == 'street'
    if street_rows.any():
        df.loc[street_rows, 'value'] = df.loc[street_rows, 'value'].map(conv_street)
    return df
        

In [51]:
# Normalise the street names in both tag tables (nodes first, then ways).
nodes_tags, ways_tags = conv_df(nodes_tags), conv_df(ways_tags)

0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
310000
320000
330000
340000
350000
360000
370000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
480000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
700000
710000
720000
730000
740000
750000
760000
770000
780000
790000
800000
810000
820000
830000
840000
850000
860000
870000
880000
890000
900000
910000
920000
930000
940000
950000
960000
970000
980000
990000
1000000
1010000
1020000
1030000
1040000
1050000
1060000
1070000
1080000
1090000
1100000
1110000
1120000
1130000
1140000
11500

## Create uniformity in the 'key' = 'state' values that refer to an address

In [None]:
# Spellings that are rewritten to the two-letter code 'AZ'.
AZ_reps = ['A', 'AS', 'AZ', 'AZ (Arizona)', 'AZZ', 'Arizona', 'Az', 'az']

# Ways first, then nodes; only address-type 'state' tags are touched.
for tag_table in (ways_tags, nodes_tags):
    for i in range(len(tag_table)):
        is_addr_state = tag_table.loc[i, 'key'] == 'state' and tag_table.loc[i, 'type'] == 'addr'
        if is_addr_state and tag_table.loc[i, 'value'] in AZ_reps:
            tag_table.loc[i, 'value'] = 'AZ'

## Check for what inconsistencies may exist in the 'key'='county' data in *_tags data

In [9]:
# Display the distinct 'county' values in the node tags.
set(nodes_tags[nodes_tags['key'] == 'county']['value'])

{'Maricopa'}

In [10]:
# Display the distinct 'county' values in the way tags.
set(ways_tags[ways_tags['key'] == 'county']['value'])

{'Gila, AZ',
 'Maricopa',
 'Maricopa, AZ',
 'Maricopa, AZ:Yavapai, AZ',
 'Pinal, AZ'}

#### This will be easy enough to convert so that the value contains just the county name and not the state abbreviation, which should be in another field for a database.  It will just take some time to run.

In [15]:
# Reduce way 'county' values such as 'Maricopa, AZ' to the bare county name.
_KNOWN_COUNTIES = ('Maricopa', 'Gila', 'Pinal')
for i in range(len(ways_tags)):
    if ways_tags.loc[i,'key'] == 'county':
        # Text before the first comma, e.g. 'Maricopa' for
        # 'Maricopa, AZ:Yavapai, AZ'.
        county = ways_tags.loc[i,'value'].split(",")[0]
        if county in _KNOWN_COUNTIES:
            # Write back to 'value'; assigning to a 'county' label would add a
            # new column and leave the tag value unchanged.
            ways_tags.loc[i,'value'] = county
    # Progress indicator for the long-running loop.
    if i % 1000 == 0:
        print(i)

        

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
111000
112000
113000
114000
115000
116000
117000
118000
119000
120000
121000
122000
123000
124000
125000
126000
127000
128000
129000
130000
131000
132000
133000
134000
135000
136000
137000
138000
139000
140000
141000
142000
143000
144000
145000
146000
147000
148000
149000
150000
151000
152000
153000
154000
155000
156000
157000
158000


In [10]:
# Sanity check: splitting on ',' leaves the county name as its own element.
'Maricopa' in 'Maricopa, AZ'.split(",")

True

In [None]:
# Write the cleaned tag tables back to their CSV files.
for _frame, _csv_name in ((nodes_tags, "nodes_tags.csv"), (ways_tags, "ways_tags.csv")):
    # Drop index columns left behind by earlier save/load round trips
    # (they show up as 'Unnamed: 0', 'Unnamed: 0.1', ...).
    _keep = [col for col in _frame.columns if not str(col).startswith('Unnamed:')]
    # index=False keeps the row index out of the file, so re-reading it does
    # not add another 'Unnamed: 0' column.
    _frame[_keep].to_csv(_csv_name, index=False)