How to read this project:
Cell 1 and Cell 2 are the majority of the codes, feel free to only check them when need to, otherwise just skip to the Markdown cells for the narration.

In [2]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import csv
import codecs
import re
import xml.etree.cElementTree as ET

import cerberus

import schema


NODES_PATH = "nodes.csv"
NODE_TAGS_PATH = "nodes_tags.csv"
WAYS_PATH = "ways.csv"
WAY_NODES_PATH = "ways_nodes.csv"
WAY_TAGS_PATH = "ways_tags.csv"

LOWER_COLON = re.compile(r'^([a-z]|_)+:([a-z]|_)+')
PROBLEMCHARS = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')

SCHEMA = schema.schema

# Make sure the fields order in the csvs matches the column order in the sql table schema
NODE_FIELDS = ['id', 'lat', 'lon', 'user', 'uid', 'version', 'changeset', 'timestamp']
NODE_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_FIELDS = ['id', 'user', 'uid', 'version', 'changeset', 'timestamp']
WAY_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_NODES_FIELDS = ['id', 'node_id', 'position']




def shape_element(element):
    """Clean and shape node or way XML element to Python dict"""

    node_attribs = {}
    way_attribs = {}
    way_nodes = []
    tags = []  # Handle secondary tags the same way for both node and way elements
    poscounter = 0 #for way nodes position

    if element.tag == 'node':
        for field in NODE_FIELDS:
            node_attribs[field] = element.attrib[field]
        for tag in element.iter('tag'):
            tag_dict = {}
            tag_dict['id'] = element.attrib['id'] #id (NODE_TAGS_FIELDS)
            
            #key and type (NODE_TAGS_FIELDS)
            if PROBLEMCHARS.match(tag.attrib["k"]):
                pass
            elif ':' in tag.attrib['k']:
                tag_dict['type'] = tag.attrib['k'].split(':')[0]
                tag_dict['key'] = tag.attrib["k"].split(':',1)[1]
            else:
                tag_dict['type'] = 'regular'
                tag_dict['key'] = tag.attrib['k']
                
            #value (NODE_TAGS_FIELDS) where most of the data fixing happens
            if tag_dict['type'] == 'addr':
                if tag_dict['key'] == 'state':
                    tag_dict['value'] = update_name(tag.attrib['v'],mapping_state) #fix state
                elif tag_dict['key'] == 'postcode':
                    tag_dict['value'] = update_postcode(tag.attrib['v']) #fix postcode
                elif tag_dict['key'] == 'city':
                    tag_dict['value'] = update_city(tag.attrib['v']) #fix city
                else:
                    tag_dict['value'] = update_name(tag.attrib['v'],mapping_street) #fix street type
            else:
                tag_dict['value'] = tag.attrib['v']
            
            tags.append(tag_dict)
        return {'node': node_attribs, 'node_tags': tags}
        
    elif element.tag == 'way':
        for field in WAY_FIELDS:
            way_attribs[field] = element.attrib[field]
        for nd in element.iter('nd'):
            nd_dict = {}
            nd_dict['id'] = element.attrib['id']
            nd_dict['node_id'] = nd.attrib['ref']
            nd_dict['position'] = poscounter
            poscounter += 1
            way_nodes.append(nd_dict)
        for tag in element.iter('tag'):
            tag_dict = {}
            tag_dict['id'] = element.attrib['id'] #id
            #key and type
            if PROBLEMCHARS.match(tag.attrib["k"]):
                pass
            elif ':' in tag.attrib['k']:
                tag_dict['type'] = tag.attrib['k'].split(':')[0]
                tag_dict['key'] = tag.attrib["k"].split(':',1)[1]
            else:
                tag_dict['type'] = 'regular'
                tag_dict['key'] = tag.attrib['k']
            #value
            if tag_dict['type'] == 'addr':
                if tag_dict['key'] == 'state':
                    tag_dict['value'] = update_name(tag.attrib['v'],mapping_state) #fix state
                elif tag_dict['key'] == 'postcode':
                    tag_dict['value'] = update_postcode(tag.attrib['v']) #fix postcode
                elif tag_dict['key'] == 'city':
                    tag_dict['value'] = update_city(tag.attrib['v']) #fix city
                else:
                    tag_dict['value'] = update_name(tag.attrib['v'],mapping_street) ##fix street type
            else:
                tag_dict['value'] = tag.attrib['v']
            
            tags.append(tag_dict)    
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}


# ================================================== #
#               Helper Functions                     #
# ================================================== #
def get_element(osm_file, tags=('node', 'way', 'relation')):
    """Yield element if it is the right type of tag"""

    context = ET.iterparse(osm_file, events=('start', 'end'))
    _, root = next(context)
    for event, elem in context:
        if event == 'end' and elem.tag in tags:
            yield elem
            root.clear()


def validate_element(element, validator, schema=SCHEMA):
    """Raise ValidationError if element does not match schema"""
    if validator.validate(element, schema) is not True:
        field, errors = next(validator.errors.iteritems())
        message_string = "\nElement of type '{0}' has the following errors:\n{1}"
        error_strings = (
            "{0}: {1}".format(k, v if isinstance(v, str) else ", ".join(v))
            for k, v in errors.iteritems()
        )
        raise cerberus.ValidationError(
            message_string.format(field, "\n".join(error_strings))
        )


class UnicodeDictWriter(csv.DictWriter, object):
    """Extend csv.DictWriter to handle Unicode input"""

    def writerow(self, row):
        super(UnicodeDictWriter, self).writerow({
            k: (v.encode('utf-8') if isinstance(v, unicode) else v) for k, v in row.iteritems()
        })

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)


# ================================================== #
#               Main Function                        #
# ================================================== #
def process_map(file_in, validate):
    """Iteratively process each XML element and write to csv(s)"""

    with codecs.open(NODES_PATH, 'w') as nodes_file, \
         codecs.open(NODE_TAGS_PATH, 'w') as nodes_tags_file, \
         codecs.open(WAYS_PATH, 'w') as ways_file, \
         codecs.open(WAY_NODES_PATH, 'w') as way_nodes_file, \
         codecs.open(WAY_TAGS_PATH, 'w') as way_tags_file:

        nodes_writer = UnicodeDictWriter(nodes_file, NODE_FIELDS)
        node_tags_writer = UnicodeDictWriter(nodes_tags_file, NODE_TAGS_FIELDS)
        ways_writer = UnicodeDictWriter(ways_file, WAY_FIELDS)
        way_nodes_writer = UnicodeDictWriter(way_nodes_file, WAY_NODES_FIELDS)
        way_tags_writer = UnicodeDictWriter(way_tags_file, WAY_TAGS_FIELDS)

        nodes_writer.writeheader()
        node_tags_writer.writeheader()
        ways_writer.writeheader()
        way_nodes_writer.writeheader()
        way_tags_writer.writeheader()

        validator = cerberus.Validator()

        for element in get_element(file_in, tags=('node', 'way')):
            el = shape_element(element)
            if el:
                if validate is True:
                    validate_element(el, validator)

                if element.tag == 'node':
                    nodes_writer.writerow(el['node'])
                    node_tags_writer.writerows(el['node_tags'])
                elif element.tag == 'way':
                    ways_writer.writerow(el['way'])
                    way_nodes_writer.writerows(el['way_nodes'])
                    way_tags_writer.writerows(el['way_tags'])


In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

OSM_FILE = "toronto_canada.osm"  # Replace this with your osm file
SAMPLE_FILE = "sample.osm"

street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)

expected = ["Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square", "Lane", "Road", 
            "Trail", "Parkway", "Commons"]

# UPDATE THIS VARIABLE
mapping_street = { "Ehs": "EHS",
            "St": "Street",
            "St.": "Street",
            "street": "Street",
            "STREET": "Street",
            "Ave": "Avenue",
            "Ave.": "Avenue",
            "Rd.": "Road",
            "Rd": "Road",
            "Dr.": "Drive",
            "Dr": "Drive",
            "Blvd": "Boulevard",
            "Blvd.": "Boulevard",
            "Crt": "Court"}
mapping_state = { "Ontario": "ON",
                 "OH": "ON"
               }



def audit_street_type(street_types, street_name):
    m = street_type_re.search(street_name)
    if m:
        street_type = m.group()
        if street_type not in expected:
            street_types[street_type].add(street_name)


def is_street_name(elem):
    return (elem.attrib['k'] == "addr:street")


def audit(osmfile):
    osm_file = open(osmfile, "r")
    street_types = defaultdict(set)
    for event, elem in ET.iterparse(osm_file, events=("start",)):

        if elem.tag == "node" or elem.tag == "way":
            for tag in elem.iter("tag"):
                if is_street_name(tag):
                    audit_street_type(street_types, tag.attrib['v'])
    osm_file.close()
    return street_types


def update_name(name, mapping):
    words = name.split() #split string in n words
    last_word = words[-1] #isolate last word
    if last_word in mapping:
        words[-1] = mapping[words[-1]]        
        return ' '.join(words)
    else:
        return name
    
def update_postcode(postcode):
    """Adds a space in 6-character postcodes"""
    if len(postcode) >= 7 or len(postcode) <= 5:
        return postcode
    elif len(postcode) == 6:
        return postcode[0:3] + ' ' + postcode[3:6]
    
def update_city(city):
    return city.replace("City of ", "").replace("Town of ", "").replace("Township of ","").replace("toronto", "Toronto")

### Using the audit function to check the street types that aren't in "expected"

In [5]:
my_dict = audit(OSM_FILE)
    
for key in my_dict.keys():
    if len(key) <= 3:
        print key

Ehs
EHS
Cir
48
10a
8a
109
102
101
106
107
39
38
32
30
34
W
S.
6
Ben
St
99
97
14f
14d
14e
14b
14c
14a
1
147
Dog
Rd
132
24
25
26
27
21
22
23
28
403
400
13b
Bay
2b
End
88
89
E.
7
20
2
Ky
11
10
13
12
15
14
17
16
19
18
Dr
12a
12b
8
124
125
W.
Run
3
Ldg
Way
6a
N
Cv
9
7A
4
5a
Trl
57
56
50
52
Dr.
Lea
Ho
11a
11b
Row
Ave
Lan
47
5
E
4B
4b


##### I noticed a lot of abbreviations such as 'St', 'St.', 'Cir'...etc. Below I print out some of the odd looking ones just to take a look, and if deemed neccessary, add to mapping (cell 2). For example 'EHS' looks to be correct, so I don't add that to mapping.

In [3]:
print my_dict['Cir']
print my_dict['STREET']
print my_dict['street']
print my_dict['St']
print my_dict['Rd']
print my_dict['Blvd']
print my_dict['Blvd.']
print my_dict['Crt']
print my_dict['Dr']
print my_dict['Dr.']
print my_dict['EHS']
print my_dict['Ave']
print my_dict['Ave.']

set(['Father Tobin Rd, Fairwood Cir'])
set(['JARVIS STREET'])
set(['Dundas street'])
set(['222 Pearson St', '1254 Pentland St', '100 Rideau St', 'River St', 'Chatterson St', 'Masson St', 'Baldwin St', '1250 Pentland St'])
set(['Main St. S.'])
set(['Hockley Rd', 'Toynevale Rd', 'Sanatorium Rd', '120 Nonquon Rd', '155 Glovers Rd', 'Kennedy Rd', 'Spadina Rd'])
set(['Simonston Blvd', '225 Platten Blvd', ' Overlea Blvd', '226 Platten Blvd'])
set(['Cornell Centre Blvd.', 'Rexdale Blvd.'])
set([])
set(['220 Ormond Dr', '275 Ormond Dr', 'Rouge Bank Dr', '221 Ormond Dr', '1330 Trowbridge Dr', 'Wilson House Dr', '1087 Ormond Dr', '460 Woodmount Dr'])
set(['Bur Oak Dr.'])
set(['2nd Line EHS', '6th Line EHS', '5th Line EHS', '4th Line EHS', '1st Line EHS', '7th Line EHS', '3rd Line EHS'])
set(['Champlain Ave', 'Rawlinson Ave', 'Raleigh Ave', 'Warden Ave', '480 Mayfair Ave', '455 Mayfair Ave', '460 Mayfair Ave'])
set(['Dean Ave.', 'University Ave.'])


##### Below I look at some of the node types to see if there's any abnormality (I omitted printing out set of "streets" below because the result would be too big to spot abnormality with eye test)

In [13]:
street = set()
street_name = set()
street_type = set()
highway = set()
city = set()
state = set()
postcode = set()
housenumber = set()

OH = 0
ON = 0
Ontario = 0
NY = 0

import re
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

for _, element in ET.iterparse(SAMPLE_FILE, events=('end',)):
    for child in element.iter('tag'):
        if 'addr:' in child.attrib['k']:

            if re.search(r'addr:street$', child.attrib['k']):
                street.add(child.attrib['v'])

            elif re.search(r'addr:street:name$', child.attrib['k']):
                street_name.add(child.attrib['v'])

            elif re.search(r'addr:street:type$', child.attrib['k']):
                street_type.add(child.attrib['v'])

            elif re.search(r'addr:highway$', child.attrib['k']):
                highway.add(child.attrib['v'])

            elif re.search(r'addr:city$', child.attrib['k']):
                city.add(child.attrib['v'])

            elif re.search(r'addr:state$', child.attrib['k']):
                state.add(child.attrib['v'])
                if child.attrib['v'] == 'Ontario':
                    Ontario += 1
                elif child.attrib['v'] == 'ON':
                    ON += 1
                elif child.attrib['v'] == 'OH':
                    OH += 1
                elif child.attrib['v'] == 'NY':
                    NY += 1

            elif re.search(r'addr:postcode$', child.attrib['k']):
                postcode.add(child.attrib['v'])

            elif re.search(r'addr:housenumber$', child.attrib['k']):
                housenumber.add(child.attrib['v'])
                
print "street_name: {}\n".format(street_name)
print "street_type: {}\n".format(street_type)
print "highway: {}\n".format(highway)
print "city: {}\n".format(city)

print "state: {}\n".format(state)
print "ON: {}\n".format(ON)
print "Ontario: {}\n".format(Ontario)
print "NY: {}\n".format(NY)
print "OH: {}\n".format(OH)

print "postcode: {}\n".format(postcode)
print "housenumber: {}\n".format(housenumber)

street_name: set([])

street_type: set([])

highway: set([])

city: set(['City of Kawartha Lakes', 'Don Mills', 'Town of Newmarket', 'North York', 'Township of East Garafraxa', 'City of Pickering', 'Township of Amaranth', 'Agincourt', 'Richmond Hill', 'York', 'Whitby', 'Township of Uxbridge', 'Town of Aurora', 'King City', 'Mono', 'Oakville', 'Town of Bradford West Gwillimbury', 'Terra Cotta', 'Town of Whitby', 'Port Perry', 'Etobicoke', 'Town of Ajax', 'Township of Adjala-Tosorontio', 'Goodwood', 'Acton', 'Burlington', 'City of Vaughan', 'Aurora', 'City of Burlington', 'Newmarket', 'Hamilton', 'Campbellville', 'King', 'East Gwillimbury', 'Township of Guelph/Eramosa', 'Municipality of Clarington', 'Town of Erin', 'Town of East Gwillimbury', 'Caledon', 'Township of Scugog', 'Town of Whitchurch-Stouffville', 'Brampton', 'Town of Mono', 'Vaughan', 'Town of Milton', 'Town of Grimsby', 'City of Brampton', 'Dundas', 'Scarborough', 'Town of Caledon', 'Richmond Hill (Oak Ridges)', 'Town of Hal

##### STATE

I notice there are four different States so I also inserted counters for each, in the above result, which comes out to be

* ON: 9048
* Ontario: 30
* NY: 3
* OH: 3

I figure "OH" is just a typo from the correct "ON", but "NY" could likely be true since one of the towns "Niagara-on-the-Lake" is at the border of State of NY, so I don't fix it. I use the same update_name function but with a mapping_state to fix "Ontario" and "OH" to the correct "ON".

##### POSTCODE

Some postcodes are lacking a space in the middle, for example "A1B2C3" instead of the correct "A1B 2C3", for that I create an update_postcode function. 

```
def update_postcode(postcode):
    if len(postcode) >= 7 or len(postcode) <= 5:
        return postcode
    elif len(postcode) == 6:
        return postcode[0:3] + ' ' + postcode[3:6]
```

##### CITY

There's a blatant one of "toronto" instead of "Toronto", outside of that, the most obvious issue is the redunduncy in "Toronto", "City of Toronto", "Milton", "Town of Milton"...etc, I create a update_city funtion. 

```
def update_city(city):
    return city.replace("City of ", "").replace("Town of ", "").replace("Township of ","").replace("toronto", "Toronto")
```

All of these functions are now called in shape_element

# PROBLEMS ENCOUNTERED IN MY MAP
As aformentioned
1. street abbreviations (St, St.,Rd....etc)
2. postal code without space in the middle (e.g. "A1B2C3" instead of "A1B 2C3")
3. state: some are abbreviated, "ON", and some not, "Ontario", some typo "OH", some "NY"?!

In [50]:
process_map(OSM_FILE, validate=True)

# Data Overview

## File sizes
```
toronto_canada.osm ......... 1118 MB
p3.db .......... 867 MB
nodes.csv ............. 383 MB
nodes_tags.csv ........ 84 MB
ways.csv .............. 40 MB
ways_tags.csv ......... 88 MB
ways_nodes.cv ......... 128 MB 
```

## Number of nodes
```
SELECT COUNT(*) FROM nodes;
```
```
4780051
```


## Number of ways
```
SELECT COUNT(*) FROM ways;
```
```
697067
```

## Number of Unique Users
```
SELECT COUNT(DISTINCT(e.uid))
FROM (SELECT uid FROM nodes UNION ALL SELECT uid FROM ways) AS e;
```
```
1891
```

## Top 10 Unique Users
```
SELECT e.user, COUNT(*) as num
FROM (SELECT user FROM nodes UNION ALL SELECT user FROM ways) AS e
GROUP BY e.user
ORDER BY num DESC
LIMIT 10;
```
```
andrewpmk,3331259
MikeyCarter,480540
Kevo,437335
"Victor Bielawski",159208
Bootprint,159085
"Mojgan Jadidi",100465
geobase_stevens,80224
rw__,75788
"Gerit Wagner",43153
brandoncote,37936
```

# OTHER IDEAS ABOUT THE DATASET

## Top 10 amenities

```
sqlite> SELECT value, COUNT(*) as num
FROM nodes_tags
WHERE key='amenity'
GROUP BY value
ORDER BY num DESC
LIMIT 15;

fast_food,2912
restaurant,2660
bench,2195
post_box,1927
cafe,1372
parking,1267
waste_basket,1121
bank,1039
fuel,994
pharmacy,718

```
## Top 5 Religions
```
SELECT nodes_tags.value, COUNT(*) as num
FROM nodes_tags
JOIN (SELECT DISTINCT(id) FROM nodes_tags WHERE value='place_of_worship') i
ON nodes_tags.id=i.id
WHERE nodes_tags.key='religion'
GROUP BY nodes_tags.value
ORDER BY num DESC
LIMIT 5;

christian|343
muslim|15
jewish|8
buddhist|4
hindu|3
```

## Top 10 Cuisines
```
SELECT nodes_tags.value, COUNT(*) as num
FROM nodes_tags 
    JOIN (SELECT DISTINCT(id) FROM nodes_tags WHERE value='restaurant') i
    ON nodes_tags.id=i.id
WHERE nodes_tags.key='cuisine'
GROUP BY nodes_tags.value
ORDER BY num DESC;

chinese|143
indian|95
italian|84
japanese|80
pizza|54
sushi|53
thai|52
vietnamese|43
breakfast|33
chicken|33
```

Not shown in the top 10 cuisines but skimming through the list I could tell that the data could use more cleaning.

1.I thought I saw 'caribbean' for more than a couple times, so I do another query for cuisines that contains string 'car'

```
WHERE nodes_tags.value LIKE '%car%'

caribbean|13
carribean|2
african;caribbean|1
carribbean|1
carribean,_roti|1
```
I myself could never spell caribbean properly so that's not surprising.

2.The above result also reflects another issue, restaurants with mixed cuisines such as the above 'african;caribbean', so I look for results that contain ';'

```
WHERE nodes_tags.value LIKE '%;%'

korean;japanese|3
thai;chinese|2
Thai;vietnamese|1
african;caribbean|1
asian;japanese|1
bar;grill|1
chinese;asian;thai|1
chinese;japanese|1
fine_dining;international;italian;mediterranean;pasta;pizza;sandwich|1
greek;Canadian|1
hungarian; thai|1
indian; chinese|1
indian;international|1
indian;thai|1
international;italian|1
international;italian;pasta;american|1
international;pizza;pasta;sandwich;italian;burger|1
italian;international|1
italian;pizza|1
japanese;korean|1
```
Off-topic but Toronto has always been hailed as one of the most multicultural place, I wonder if this, along with place_of_worship, could be an indicator, would love to see the results from other cities.

In any case, they should establish standardized rules for tagging fusion cusines, one idea can be to rid ';' and just have extra node tags.

## Top Cafes

```
SELECT nodes_tags.value, COUNT(*) as num
FROM nodes_tags
JOIN (SELECT DISTINCT(id) FROM nodes_tags WHERE value = 'cafe') i
ON nodes_tags.id = i.id
WHERE nodes_tags.key = 'name'
GROUP BY nodes_tags.value
ORDER BY num DESC;

Tim Hortons|400
Starbucks Coffee|240
Second Cup|97
Coffee Time|67
Country Style|37
Timothy's World Coffee|17
Coffee Culture|12
Starbucks|10
Aroma Espresso Bar|9
Timothy's|9
```
I would have expected an even more dominant number from Tim Horton's over Starbucks. Anyhow, these data can be furthur cleaned as redundancies are present. "Starbucks Coffee"/"Starbucks", "Timonthy's World Coffee"/"Timothy's".


## Conclusion (ideas to improve the dataset)

Regarding the common issues we see above, spelling, redundancy, and how to tag fusion restaurants. To clean the existing data manually is not super hard, it'd just take time. For example I could find all the fusion entries by searching for ';' and use it as the seperater and turn one node tag 'korean;japanese' into two separte node tags, 'korean' and 'japanese'.
Also I could find all the cafes with 'Starbucks' in it and change them to 'Starbucks Coffee'. Better yet create a more sophisticated Regex search to detect possible spelling mistakes. However, if there are ways to prevent future wrong entries that would be even better.

One idea I'd love to see implemented that could simultaneously prevent the issues above, is to have at the point of data entry, the user gets a dropdown "suggested" list of names that is already popular, while still retaining the ability to just create his own data. (Think google search, where upon entering the first few letters, you get a suggestion of what other people search for, but you can still finish typing if you have a query that's not really the same)

Benefits
1. Spelling mistakes. Before a user could finish entering the wrong 'carribbean', he would see a selection of previous entries that include the 'caribbean' with 13 entries over the 'carribbean' with 1, he would easily go with the first one.

2. Redundancy. Again, upon typing "Starbucks' the user will see the most popular entry 'Starbucks Coffee' at 240 instead of just 'Starbucks' at 10.

3. The fusion entries are trickier but the drop down menu could at least alleviate the redundancy between 'korean;japanese' and 'japanese;korean'

4. Helps users edit others' mistake. If a user sees, from the dropdown list, 'carribbean' with an entry of 1, and he feels generous enough with his time, he could find that particular entry and edit it.

Potential Problem
Let's say at the beginning stage of the map the very first 'carribbean' entry is of the wrong spelling, it could potentially be overlooked by all the following users and becomes the leading choice from there on out. We might end up with 13 entries of wrong spelling before anyone catches it and edit it. Well at least that would be easy to clean systematically since the entries have the same wrong spelling.

Regarding the fusion cuisine entries, I think it'd be best if the user community could get together and agree upon a standard rule. For example, simply rid the use of ';' by prompting a reminder whenever a user tries to type ';' or '/' that they should be entering separate tags.
Also should be talked about is, should 'pizza' or 'sushi' be legit cuisines? or should we just stick to 'italian' and 'japanese'? We could prompt users when trying to enter words that doesn't include country names, but then 'caribbean' isn't a country either.
Another potential problem, for example Korean Mexican, has slowly became a thing in the past 20 years, started in Los Angeles and now spread all over US and internationally, even reached here in Singapore. It also wouldn't seem right to seperate it into two different tags of 'korean' and 'mexican'. I don't have an easy answer for this but I think they can be talked about.