### Match location informations and POI informations

In [2]:
from pprint import pprint
import re
import json

In [8]:
with open("taiwan_districts.json", 'r') as file, open("taiwan_districts_mapping.json", 'w', encoding='utf-8-sig') as outfile:
    data = json.load(file)
    dataset = {}
    for county in data:
        districts = {}
        for d in county['districts']:
            districts[d['name']] = [d['name']]
            if len(d['name']) > 2:
                districts[d['name']].append(d['name'][:2])
        dataset[county['name']] = districts
    json.dump(dataset, outfile, ensure_ascii=False)

In [7]:
def countyMapping(mapping_list, addr):
    for county in mapping_list:
        match = re.search(county, addr)
        if match: return 1, county
    for county in mapping_list:
        match = re.search(county[:2], addr)
        if match: return 1, county
    #if addr has no county
    for county in mapping_list:
        pattern = "|".join(t for town in mapping_list[county] for t in mapping_list[county][town] if len(t)>2)
        if pattern == '': continue
        match = re.search(pattern, addr)
        if match: return 0, county
    return None, None

def townMapping(mapping_list, addr, countyflag):
    #if addr has no county
    if countyflag:
        addr = addr[2:]
    for key, value in mapping_list.items():
        pattern = "|".join(loc for loc in value)
        match = re.search(pattern, addr)
        if match: 
            return key
    

with open("poi_data/taiwan_districts_mapping.json", 'r', encoding='utf-8-sig') as file:
    discrits = json.load(file)
    dataset = {}
    for county in discrits:
        dataset[county] = {}
        for town in discrits[county]:
            dataset[county][town] = []

with open("poi_data/scenic_spot_C_f.json", 'r', encoding='utf-8-sig') as file:
    scenic_spot = json.load(file)
    err = 0
    ok = 0
    noaddr = 0
    for poi in scenic_spot['XML_Head']['Infos']['Info']:
        if poi['Add'] == '' or len(poi['Add']) < 6:
            noaddr += 1
            continue
        addr = re.sub(r'台', '臺', poi['Add'])
        countyflag, county = countyMapping(discrits, addr)
        try:
            town = townMapping(discrits[county], addr, countyflag)
            dataset[county][town].append({"Name":poi['Name'], "Add":poi['Add']})
        except:
            err += 1
            print (county, town)
            print ({"Name":poi['Name'], "Add":poi['Add']})
        else:
            ok += 1
             
with open("poi_data/location_POIs.json", 'w', encoding='utf8') as file:
    json.dump(dataset, file, ensure_ascii=False)

None 南竿鄉
{'Name': '印地安人頭岩', 'Add': '在大漢據點正上方，可於面對北海坑道右側觀景台欣賞'}
None 金山區
{'Name': '金包里古道', 'Add': '陽金公路大油坑至八煙之間'}
宜蘭縣 None
{'Name': '壯圍沙丘旅遊服務園區', 'Add': '宜蘭縣壯濱路二段196巷18號'}
新竹縣 None
{'Name': '情人谷步道', 'Add': '新竹縣內灣大橋後的阿三哥餐廳，由其旁邊的道路往上步行約五分鐘即可到起點'}
新竹縣 None
{'Name': '木馬古道', 'Add': '新竹縣內灣木馬古道'}
新竹縣 None
{'Name': '峰城炭窯 (木炭的家)', 'Add': '新竹縣三峰國小附近'}
新竹縣 None
{'Name': '南大隘休閒景觀道路', 'Add': '新竹縣122縣道南清公路35.5公里往上至白蘭下山122號線道路50公里處終點'}
新竹縣 None
{'Name': '桃山隧道', 'Add': '桃山隧道(新竹縣122縣道南清公路)'}
新竹縣 None
{'Name': '文化兒童公園', 'Add': '新竹縣文平路與吳濁流路交叉口'}
新竹縣 None
{'Name': '鳳山溪麻園生態公園', 'Add': '新竹縣台一號道(中華路)過鳳山溪橋進入長青路沿鳳山溪堤防，鳳山溪堤防內'}
苗栗縣 None
{'Name': '玉清宮', 'Add': '苗栗市玉清里坡塘下1號'}
苗栗縣 None
{'Name': '老家藝文空間', 'Add': '苗栗市中苗里建台街1巷13號'}
None 仁愛鄉
{'Name': '永興大樟樹', 'Add': '永興村永興活動中心旁，鄰投58號鄉道'}
嘉義市 None
{'Name': '林業試驗所中埔研究中心', 'Add': '行政中心:嘉義市文化路432巷65號'}
None 阿里山鄉
{'Name': '沼平公園', 'Add': '阿里山森林遊樂區內'}
None 新港鄉
{'Name': '新港三朵花的動人故事', 'Add': '板頭村北港溪畔河堤上'}
None 阿里山鄉
{'Name': '象鼻木', 'Add': '阿里山森林遊樂區內'}
None 阿里山鄉
{'Name': '高山博物館',

In [8]:
print ("Can't find location: {}".format(err))
print ("No address or short address: {}".format(noaddr))
print ("Can find location: {}".format(ok))

Can't find location: 107
No address or short address: 85
Can find location: 4966
