## Helper Libraries

In [34]:
from typing import *
import json
import pandas as pd

## Process Data

In [48]:
def process_data(json_file: str) -> List[str]:
    """Load a JSON file and collect the ``VietnameseName`` of each entry.

    Args:
        json_file: Path to a UTF-8 encoded JSON file. The top-level value is
            iterated, and every item is indexed with ``'VietnameseName'``.

    Returns:
        The collected names, in the order the entries appear in the file.

    Raises:
        KeyError: If an entry has no ``'VietnameseName'`` key.
    """
    with open(json_file, 'r', encoding="utf8") as f:
        data = json.load(f)
    # NOTE(review): a recorded notebook run of this function failed with
    # KeyError: 'VietnameseName', so the real data file presumably uses a
    # different key or nesting -- TODO confirm the schema.
    # The list was previously built and then dropped; return it to the caller.
    return [province['VietnameseName'] for province in data]

In [49]:
# Note: the path below is written with Windows-style backslash separators.
process_data('data\\simplified_json_generated_data_vn_units_minified.json')

KeyError: 'VietnameseName'

## Preprocess

In [None]:
def read_input(filename: str):
    """Placeholder, not implemented yet (presumably will load input from ``filename``).

    Currently does nothing and returns None.
    """
    pass

In [None]:
def preprocess(input):
    """Placeholder, not implemented yet; currently does nothing and returns None.

    Note: the parameter name ``input`` shadows the builtin of the same name
    inside this function.
    """
    pass

## Tree Traversal

In [31]:
class TrieNode:
    """A node of a character trie; the root node doubles as the trie itself.

    Each node maps single characters to child nodes and carries a flag that
    marks whether the path from the root to this node spells a stored word.
    """

    # Info saved at the end of each word
    #    - is_end: bool
    #    - related province / district / ward  (TODO: not stored yet)
    def __init__(self) -> None:
        self.children: Dict[str, "TrieNode"] = {}
        self.is_end = False

    def _find(self, text: str) -> Optional["TrieNode"]:
        """Return the node reached by following ``text``, or None if the path breaks."""
        node = self
        for c in text:
            node = node.children.get(c)
            if node is None:
                return None
        return node

    def insert(self, word: str) -> None:
        """Store ``word``, creating any missing nodes along its path."""
        node = self
        for c in word:
            if c not in node.children:
                node.children[c] = TrieNode()
            node = node.children[c]
        node.is_end = True

    def search(self, word: str) -> bool:
        """Return True only if ``word`` was stored as a complete word."""
        node = self._find(word)
        return node is not None and node.is_end

    def dfs(self, node: "TrieNode", word_list: List[str], word: str) -> None:
        """Append every stored word strictly below ``node`` to ``word_list``.

        Args:
            node: Node whose descendants are enumerated (``node`` itself is
                not reported).
            word_list: Output list, extended in place.
            word: Text spelled by the path from the root to ``node``.

        Words are reported in pre-order, visiting children in insertion
        order. An explicit stack is used instead of recursion so that long
        words cannot exceed the interpreter's recursion limit.
        """
        # Children are pushed in reverse so the first-inserted child is
        # popped (and therefore visited) first.
        pending = [
            (word + c, child)
            for c, child in reversed(list(node.children.items()))
        ]
        while pending:
            text, current = pending.pop()
            if current.is_end:
                word_list.append(text)
            pending.extend(
                (text + c, child)
                for c, child in reversed(list(current.children.items()))
            )

    def startsWith(self, prefix: str) -> List[str]:
        """Return all stored words that begin with ``prefix``.

        The result is always a list; it is empty (and therefore falsy) when
        no stored word has that prefix. If ``prefix`` itself is a stored
        word it comes first.
        """
        node = self._find(prefix)
        if node is None:
            return []
        word_list = []
        if node.is_end:
            word_list.append(prefix)
        self.dfs(node, word_list, prefix)
        return word_list

    def delete(self, word: str) -> bool:
        """Remove ``word`` from the trie.

        Returns:
            False if ``word`` is not stored as a complete word (including
            when it is only a prefix of other words), True if it was removed.

        Nodes left with no children and no end-of-word flag are pruned so
        that deleted words do not leave dead branches behind.
        """
        path = []  # (parent, character) pairs from the root down to the word's node
        node = self
        for c in word:
            child = node.children.get(c)
            if child is None:
                return False
            path.append((node, c))
            node = child
        if not node.is_end:
            return False
        node.is_end = False
        # Walk back up, dropping nodes that no longer lead to any stored word.
        for parent, c in reversed(path):
            child = parent.children[c]
            if child.is_end or child.children:
                break
            del parent.children[c]
        return True

## Test

In [25]:
def test_trie():
    """Print the results of a short insert / search / prefix walkthrough."""
    trie = TrieNode()
    trie.insert("apple")
    for probe in ("apple", "app"):
        print(trie.search(probe))      # True, then False
    print(trie.startsWith("app"))      # ['apple']
    trie.insert("applepine")
    print(trie.search("app"))          # False: "app" is still only a prefix
    print(trie.startsWith("appl"))     # ['apple', 'applepine']
    trie.insert("app")
    print(trie.startsWith("app"))      # ['app', 'apple', 'applepine']

In [None]:
def test_data_process():
    """Run ``process_data`` on ``"data.json"`` and hand back whatever it returns."""
    return process_data("data.json")

In [29]:
# Run the trie walkthrough; it prints each result to stdout.
test_trie()

True
False
['apple']
False
['apple', 'applepine']
['app', 'apple', 'applepine']


In [30]:
# Scratch check: `+` on two strings yields a new string; the operands are unchanged.
word = word2 = "hello"
print(word + word2, word, sep="\n")

hellohello
hello
