In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import os
import time
import pickle
from random import randint

In [2]:
# Browser options for the scraping session: run Chrome headless, i.e. without
# opening a visible window.
chrome_options = Options()
chrome_options.add_argument("--headless")

In [3]:
# Path to the ChromeDriver binary, expected next to the notebook in the current
# working directory. os.path.join inserts the platform's own separator instead
# of a hard-coded backslash and avoids a doubled separator when the working
# directory already ends in one (e.g. a drive root).
chrome_driver = os.path.join(os.getcwd(), "chromedriver.exe")

In [4]:
# Start the Chrome session used by every lookup below, using the ChromeDriver
# binary located above and the headless options configured earlier.
driver = webdriver.Chrome(
    executable_path=chrome_driver,
    options=chrome_options,
)

In [5]:
# Words to look up, keyed by phonological process; populated in a later cell.
input_data = dict()

# Collected [word, Hangul pronunciation, IPA] entries, keyed by the same
# process names as input_data.
output_data = {"consonant_cluster_simplification": []}

In [6]:
# Test words for consonant cluster simplification (Yu Cho, 2015, p. 29),
# listed in their original order and grouped by the coda cluster each word
# contains.
input_data["consonant_cluster_simplification"] = [
    # coda ㄳ
    "넋", "몫", "삯",
    # coda ㄺ
    "닭", "흙", "칡", "까닭", "기슭",
    # coda ㄻ
    "삶", "앎",
    # coda ㅄ
    "없다고",
    # coda ㄵ
    "앉다고", "얹다고", "끼얹다고",
    # coda ㄶ
    "않다고", "끊다고", "많다고",
]

In [7]:
# Look up every input word in the online pronunciation converter at
# pronunciation.cs.pusan.ac.kr and store [word, Hangul pronunciation, IPA]
# in output_data. Words the service did not process cleanly are printed so
# they can be handled by hand instead of being stored.
PRONUNCIATION_URL = "http://pronunciation.cs.pusan.ac.kr/pronunc.htm"
MIN_RESULT_CELLS = 4  # result[0] through result[3] are read below

for key in input_data:
    # Tolerate input categories that have no pre-created output list.
    entries = output_data.setdefault(key, [])

    for word in input_data[key]:
        driver.get(PRONUNCIATION_URL)

        form = driver.find_element_by_name("text1")
        submit_button = driver.find_element_by_id("submit1")

        form.clear()
        form.send_keys(word)
        submit_button.click()

        # find_elements_* returns a (possibly empty) list, never None, so the
        # guard must check the length before any cell is indexed.
        result = driver.find_elements_by_class_name("td2")

        if len(result) < MIN_RESULT_CELLS:
            print()
            print(f"problem with {key}: {word}:")
            print(f"expected at least {MIN_RESULT_CELLS} result cells, got {len(result)}")
            print()
        elif result[0].text != result[1].text:
            # The first two cells disagree; in the recorded runs this happened
            # when the service changed the spelling of the submitted word, so
            # the returned pronunciation belongs to a different word.
            print()
            print(f"problem with {key}: {word}:")
            for item in result:
                print(item.text)
            print()
        else:
            entries.append([result[1].text, result[2].text, result[3].text])

        # Pause between requests on every path (including failures) so the
        # server is never hit back-to-back.
        time.sleep(randint(10, 20))


problem with consonant_cluster_simplification: 앉다고:
앉다고
안다고
안다고/안따고
andago/ant̕ago
안다고
안다고/안따고



problem with consonant_cluster_simplification: 얹다고:
얹다고
언다고
언다고
ʌndago
언다고
언다고



problem with consonant_cluster_simplification: 끼얹다고:
끼얹다고
끼얹자고
끼언짜고
k̕iʌnts̕ago
끼얹자고
끼언짜고



problem with consonant_cluster_simplification: 끊다고:
끊다고
끈다고
끈다고
k̕ɯndago
끈다고
끈다고




In [8]:
# Manually handle the problem cases reported above: the service altered these
# inputs, so their entries are added by hand.

In [9]:
# Hand-entered [word, Hangul pronunciation, IPA] entry for 앉다고, which the
# automated lookup reported as a problem instead of storing.
output_data["consonant_cluster_simplification"].append(
    ["앉다고", "안따고", "ant̕ago"]
)

In [10]:
# Hand-entered [word, Hangul pronunciation, IPA] entry for 얹다고, which the
# automated lookup reported as a problem instead of storing.
output_data["consonant_cluster_simplification"].append(
    ["얹다고", "언따고", "ʌnt̕ago"]
)

In [11]:
# Hand-entered [word, Hangul pronunciation, IPA] entry for 끼얹다고, which the
# automated lookup reported as a problem instead of storing.
output_data["consonant_cluster_simplification"].append(
    ["끼얹다고", "끼언따고", "k̕iʌnt̕ago"]
)

In [12]:
# Hand-entered [word, Hangul pronunciation, IPA] entry for 끊다고, which the
# automated lookup reported as a problem instead of storing.
output_data["consonant_cluster_simplification"].append(
    ["끊다고", "끈타고", "k̕ɯntʰago"]
)

In [13]:
# Display the collected entries (scraped plus hand-entered) for a visual check
# before they are saved.
output_data

{'consonant_cluster_simplification': [['넋', '넉', 'nʌk̚'],
  ['몫', '목', 'mok̚'],
  ['삯', '삭', 'sʰak̚'],
  ['닭', '닥', 'tak̚'],
  ['흙', '흑', 'hɯk̚'],
  ['칡', '칙', 'tsʰik̚'],
  ['까닭', '까닥', 'k̕adak̚'],
  ['기슭', '기슥', 'kisʰɯk̚'],
  ['삶', '삼', 'sʰam'],
  ['앎', '암', 'am'],
  ['없다고', '업따고', 'ʌp̚t̕ago'],
  ['않다고', '안타고', 'antʰago'],
  ['많다고', '만타고', 'mantʰago'],
  ['앉다고', '안따고', 'ant̕ago'],
  ['얹다고', '언따고', 'ʌnt̕ago'],
  ['끼얹다고', '끼언따고', 'k̕iʌnt̕ago'],
  ['끊다고', '끈타고', 'k̕ɯntʰago']]}

In [14]:
# Persist the collected validation data as a pickle file in the working
# directory. The context manager closes the file even if pickle.dump raises,
# which the manual open()/close() pair did not guarantee.
with open("validation_data_handbook_of_korean_linguistics.p", "wb") as f:
    pickle.dump(output_data, f)

# References

Yu Cho, Young-Mee. (2015). Syllable-Based Phonological Processes. In L. Brown & J. Yeon (Eds.), The Handbook of Korean Linguistics (Blackwell Handbooks in Linguistics) (1st ed., pp. 22-40). Wiley-Blackwell.