In [1]:
import nltk

In [9]:
class LearningDictionary():
    def __init__(self, sentence): # init
        self.words = nltk.word_tokenize(sentence) # 토큰화 
        self.tagged = nltk.pos_tag(self.words) # 품사 식별 후 태그 저장
        self.buildDictionary()
        self.buildReverseDictionary()

    def buildDictionary(self):
        self.dictionary = {}
        for (word, pos) in self.tagged: # 반복문을 통해 비어있는 dictionary를 초기화한다.
            self.dictionary[word] = pos

    def buildReverseDictionary(self):
        self.rdictionary = {}
        for key in self.dictionary.keys(): # 모든 dictionary 키의 품사를 추출한다
            value = self.dictionary[key]
            if value not in self.rdictionary: # rdictionary: 역딕셔너리
                self.rdictionary[value] = [key]
            else:
                self.rdictionary[value].append(key)

    def isWordPresent(self, word): # 단어가 있는지 없는지 여부를 확인
        return 'Yes' if word in self.dictionary else 'No'

    def getPOSForWord(self, word): # 매개변수로 주어진 단언의 품사를 반환
        return self.dictionary[word] if word in self.dictionary else None

    def getWordsForPOS(self, pos): # 역딕셔너리를 조사해 주어진 품사가 있는 문장의 모든 단어 반환(193p)
        return self.rdictionary[pos] if pos in self.rdictionary else None

In [10]:
sentence = "All the flights got delayed due to bad weather"

In [11]:
learning = LearningDictionary(sentence)

In [12]:
words = ["chair", "flights", "delayed", "pencil", "weather"]

In [13]:
pos = ["NN", "VBS", "NNS"]

In [18]:
for word in words:
    status = learning.isWordPresent(word) # word가 딕셔너리에 있는지 확인
    print("Is '{}' present in dictionary ? : '{}'".format(word, status))
    if status is True:
        print("\tPOS For '{}' is '{}'".format(word, learning.getPOSForWord(word)))

Is 'chair' present in dictionary ? : 'No'
Is 'flights' present in dictionary ? : 'Yes'
Is 'delayed' present in dictionary ? : 'Yes'
Is 'pencil' present in dictionary ? : 'No'
Is 'weather' present in dictionary ? : 'Yes'


In [19]:
for pword in pos:
    print("POS '{}' has '{}' words".format(pword, learning.getWordsForPOS(pword)))

POS 'NN' has '['weather']' words
POS 'VBS' has 'None' words
POS 'NNS' has '['flights']' words
