In [15]:
import nltk
from nltk.chunk import tree2conlltags
from nltk.corpus import names
import random

#nltk.download('names')

In [16]:
class AnaphoraExample:
    """Small anaphora demo built around a last-letter name-gender classifier.

    On construction a Naive Bayes classifier is trained on the NLTK ``names``
    corpus. ``learnAnaphora`` then walks through sentences and prints, for each
    one, the proper nouns (paired with their predicted gender), coordinating
    conjunctions and personal pronouns in the order they occur.
    """

    # Sample sentences used by learnAnaphora() when the caller supplies none.
    DEFAULT_SENTENCES = (
        "John is a man. He walks",
        "John and Mary are married. They have two kids",
        "In order for Ravi to be successful, he should follow John",
        "John met Mary in Barista. She asked him to order a Pizza",
    )

    def __init__(self):
        """Train the gender classifier; takes no arguments.

        Reads ``male.txt`` and ``female.txt`` from the NLTK ``names`` corpus,
        labels every name, shuffles the combined list and trains
        ``nltk.NaiveBayesClassifier`` on the features from ``feature``.
        """
        males = [(name, 'male') for name in names.words('male.txt')]
        females = [(name, 'female') for name in names.words('female.txt')]
        combined = males + females
        random.shuffle(combined)
        training = [(self.feature(name), gender) for (name, gender) in combined]
        self._classifier = nltk.NaiveBayesClassifier.train(training)

    def feature(self, word):
        """Return the feature dict for ``word``: its last character only.

        This is the simplest possible feature for telling male from female
        names. Slicing (``word[-1:]``) is used instead of indexing so that an
        empty string yields ``''`` rather than raising ``IndexError``.

        Note (translated from the original comment): page 249 of the reference
        text shows the ``gender`` function where ``feature`` should appear, so
        this definition follows the accompanying slides instead.
        """
        return {'last(1)': word[-1:]}

    def gender(self, word):
        """Classify ``word`` with the classifier stored in ``self._classifier``."""
        return self._classifier.classify(self.feature(word))

    def _mentions(self, items):
        """Collect the tokens of interest from (token, POS, NE-tag) triples.

        Proper nouns (``NNP``) whose NE tag is ``B-PERSON`` or ``O`` are added
        as ``(token, predicted_gender)``; conjunctions (``CC``) and personal
        pronouns (``PRP``) are added as bare strings.
        """
        stack = []
        for token, pos, ne_tag in items:
            if pos == 'NNP' and ne_tag in ('B-PERSON', 'O'):
                stack.append((token, self.gender(token)))
            elif pos in ('CC', 'PRP'):
                stack.append(token)
        return stack

    def learnAnaphora(self, sentences=None):
        """Print each sentence followed by its names, conjunctions and pronouns.

        Args:
            sentences: optional iterable of sentence strings (a single string
                is treated as one sentence). When omitted, the built-in
                ``DEFAULT_SENTENCES`` are used.

        Returns:
            None. Results are written to standard output only.
        """
        if sentences is None:
            sentences = self.DEFAULT_SENTENCES
        elif isinstance(sentences, str):
            # A bare string would otherwise be iterated character by character.
            sentences = [sentences]

        for sent in sentences:
            # Tokenize, POS-tag, then run the (non-binary) named-entity chunker.
            tagged = nltk.pos_tag(nltk.word_tokenize(sent))
            chunks = nltk.ne_chunk(tagged, binary=False)
            print(sent)
            # Flatten the chunk tree to (token, POS, IOB NE-tag) triples.
            stack = self._mentions(tree2conlltags(chunks))
            print("\t {}".format(stack))
        

In [18]:
# Build the example object; its constructor trains the name-gender classifier
# from the NLTK `names` corpus (the corpus must already be downloaded).
anaphora = AnaphoraExample()

In [19]:
# Run the demo: for each built-in sample sentence, print the sentence and then
# the list of proper nouns (with predicted gender), conjunctions and pronouns.
anaphora.learnAnaphora()

John is a man. He walks
	 [('John', 'male'), 'He']
John and Mary are married. They have two kids
	 [('John', 'male'), 'and', ('Mary', 'female'), 'They']
In order for Ravi to be successful, he should follow John
	 [('Ravi', 'female'), 'he', ('John', 'male')]
John met Mary in Barista. She asked him to order a Pizza
	 [('John', 'male'), ('Mary', 'female'), 'She', 'him']
