# 5. Dependency Paring
- 의문 문법

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import nltk
import random
import numpy as np
from collections import Counter, OrderedDict
import nltk
from nltk.tree import Tree
import os
from IPython.display import Image, display
from nltk.draw import TreeWidget
from nltk.draw.util import CanvasFrame
flatten = lambda l: [item for sublist in l for item in sublist]
random.seed(1024)

In [2]:
USE_CUDA = torch.cuda.is_available()
gpus = [0]
#torch.cuda.set_device(gpus[0])

FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if USE_CUDA else torch.ByteTensor

In [3]:
def getBatch(batch_size, train_data):
    random.shuffle(train_data)
    sindex = 0
    eindex = batch_size
    while eindex < len(train_data):
        batch = train_data[sindex: eindex]
        temp = eindex
        eindex = eindex + batch_size
        sindex = temp
        yield batch
    
    if eindex >= len(train_data):
        batch = train_data[sindex:]
        yield batch

In [4]:
def prepare_sequence(seq, to_index):
    idxs = list(map(lambda w: to_index[w] if to_index.get(w) is not None else to_index["<unk>"], seq))
    return Variable(LongTensor(idxs))

In [5]:
# Borrowed from https://stackoverflow.com/questions/31779707/how-do-you-make-nltk-draw-trees-that-are-inline-in-ipython-jupyter

def draw_nltk_tree(tree):
    cf = CanvasFrame()
    tc = TreeWidget(cf.canvas(), tree)
    tc['node_font'] = 'arial 15 bold'
    tc['leaf_font'] = 'arial 15'
    tc['node_color'] = '#005990'
    tc['leaf_color'] = '#3F8F57'
    tc['line_color'] = '#175252'
    cf.add_widget(tc, 50, 50)
    cf.print_to_file('tmp_tree_output.ps')
    cf.destroy()
    os.system('convert tmp_tree_output.ps tmp_tree_output.png')
    display(Image(filename='tmp_tree_output.png'))
    os.system('rm tmp_tree_output.ps tmp_tree_output.png')

In [6]:
class TransitionState(object):
    
    def __init__(self, tagged_sent):
        self.root = ('ROOT', '<root>', -1)
        self.stack = [self.root]
        self.buffer = [(s[0], s[1], i) for i, s in enumerate(tagged_sent)]
        self.address = [s[0] for s in tagged_sent] + [self.root[0]]
        self.arcs = []
        self.terminal=False
        
    def __str__(self):
        return 'stack : %s \nbuffer : %s' % (str([s[0] for s in self.stack]), str([b[0] for b in self.buffer]))
    
    def shift(self):
        
        if len(self.buffer) >= 1:
            self.stack.append(self.buffer.pop(0))
        else:
            print("Empty buffer")
            
    def left_arc(self, relation=None):
        
        if len(self.stack) >= 2:
            arc = {}
            s2 = self.stack[-2]
            s1 = self.stack[-1]
            arc['graph_id'] = len(self.arcs)
            arc['form'] = s1[0]
            arc['addr'] = s1[2]
            arc['head'] = s2[2]
            arc['pos'] = s1[1]
            if relation:
                arc['relation'] = relation
            self.arcs.append(arc)
            self.stack.pop(-2)
            
        elif self.stack == [self.root]:
            print("Element Lacking")
    
    def right_arc(self, relation=None):
        
        if len(self.stack) >= 2:
            arc = {}
            s2 = self.stack[-2]
            s1 = self.stack[-1]
            arc['graph_id'] = len(self.arcs)
            arc['form'] = s2[0]
            arc['addr'] = s2[2]
            arc['head'] = s1[2]
            arc['pos'] = s2[1]
            if relation:
                arc['relation'] = relation
            self.arcs.append(arc)
            self.stack.pop(-1)
            
        elif self.stack == [self.root]:
            print("Element Lacking")
    
    def get_left_most(self, index):
        left=['<NULL>', '<NULL>', None]
        
        if index == None: 
            return left
        for arc in self.arcs:
            if arc['head'] == index:
                left = [arc['form'], arc['pos'], arc['addr']]
                break
        return left
    
    def get_right_most(self, index):
        right=['<NULL>', '<NULL>', None]
        
        if index == None: 
            return right
        for arc in reversed(self.arcs):
            if arc['head'] == index:
                right=[arc['form'], arc['pos'], arc['addr']]
                break
        return right
    
    def is_done(self):
        return len(self.buffer) == 0 and self.stack == [self.root]
    
    def to_tree_string(self):
        if self.is_done() == False: 
            return None
        ingredient = []
        for arc in self.arcs:
            ingredient.append([arc['form'], self.address[arc['head']]])
        ingredient = ingredient[-1:] + ingredient[:-1]
        return self._make_tree(ingredient, 0)
    
    def _make_tree(self, ingredient, i, new=True):
    
        if new:
            treestr = "("
            treestr += ingredient[i][0]
            treestr += " "
        else:
            treestr = ""
        ingredient[i][0] = "CHECK"

        parents,_ = list(zip(*ingredient))

        if ingredient[i][1] not in parents:
            treestr += ingredient[i][1]
            return treestr

        else:
            treestr += "("
            treestr += ingredient[i][1]
            treestr += " "
            for node_i, node in enumerate(parents):
                if node == ingredient[i][1]:
                    treestr += self._make_tree(ingredient, node_i, False)
                    treestr += " "

            treestr = treestr.strip()
            treestr += ")"
        if new:
            treestr += ")"
        return treestr

In [8]:
state = TransitionState(nltk.pos_tag("He has good control .".split()))
print(state)
state.shift()
state.shift()
print(state)
state.left_arc()
print(state)
print(state.arcs)
state.shift()
state.shift()
print(state)
state.left_arc()
print(state)
state.right_arc()
print(state)
state.shift()
state.right_arc()
print(state)
state.right_arc()
print(state)
print(state.arcs)
state.is_done()

stack : ['ROOT'] 
buffer : ['He', 'has', 'good', 'control', '.']
stack : ['ROOT', 'He', 'has'] 
buffer : ['good', 'control', '.']
stack : ['ROOT', 'has'] 
buffer : ['good', 'control', '.']
[{'graph_id': 0, 'form': 'has', 'addr': 1, 'head': 0, 'pos': 'VBZ'}]
stack : ['ROOT', 'has', 'good', 'control'] 
buffer : ['.']
stack : ['ROOT', 'has', 'control'] 
buffer : ['.']
stack : ['ROOT', 'has'] 
buffer : ['.']
stack : ['ROOT', 'has'] 
buffer : []
stack : ['ROOT'] 
buffer : []
[{'graph_id': 0, 'form': 'has', 'addr': 1, 'head': 0, 'pos': 'VBZ'}, {'graph_id': 1, 'form': 'control', 'addr': 3, 'head': 2, 'pos': 'NN'}, {'graph_id': 2, 'form': 'has', 'addr': 1, 'head': 3, 'pos': 'VBZ'}, {'graph_id': 3, 'form': 'has', 'addr': 1, 'head': 4, 'pos': 'VBZ'}, {'graph_id': 4, 'form': 'ROOT', 'addr': -1, 'head': 1, 'pos': '<root>'}]


True

In [9]:
state.to_tree_string()

'(ROOT (has He (control good) .))'

In [10]:
draw_nltk_tree(Tree.fromstring(state.to_tree_string()))

FileNotFoundError: [Errno 2] No such file or directory: 'tmp_tree_output.png'