# Process

## 0. Dependency Parsing

In [526]:
import spacy
import numpy as np
import pandas as pd

# Load the locally stored small English pipeline and parse one example sentence.
nlp = spacy.load('en_core_web_sm/en_core_web_sm-3.4.1/')

text = 'Book me the morning flight'
word_list = []  # (1-based position, token text) pairs
dep_list = []   # (token text, dependency label) pairs

for position, token in enumerate(nlp(text), start=1):
    # Show each token with its dependency label and the text of its head.
    print(position, token.text, '=>', token.dep_, '=>', token.head.text)
    word_list.append((position, token.text))
    dep_list.append((token.text, token.dep_))

1 Book => ROOT => Book
2 me => dobj => Book
3 the => det => flight
4 morning => compound => flight
5 flight => appos => Book


In [521]:
from spacy import displacy
# Draw the dependency parse of `text` inline in the notebook.
displacy.render(nlp(text),jupyter=True)

In [527]:
word_list # (position, token text) pairs; a copy of this list later serves as the parser's input buffer

[(1, 'Book'), (2, 'me'), (3, 'the'), (4, 'morning'), (5, 'flight')]

## 1. Stack : Push and Pop

In [171]:
from collections import deque
class Stack(object):
    """Last-in, first-out container backed by a ``collections.deque``."""

    def __init__(self):
        # The right end of the deque acts as the top of the stack.
        self.container = deque()

    def push(self, value):
        """Put ``value`` on top of the stack."""
        self.container.append(value)

    def pop(self):
        """Remove and return the top item.

        Raises:
            IndexError: if the stack holds no items.
        """
        if not self.container:
            raise IndexError("An empty deque")
        return self.container.pop()

    def size(self):
        """Return the number of items currently stored."""
        return len(self.container)

In [294]:
s = Stack()

# Push the token text (second element of every word_list pair) in sentence order.
for entry in word_list:
    s.push(entry[1])

print("Initial queue")
print(s.container)

# Pop once per token, which leaves the stack empty again.
for entry in word_list:
    s.pop()

print("\nQueue after removing elements")
print(s.container)

# The stack itself is the easy part.

Initial queue
deque(['Book', 'me', 'the', 'morning', 'flight'])

Queue after removing elements
deque([])


## 2. Transition-based : Stack and Buffer

- LEFTARC: Assert a head-dependent relation between the word at the top of the stack and the word directly beneath it; 
remove the lower word from the stack. 
- RIGHTARC: Assert a head-dependent relation between the second word on the stack and the word at the top; 
remove the word at the top of the stack; 
- SHIFT: Remove the word from the front of the input buffer and push it onto the stack.

Start with root on stack, buffer with whole sentence
- If there’s nothing on the stack, you must shift
- If the top of the stack is the child of the top of the buffer, then make a left
edge
- If the top of the buffer is a child of the top of the stack and the top of
the buffer has no children that have yet to be added to the tree, then
make a right edge



In [528]:
class Configuration(object):
    """Parser state for transition-based dependency parsing.

    Attributes:
        stack: list of items under consideration; starts with the artificial
            root entry ``(0, 'root')``.
        buffer: list of input items that have not been moved to the stack yet.
        arcs: list of arcs recorded so far.
    """

    def __init__(self,dependency_list):
        """Create the initial configuration: root on the stack, input in the buffer.

        Args:
            dependency_list: iterable of input items (the notebook passes
                ``(position, word)`` tuples).
        """
        self.stack = [(0,'root')]
        # Take a private copy: the buffer is consumed while parsing, and that
        # must not empty the list the caller passed in.
        self.buffer = list(dependency_list)
        self.arcs = list()

    def __str__(self):
        """Return a three-line summary of stack, buffer and arcs."""
        return f'Stack : {self.stack} \nBuffer : {self.buffer} \nArcs : {self.arcs}'

class Transition(object):
    """Arc-standard transitions applied to a configuration object.

    The configuration must expose list attributes ``stack``, ``buffer`` and
    ``arcs``. Arc-standard parsing cannot produce non-projective trees.
    Arc-eager transitions are future work.
    """

    @staticmethod
    def _require_two_on_stack(config, operation):
        """Raise IndexError unless the stack holds the two items an arc needs."""
        if len(config.stack) < 2:
            raise IndexError(
                f"{operation} needs at least two items on the stack, "
                f"found {len(config.stack)}"
            )

    def left_arc(self,config,relation):
        """Make the top of the stack the head of the item directly beneath it.

        The lower item is removed from the stack and the arc
        ``(head, relation, dependent)`` is appended to ``config.arcs``.

        Raises:
            IndexError: if the stack holds fewer than two items; the
                configuration is left unchanged in that case.
        """
        # Validate first so a failed transition never leaves a half-popped stack.
        self._require_two_on_stack(config, 'left_arc')
        head = config.stack.pop()
        dependent = config.stack.pop()
        config.stack.append(head)
        config.arcs.append((head, relation, dependent))

    def right_arc(self,config,relation):
        """Make the second item on the stack the head of the top item.

        The top item is removed from the stack and the arc
        ``(head, relation, dependent)`` is appended to ``config.arcs``.

        Raises:
            IndexError: if the stack holds fewer than two items; the
                configuration is left unchanged in that case.
        """
        self._require_two_on_stack(config, 'right_arc')
        dependent = config.stack.pop()
        head = config.stack.pop()
        config.stack.append(head)
        config.arcs.append((head, relation, dependent))

    def shift(self,config):
        """Move the front item of the buffer onto the stack.

        Returns:
            -1 if the buffer is empty (nothing is changed), otherwise None.
        """
        if len(config.buffer) <= 0:
            return -1
        config.stack.append(config.buffer.pop(0))

class Parser(object):
    """Skeleton for a parser that will pick transitions automatically."""

    def __init__(self, state, sentence, feature_map):
        """Keep the given state, sentence and feature map; start with no data."""
        self.state, self.sentence = state, sentence
        self.map = feature_map
        self.data = list()

    def oracle(self):
        """Not implemented yet; returns None."""
        return None


In [529]:
# Work on a shallow copy so that consuming the buffer during parsing cannot empty word_list.
buffer_list = word_list.copy()
config = Configuration(buffer_list)

In [530]:
# Manually replay the transition sequence for "Book me the morning flight".
# Each trailing comment shows the stack after the step; '->' is a placeholder relation label.
Tran = Transition()
Tran.shift(config) # stack: root, Book
# print('Step 1 :\n',config)
Tran.shift(config) # stack: root, Book, me
# print('Step 2 :\n',config)
Tran.right_arc(config,'->') # arc Book -> me; stack: root, Book
# print('Step 3 :\n',config)
Tran.shift(config) # stack: root, Book, the
# print('Step 4 :\n',config)
Tran.shift(config) # stack: root, Book, the, morning
# print('Step 5 :\n',config)
Tran.shift(config) # stack: root, Book, the, morning, flight (buffer is now empty)
# print('Step 6 :\n',config)
Tran.left_arc(config,'->') # arc flight -> morning; stack: root, Book, the, flight
# print('Step 7 :\n',config)
Tran.left_arc(config,'->') # arc flight -> the; stack: root, Book, flight
# print('Step 8 :\n',config)
Tran.right_arc(config,'->') # arc Book -> flight; stack: root, Book
# print('Step 9 :\n',config)
Tran.right_arc(config,'->') # arc root -> Book; stack: root
print('Step 10 : Done!\n',config)
# Next step: choose the transitions automatically (machine learning) instead of by hand.

Step 10 : Done!
 Stack : [(0, 'root')] 
Buffer : [] 
Arcs : [((1, 'Book'), '->', (2, 'me')), ((5, 'flight'), '->', (4, 'morning')), ((5, 'flight'), '->', (3, 'the')), ((1, 'Book'), '->', (5, 'flight')), ((0, 'root'), '->', (1, 'Book'))]


## 3. Dependency Tree
- The dependency structure is a tree (directed acyclic graph) with the main verb as its root (head).

### Reference : Transition-Based
- https://www.youtube.com/watch?v=xG0MXIPVUdk&list=PLyyEwPZh6aHpDieqWwr7hTgreKnYSY9Ox&index=60
- https://www.youtube.com/watch?v=2jLk93iIyrw&list=PLr9TFf9GjancAHiiP5cqGmYsZad99Uf_h&index=39
- https://www.youtube.com/watch?v=f5-hTA9hA3s&list=WL&index=8
- https://www.youtube.com/watch?v=oLHnqGmQtI4
- http://www.cs.umd.edu/class/fall2017/cmsc723/slides/slides_13.pdf
- Chapter 15 Dependency Parsing, Speech and Language Processing. Daniel Jurafsky & James H. Martin. Copyright © 2019. All rights reserved. Draft of October 2, 2019.
- Covington, M. (2001). A fundamental algorithm for dependency parsing. In Proceedings of the 39th Annual ACM Southeast Conference, 95–102.
- Nivre, J. (2003). An efficient algorithm for projective dependency parsing. In Proceedings of the 8th International Workshop on Parsing Technologies (IWPT).

## 4. Chu-Liu-Edmonds : Maximum Spanning Tree

In [342]:
class MST:
    """Symmetric adjacency matrix over vertices numbered 1..v.

    Attributes:
        v: number of vertices (the matrix is v x v).
        e: value passed as the second constructor argument; stored but not
            used by this class.
        adj: this instance's matrix, with 1 where an edge was added and 0 elsewhere.
    """

    # Class-level placeholder kept so that `MST.adj` still resolves; every
    # instance shadows it with its own matrix in __init__.
    adj = []

    def __init__(self, v, e):
        """Create a v x v matrix filled with zeros."""
        self.v = v
        self.e = e
        # Per-instance matrix: rebinding the class attribute here would make
        # every MST object share whichever matrix was created last.
        self.adj = [[0] * self.v for _ in range(self.v)]

    def addEdge(self, s, e): #Add to Matrix
        """Mark an edge between the 1-based vertices ``s`` and ``e`` in both directions.

        Raises:
            IndexError: if either vertex is outside ``1..v``.
        """
        # Without this check, vertex 0 would wrap around to the last row/column.
        for vertex in (s, e):
            if not 1 <= vertex <= self.v:
                raise IndexError(f"vertex {vertex} is outside 1..{self.v}")
        self.adj[s-1][e-1] = self.adj[e-1][s-1] = 1


In [296]:
size = len(word_list)
# One vertex per word; the same value is also passed as the second constructor argument.
Matrix = MST(size,size)
# NOTE(review): the commented-out examples below use vertex 6, which looks out of range
# for a matrix built from the 5-word sentence — confirm before re-enabling them.
# Matrix.addEdge(1, 2)
# Matrix.addEdge(2, 6)
# Matrix.addEdge(3, 4)
# Matrix.addEdge(4, 6)
# Matrix.addEdge(5, 3)
# Matrix.addEdge(5, 4)
Matrix.adj

[[0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0]]

### Reference 
- https://courses.cs.washington.edu/courses/cse490u/17wi/slides/CLE.pdf

## 5. Convert to Binary Tree

In [518]:
class Node(object):
    """Node of a binary search tree.

    Keys smaller than a node's key live in its left subtree; keys greater
    than or equal to it (so duplicates too) live in its right subtree.
    """

    def __init__(self, key):
        """Create a leaf holding ``key`` (``None`` marks a node with no key yet)."""
        self.left  = None
        self.right = None
        self.key = key

    def insert(self, key):
        """Insert ``key`` into the subtree rooted at this node."""
        # Only None means "no key yet"; a truthiness test would wrongly treat
        # falsy keys such as 0 as empty and overwrite them.
        if self.key is None:
            self.key = key
            return
        if key < self.key:
            if self.left is None:
                self.left = Node(key)
            else:
                self.left.insert(key)
        else:
            if self.right is None:
                self.right = Node(key)
            else:
                self.right.insert(key)

    def printT(self):
        """Print the keys of this subtree in ascending (in-order) order, one per line."""
        if self.left is not None:
            self.left.printT()
        print(self.key)
        if self.right is not None:
            self.right.printT()

    def delete(self,value):
        """Remove one node whose key equals ``value`` from this subtree.

        Returns:
            The root of the subtree after removal: usually this node, a child
            when this node itself was removed, or None when the subtree became
            empty. Callers should rebind, e.g. ``root = root.delete(value)``.
            The subtree is returned unchanged when ``value`` is absent.
        """
        if value < self.key:
            if self.left is not None:
                self.left = self.left.delete(value)
            # Return self so the parent keeps this subtree attached.
            return self
        if value > self.key:
            if self.right is not None:
                self.right = self.right.delete(value)
            return self
        # This node holds the value.
        if self.left is None:
            return self.right  # also covers the leaf case (returns None)
        if self.right is None:
            return self.left
        # Two children: take over the smallest key of the right subtree,
        # then remove that key's node from the right subtree.
        successor = self.right
        while successor.left is not None:
            successor = successor.left
        self.key = successor.key
        self.right = self.right.delete(successor.key)
        return self

In [519]:
# NOTE(review): the root is seeded with len(word_list) and the loop then inserts every
# position as well, so the last position is stored twice (visible as the repeated final
# key in the printed output) — confirm this is intended.
root = Node(len(word_list))
for i in word_list:
    root.insert(i[0])

# Print the keys in ascending order.
root.printT()

1
2
3
4
5
5


# Future Work
- Projective vs Non-Projective