# Process

# Dependency Parsing

In [3]:
import spacy

nlp=spacy.load('en_core_web_sm/en_core_web_sm-3.4.1/')

text='Book me the morning flight' #arc-standard
# text= 'book the flight through houston' #arc-eager
# text = 'I am hungry' #arc-standard
word_list = list()
# dep_list = list()
for index,token in enumerate(nlp(text)):
    print(index+1, token.text,'=>',token.dep_,'=>',token.head.text)
    # text = np.array([index+1,token.text,token.dep_])
    # np.append(list_text,text)
    word_list.append((index+1,token.text,token.head.text)) #token.dep_ 
    # dep_list.append((token.text,token.dep_,token.head.text))

1 Book => ROOT => Book
2 me => dobj => Book
3 the => det => flight
4 morning => compound => flight
5 flight => appos => Book


In [4]:
from spacy import displacy
displacy.render(nlp(text),jupyter=True)

In [5]:
word_list #Buffer list

[(1, 'Book', 'Book'),
 (2, 'me', 'Book'),
 (3, 'the', 'flight'),
 (4, 'morning', 'flight'),
 (5, 'flight', 'Book')]

# 1. Transition-Based Dependency Parsing

## 1.1.1 Stack : Enqueue and Dequeue (Non-related Dependency Parsing)

In [48]:
from collections import deque
class Stack(object):
    def __init__(self):
        self.container = deque()
    def push(self,value):
        self.container.append(value)
    def pop(self):
        if len(self.container) != 0:
            return self.container.pop()  
        else: raise IndexError("An empty deque")
    def size(self):
        return len(self.container)

In [49]:
s = Stack()
#push items
for i in range(len(word_list)):
    s.push(word_list[i][1])
    
print("Initial queue")
print(s.container)

#pop items
for i in range(len(word_list)):
    s.pop()
    
print("\nQueue after removing elements")
print(s.container)

#Finish Stack is easy!!!

Initial queue
deque(['Book', 'me', 'the', 'morning', 'flight'])

Queue after removing elements
deque([])


## 1.1.2. Stack and Buffer

- LEFTARC: Assert a head-dependent relation between the word at the top of the stack and the word directly beneath it; 
remove the lower word from the stack. 
- RIGHTARC: Assert a head-dependent relation between the second word on the stack and the word at the top; 
remove the word at the top of the stack; 
- SHIFT: Remove the word from the front of the input buffer and push it onto the stack.

Start with root on stack, buffer with whole sentence
- If there’s nothing on the stack, you must shift
- If the top of the stack is the child of the top of the buffer, then make a left
edge
- If the top of the buffer is is a child of the top of the stack and the top of
the buffer has no children that have yet to be added to the tree, then
make a right



In [9]:
class Configuration(object):
    def __init__(self,dependency_list):
        self.stack = [(0,'root','ROOT')]
        self.buffer = dependency_list
        self.arcs = list()
    def __str__(self):
        return f'Stack : {self.stack} \nBuffer : {self.buffer} \nArcs : {self.arcs}'

class Transition(object):
    def __init__(self,approach):
        self.approach = approach #'arc-standard' 'arc-eager'
    #Arc-standard parsing cannot produce non-projective trees
    def left_arc(self,config,relation):
        if self.approach == 'arc-standard':
            #pop top of stack -> append arc relation
            index_i = config.stack.pop()
            index_j = config.stack.pop()
            config.stack.append(index_i)
            config.arcs.append((index_i, relation, index_j)) 
        elif self.approach == 'arc-eager':
            pass

    def right_arc(self,config,relation):
        if self.approach == 'arc-standard':
            #pop top of stack -> append arc relation
            index_i = config.stack.pop()
            index_j = config.stack.pop()
            config.stack.append(index_j)
            config.arcs.append((index_j, relation, index_i)) 
        elif self.approach == 'arc-eager':
            pass

    def shift(self,config): #move buffer to stack
        if len(config.buffer) <= 0:
            return -1
        index_i = config.buffer.pop(0)
        config.stack.append(index_i)

    def reduce(sefl,config):
        pass

class Parser(object):
    def __init__(self,approach):
        self.approach = approach

    def oracle(self,config):
        operation = Transition(self.approach)
        i = 0
        while not(len(config.buffer) == 0 and len(config.stack) == 1): #stop when buffer is empty and stack contain only root
            if (len(config.buffer) == 0 and len(config.stack) == 2):
                print('Right-Arc',end='->')
                operation.right_arc(config,'->')
                
            if len(config.stack) == 1:
                print("Shift_along",end='->')
                operation.shift(config)
            else:
                if config.stack[-1][1] == config.stack[-2][2]: 
                    print('Left-Arc',end='->')
                    operation.left_arc(config,'->')
                elif (config.stack[-1][2] == config.stack[-2][1]) : #next_head = prev_text
                    print('Right-Arc',end='->')
                    operation.right_arc(config,'->')
                else:
                    print("Shift",end='->')
                    operation.shift(config)
            i+=1
            if i == 20:
                break
        # print('\n',config)
        print('\n') 
        return config

In [13]:
buffer_list = word_list.copy()
config = Configuration(buffer_list)
print(config)

Stack : [(0, 'root', 'ROOT')] 
Buffer : [(1, 'Book', 'Book'), (2, 'me', 'Book'), (3, 'the', 'flight'), (4, 'morning', 'flight'), (5, 'flight', 'Book')] 
Arcs : []


In [14]:
#Dynamic Way
parsing = Parser('arc-standard')
new_config = parsing.oracle(config)
print(new_config)

Shift_along->Shift->Right-Arc->Shift->Shift->Shift->Left-Arc->Left-Arc->Right-Arc->Right-Arc->Shift_along->

Stack : [(0, 'root', 'ROOT')] 
Buffer : [] 
Arcs : [((1, 'Book', 'Book'), '->', (2, 'me', 'Book')), ((5, 'flight', 'Book'), '->', (4, 'morning', 'flight')), ((5, 'flight', 'Book'), '->', (3, 'the', 'flight')), ((1, 'Book', 'Book'), '->', (5, 'flight', 'Book')), ((0, 'root', 'ROOT'), '->', (1, 'Book', 'Book'))]


### Manually

In [12]:
# Text for
text='Book me the morning flight'

word_list = list()
# dep_list = list()
for index,token in enumerate(nlp(text)):
    print(index+1, token.text,'=>',token.dep_,'=>',token.head.text)
    # text = np.array([index+1,token.text,token.dep_])
    # np.append(list_text,text)
    word_list.append((index+1,token.text,token.head.text))

buffer_list = word_list.copy()
config = Configuration(buffer_list)
# print(config)

Tran = Transition('arc-standard')
Tran.shift(config) 
# print('Step 1 :\n',config)
Tran.shift(config) 
# print('\nStep 2 :\n',config)
Tran.right_arc(config,'->') 
# print('\nStep 3 :\n',config)
Tran.shift(config) 
# print('\nStep 4 :\n',config)
Tran.shift(config) 
# print('\nStep 5 :\n',config)
Tran.shift(config) 
# print('\nStep 6 :\n',config)
Tran.left_arc(config,'->')
# print('\nStep 7 :\n',config)
Tran.left_arc(config,'->')
# print('\nStep 8 :\n',config)
Tran.right_arc(config,'->')
# print('\nStep 9 :\n',config)
Tran.right_arc(config,'->')
print('\nStep 10 : Done!\n',config)
#how to change it to dynamic -> Machine Learning YEAH!!!!!! NO

1 Book => ROOT => Book
2 me => dobj => Book
3 the => det => flight
4 morning => compound => flight
5 flight => appos => Book

Step 10 : Done!
 Stack : [(0, 'root', 'ROOT')] 
Buffer : [] 
Arcs : [((1, 'Book', 'Book'), '->', (2, 'me', 'Book')), ((5, 'flight', 'Book'), '->', (4, 'morning', 'flight')), ((5, 'flight', 'Book'), '->', (3, 'the', 'flight')), ((1, 'Book', 'Book'), '->', (5, 'flight', 'Book')), ((0, 'root', 'ROOT'), '->', (1, 'Book', 'Book'))]


## 1.2. Dependency Tree
- The dependency structure is a tree (directed acyclic graph) with the main verb as its root (head).

In [24]:
new_config.arcs

[((1, 'Book', 'Book'), '->', (2, 'me', 'Book')),
 ((5, 'flight', 'Book'), '->', (4, 'morning', 'flight')),
 ((5, 'flight', 'Book'), '->', (3, 'the', 'flight')),
 ((1, 'Book', 'Book'), '->', (5, 'flight', 'Book')),
 ((0, 'root', 'ROOT'), '->', (1, 'Book', 'Book'))]

In [45]:
#store to list
from collections import defaultdict
graph = defaultdict(list)
for i in new_config.arcs:
    graph[f"{i[0][0]}"].append(f"{i[2][0]}")
graph

defaultdict(list, {'1': ['2', '5'], '5': ['4', '3'], '0': ['1']})

### 1.2.1 Breadth-first Search

In [50]:
def BFS(graph,s):
    visited = set() #unique number
    queue = set()   #list in python is basiaclly queue
    visited.add(s)  #means make it black
    queue.add(s)

    while queue:        #as long as the queue is not empty....
        u = queue.pop() #pop the front guy.... basiaclly index 0
        
        print(u, "-->", end = " ") 
        for neighbor in graph[u]:       #for everyone who connects to u,
            if neighbor not in visited:
                visited.add(neighbor)    #add them to visited
                queue.add(neighbor)      #add them to the queue

BFS(graph,'0')

0 --> 1 --> 5 --> 3 --> 2 --> 4 --> 

### 1.2.2 Depth-first Search

In [51]:
visited = set()

def DFS(graph,s):
    if s not in visited:
        print(s,'-->',end=" ")
        visited.add(s)
        for neighbor in graph[s]:
            DFS(graph,neighbor)

DFS(graph,'0')

0 --> 1 --> 2 --> 5 --> 4 --> 3 --> 

### 1.2.3 Beam Search 

In [None]:
def BeamSearch():
    pass

### Reference : Transition-Based
- https://www.youtube.com/watch?v=xG0MXIPVUdk&list=PLyyEwPZh6aHpDieqWwr7hTgreKnYSY9Ox&index=60
- https://www.youtube.com/watch?v=2jLk93iIyrw&list=PLr9TFf9GjancAHiiP5cqGmYsZad99Uf_h&index=39
- https://www.youtube.com/watch?v=f5-hTA9hA3s&list=WL&index=8
- https://www.youtube.com/watch?v=oLHnqGmQtI4
- http://www.cs.umd.edu/class/fall2017/cmsc723/slides/slides_13.pdf
- Chapter 15 Dependency Parsing, Speech and Language Processing. Daniel Jurafsky & James H. Martin. Copyright c 2019. All rights reserved. Draft of October 2, 2019.
- Covington, M. (2001). A fundamental algorithm for dependency parsing. In Proceedings of the 39th Annual ACM Southeast Conference, 95–102.
- Nivre, J. (2003). An efficient algorithm for projective dependency parsing. In Proceedings of the 8th International Workshop on Parsing Technologies (IWPT).

# 2. Graph-Based Dependency Parsing

## 2.1. Binary Tree (Non-related Dependency Parsing)

In [22]:
class Node(object):
    def __init__(self, key):
        self.left  = None
        self.right = None
        self.key = key
        
    def insert(self, key):
        if(self.key):
            if(key < self.key):
                if(self.left == None):
                    self.left = Node(key)
                else:
                    self.left.insert(key)
            
            elif(key >= self.key):
                if(self.right == None):
                    self.right = Node(key)
                else:
                    self.right.insert(key)
        else:
            self.key = key
    
    def printT(self):        
        if (self.left):
            self.left.printT()
        print(self.key)
        if (self.right):
            self.right.printT()
    
    def delete(self,value):
        if value < self.key: #CaseD
            if self.left:
                self.left = self.left.delete(value)
        elif value > self.key: #CaseC
            if self.right:
                self.right = self.right.delete(value)
        else:
            if self.left == None and self.right == None: #Case A and B
                return None
            elif self.left == None:
                return self.right
            elif self.right == None: 
                return self.left

In [24]:
word_list

[(1, 'I', 'am'), (2, 'am', 'am'), (3, 'hungry', 'am')]

In [23]:
root = Node(len(word_list))
for i in word_list:
    root.insert(i[0])

root.printT()

1
2
3
3


## 2.2 Maximum Spanning Tree (Chu-Liu-Edmonds)

In [532]:
class Edmonds: 
    adj = []
    def __init__(self, V, E): 
        self.V = V
        self.E = E
        Edmonds.adj = [[0 for i in range(self.V)] for j in range(self.V)]
    
    def BestInEdge():
        pass

    def MaximumSpanningTree(self,V,E,root,score):
        F = []
        T = []
        score = []
        for v in range(self.V):
            best = self.BestInEdge()
            F = F and best
            for e in range(self.E):
                score[e] -= score[best]

            if T:
                return T
            else:
                C =


    def addEdge(self, s, e): #Add to Matrix
        Edmonds.adj[s-1][e-1] = Edmonds.adj[e-1][s-1] = 1


In [533]:
size = len(word_list)
Matrix = Edmonds(size,size)
# Matrix.addEdge(1, 2)
# Matrix.addEdge(2, 6)
# Matrix.addEdge(3, 4)
# Matrix.addEdge(4, 6)
# Matrix.addEdge(5, 3)
# Matrix.addEdge(5, 4)
Matrix.adj

[[0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0]]

### Reference 
- https://courses.cs.washington.edu/courses/cse490u/17wi/slides/CLE.pdf
- https://www.youtube.com/watch?v=dOCRzahEL84

# Future Work
- Projective vs Non-Projective