In [24]:
#####################################################################
#                       Libraries:                                  #
#####################################################################
import pandas as pd
import numpy as np
import matplotlib as plt
# operating-system utilities (e.g. working with files and paths)
import os 
import xml.etree.ElementTree as ET
from enum import Enum

In [25]:
#####################################################################
#                      Functions Details:                           #
#####################################################################
## Retrieve the IDs of all scenes in the text to process -- they are reached through edges of type 'H'
def Get_Scene_ID(layer, Elaborator_ID):
    """Collect the IDs of the scene nodes found in ``layer``.

    Every 'node' element of ``layer`` is scanned; for each of its 'edge'
    children whose "type" is 'H', the edge's "toID" is recorded and the
    target node is handed to ``Create_Intermidiate_Scene`` so that the IDs
    it reports are recorded right after it.

    Parameters:
        layer: iterable of XML elements (nodes carrying edge children).
        Elaborator_ID: list forwarded to ``Create_Intermidiate_Scene``,
            which appends to it in place.

    Returns:
        A tuple ``(scene_ids, Elaborator_ID)``.
    """
    scene_ids = []
    node_elements = (item for item in layer if item.tag == 'node')
    for node_element in node_elements:
        for edge in node_element:
            # Only 'H' edges introduce a scene.
            if edge.tag != 'edge' or edge.attrib["type"] != 'H':
                continue
            target_id = edge.attrib["toID"]
            scene_ids.append(target_id)
            scene_root = Get_Node_By_ID(layer, target_id)
            scene_ids += Create_Intermidiate_Scene(layer, scene_root, Elaborator_ID)
    return scene_ids, Elaborator_ID
#####################################################################
def Create_Intermidiate_Scene(layer, Scene_Node, Elaborator_ID):
    """Collect the IDs of non-terminal nodes reached through 'E' edges.

    Starting at ``Scene_Node``, every 'edge' child is followed:
      * an 'E' edge whose target is not terminal contributes the target's
        "ID" (once per call) to the result and to ``Elaborator_ID``;
      * any other edge is followed recursively and the IDs found below it
        are added to the result.

    Parameters:
        layer: iterable of XML elements used to resolve "toID" references.
        Scene_Node: the node element to start from.
        Elaborator_ID: list that is appended to in place.

    Returns:
        The list of collected IDs; empty when ``Scene_Node`` is terminal.
    """
    if Is_Node_Terminal(Scene_Node):
        return []

    collected = []
    for edge in Scene_Node:
        if edge.tag != 'edge':
            continue
        edge_type = edge.attrib["type"]
        target = Get_Node_By_ID(layer, edge.attrib["toID"])
        if edge_type == 'E':
            if not Is_Node_Terminal(target):
                target_id = target.attrib["ID"]
                # Avoid recording the same target twice within this call.
                if target_id not in collected:
                    collected.append(target_id)
                    Elaborator_ID.append(target_id)
        else:
            # Any non-'E' edge: keep descending.
            collected.extend(Create_Intermidiate_Scene(layer, target, Elaborator_ID))
    return collected
                        
#####################################################################
# Get the node an edge points to (looked up with the edge's 'toID' attribute)
def Get_Node_By_ID(layer, ID):
    """Return the first 'node' element of ``layer`` whose "ID" attribute equals ``ID``.

    Returns None when no such node exists.
    """
    matches = (
        candidate
        for candidate in layer
        if candidate.tag == 'node' and candidate.attrib["ID"] == ID
    )
    return next(matches, None)
#####################################################################
# Check whether all edges of a node are of type "Terminal"
def Is_Node_Terminal(node):
    """Return 1 if ``node`` has no 'edge' child with a type other than "Terminal", else 0.

    Children that are not 'edge' elements are ignored, so a node without
    any edge is reported as terminal (1).
    """
    has_non_terminal_edge = any(
        item.tag == 'edge' and item.attrib["type"] != "Terminal"
        for item in node
    )
    return 0 if has_non_terminal_edge else 1
#####################################################################  
# Return the type of an edge: Terminal, A, P, ... (see the paper for all the different types)
def Get_Type_Edge(edge):
    """Return the "type" attribute of ``edge``; None when the element is not an 'edge'."""
    if edge.tag != 'edge':
        return None
    return edge.attrib["type"]
##################################################################### 
# Get the word an edge points to, looked up in layer 1.
def Get_Word(layer, ID):
    """Return the "text" attribute of the first child of the node with the given ``ID``.

    Nodes of ``layer`` are scanned in order; a matching node that has no
    children is skipped. Returns None when nothing is found.
    """
    for candidate in layer:
        if candidate.tag != 'node' or candidate.attrib["ID"] != ID:
            continue
        first_child = next(iter(candidate), None)
        if first_child is not None:
            return first_child.attrib["text"]
    return None
#####################################################################
# Get the original input sentence, just to make a visual comparison
def Get_Original(layer):
    """Rebuild the sentence from the "text" attributes found in ``layer``.

    Each child of every 'node' element contributes its text followed by a
    space. For a node whose "type" is 'Punctuation', the last character
    accumulated so far is dropped first, which attaches the punctuation
    to the previous word.

    Returns:
        The reconstructed sentence (it ends with a space when non-empty).
    """
    sentence = ""
    for node in layer:
        if node.tag != 'node':
            continue
        for child in node:
            is_punctuation = node.attrib["type"] == 'Punctuation'
            token = child.attrib["text"] + " "
            if is_punctuation:
                # Remove the separator written after the previous token.
                sentence = sentence[:-1]
            sentence += token
    return sentence
##################################################################### 
# Get the target IDs ('toID') of all edges of a specific node
def Get_All_Edge_ID(layer, NodeID):
    """Return the "toID" of every 'edge' child of the nodes in ``layer`` whose "ID" is ``NodeID``.

    The IDs are listed in document order; the list is empty when no node
    matches or the matching node has no edges.
    """
    return [
        child.attrib["toID"]
        for node in layer
        if node.tag == "node" and node.attrib["ID"] == NodeID
        for child in node
        if child.tag == 'edge'
    ]
##################################################################### 
# If a node is terminal, all its edges are Terminals, so we retrieve the text it represents
def Get_Text_From_Terminal_Node(layer1, layer2, terminal_Nd):
    """Return the words covered by a terminal node, each followed by a space.

    Parameters:
        layer1: layer in which the words are looked up (via ``Get_Word``).
        layer2: layer in which the node's edges are looked up by node ID.
        terminal_Nd: the node element to read.

    Returns:
        The concatenated words, or "" when the node is not terminal.
    """
    if not Is_Node_Terminal(terminal_Nd):
        return ""
    target_ids = Get_All_Edge_ID(layer2, terminal_Nd.attrib["ID"])
    return "".join(Get_Word(layer1, target_id) + " " for target_id in target_ids)
##################################################################### 
# Depth-first traversal of the tree -- returns the text
def recursive(layer1, layer2, Node, All_sceneID, Elaborator_ID):
    """Return the text below ``Node``, each word followed by a space.

    A terminal node yields its own words. Otherwise every 'edge' child is
    visited in order:
      * edges whose target is listed in ``All_sceneID`` are skipped;
      * "Terminal" edges contribute the word they point to;
      * "F" edges are skipped;
      * any other edge is followed recursively.

    Parameters:
        layer1: layer in which words are looked up.
        layer2: layer in which nodes are looked up by ID.
        Node: the node element to start from.
        All_sceneID: collection of node IDs that must not be descended into.
        Elaborator_ID: only passed along to the recursive calls.
    """
    if Is_Node_Terminal(Node):
        return Get_Text_From_Terminal_Node(layer1, layer2, Node)

    pieces = []
    for child in Node:
        if child.tag != "edge":
            continue
        target_id = child.attrib["toID"]
        if target_id in All_sceneID:
            continue
        edge_type = Get_Type_Edge(child)
        if edge_type == "Terminal":
            pieces.append(Get_Word(layer1, target_id) + " ")
        elif edge_type != "F":
            next_node = Get_Node_By_ID(layer2, target_id)
            pieces.append(recursive(layer1, layer2, next_node, All_sceneID, Elaborator_ID))
    return "".join(pieces)
##################################################################### 
# Combine all scenes into one simplified output text
def Get_Simple(Lists_Scene):
    """Join the scene texts into one string of capitalised sentences.

    Each scene has its trailing whitespace removed, its first character
    upper-cased and '. ' appended. Scenes that are empty (or contain only
    whitespace) are skipped instead of raising IndexError, and a scene
    without a trailing space no longer loses its last character.

    Parameters:
        Lists_Scene: iterable of scene strings.

    Returns:
        The concatenated sentences; "" when there is nothing to output.
    """
    sentences = []
    for scene in Lists_Scene:
        body = scene.rstrip()
        if not body:
            # Nothing to say for this scene: do not emit a bare '. '.
            continue
        sentences.append(body[0].upper() + body[1:] + '. ')
    return ''.join(sentences)
##################################################################### 
# The main function returns the entire simple sentence and also the list of scenes taken independently
def Main(layer1, layer2):
    """Split the passage into scenes and build the simplified text.

    Parameters:
        layer1: layer in which the words are looked up.
        layer2: layer holding the nodes and edges that are traversed.

    Returns:
        A tuple ``(simple, Lists_Scene)``: the combined text produced by
        ``Get_Simple`` and the list with the text of each scene.
    """
    # Get_Scene_ID returns two lists: the scene IDs and the elaborator IDs.
    scene_ids, elaborator_ids = Get_Scene_ID(layer2, [])
    # For each scene, start from its first node and collect the text below it.
    scene_texts = [
        recursive(layer1, layer2, Get_Node_By_ID(layer2, scene_id), scene_ids, elaborator_ids)
        for scene_id in scene_ids
    ]
    return Get_Simple(scene_texts), scene_texts
##################################################################### 

In [26]:
##################################################################### 
#                     Load data & Variables:                       #
#####################################################################
# Path of the XML file to parse, relative to the working directory.
XML_file = 'out.xml'
tree = ET.parse(XML_file)
root = tree.getroot()

# The two layers are picked by position among the children of the root.
# layer1 is the one words are read from and layer2 the one whose nodes and
# edges are traversed (see Main).
# NOTE(review): presumably root[0] is a non-layer element such as an
# attributes block -- confirm against the XML file.
layer1 = root[1]
layer2 = root[2]

In [27]:
#####################################################################
#                              Main Operations:                     #
#####################################################################

# Run the whole pipeline: `simple` is the combined text, `Lists_Scene` the text of each scene.
simple, Lists_Scene = Main(layer1, layer2) # Main function, see its definition above

# Show the input sentence next to the simplified result for a visual comparison.
print("Original: ", Get_Original(layer1))    
print("Simple: ", simple)            

Original:  He observed the planet which has 14 satellites. He came back home and played Guitar. 
Simple:  He observed the planet. Planet has 14 satellites. He came back home. He played Guitar. 
