# Graph Generation Scripts

Edit the variables below and run the cells in order to generate graph files that can be opened in Gephi.

In [None]:
# 1: Enter the input file name
# (file name only; a later cell prepends the 'files/xml/' directory when parsing)

input_file_name = "203_rimeoftheancientmariner_SCBClean1798textvalidated.xml"

In [None]:
# 2: Generate output file names

# Imported here rather than in the import cell because this cell runs first.
import os

# Drop the extension, whatever its length, instead of assuming it is exactly
# four characters ('.xml'); every output name is built from this root.
output_root = os.path.splitext(input_file_name)[0]

complete_output_file_name = output_root + '-complete_new.gexf'
syuzhet_output_file_name = output_root + '-syuzhet.gexf'
topoi_output_file_name = output_root + '-topoi.gexf'
chronotope_output_file_name = output_root + '-chronotopes.gexf'
chronotope_II_output_file_name = output_root + '-chronotopes-ii.gexf'
chronotope_III_output_file_name = output_root + '-chronotopes-iii.gexf'

In [None]:
# 3: Import libraries

from lxml import etree
import networkx as nx

In [None]:
# 4: Read the XML file and create empty graph objects

# Parse the marked-up text; `root` is the element handed to the graph builders.
tree = etree.parse('files/xml/' + input_file_name)
root = tree.getroot()

# One empty graph per output. The chronotope graph is a plain nx.Graph;
# the others are nx.DiGraph.
complete, syuzhet, topoi, chronotope_ii = (nx.DiGraph() for _ in range(4))
chronotope = nx.Graph()

In [None]:
# Collect every <chapter> element found under the root, in document order
chapters = list(root.iter('chapter'))

In [None]:
# Sanity check: print each chapter element that was collected
for el in chapters:
    print(el)

In [None]:
# 5: Define the functions for creating the graphs

def _toporef_label(toporef):
    """Return the node name for a toporef: its text wrapped in double quotes."""
    return '"' + toporef.text + '"'


def _containing_framename(toporef):
    """
    Return the framename of the element that contains a toporef, or None.

    A toporef directly inside a <topos> belongs to that topos. A toporef inside
    a <connection> belongs to the element wrapping the connection, provided that
    element carries a framename attribute. Anything else has no container.
    """
    parent = toporef.getparent()
    if parent is None:
        return None
    if parent.tag == 'topos':
        return parent.attrib.get('framename')
    if parent.tag == 'connection':
        grandparent = parent.getparent()
        if grandparent is not None:
            return grandparent.attrib.get('framename')
    return None


def _link_toporef_to_container(toporef, graph):
    """Add an edge from the containing topos to the toporef, if a container exists."""
    framename = _containing_framename(toporef)
    if framename is None:
        # No enclosing topos could be identified, so there is nothing to attach
        # the toporef to; it stays in the graph as an unconnected node.
        return
    graph.add_edge(framename, _toporef_label(toporef), relation=toporef.attrib.get('relation', 'none'))


def complete_graph(xml_element, graph):
    """
    Populate a graph with the spatial nodes, litonyms, and the implied physical,
    psychological, or sensory connections between them, from an XML element
    marked up using CLAYE.

    Nodes:
      * every <toporef> becomes a node named by its quoted text (node_type='toporef');
      * every <topos> becomes a node named by its framename (node_type='topos'),
        with its chronotope type and a running total of the stripped text length
        of all <topos> elements sharing that framename.

    Edges (each carries a `relation` attribute, 'none' when the markup has none):
      * every <connection> links its source to its target;
      * a toporef without a `sequence` attribute is linked from its containing topos;
      * toporefs sharing a `sequence` value are chained in document order, and
        the first one is linked from its containing topos.

    Params:
    xml_element: an lxml XML element (getparent() is required)
    graph: a NetworkX graph object, modified in place

    Returns nothing; the graph passed in is populated.
    """

    # Add all the litonyms and topoi as nodes and connections as edges first
    for toporef in xml_element.iter('toporef'):
        graph.add_node(_toporef_label(toporef), node_type='toporef')

    for topos in xml_element.iter('topos'):
        framename = topos.attrib['framename']
        text_length = len(''.join(topos.itertext()).strip())
        try:
            # A framename seen before: extend its accumulated length
            graph.nodes[framename]['length'] += text_length
        except KeyError:
            graph.add_node(framename, length=text_length, chronotope=topos.attrib['type'], node_type="topos")

    for connection in xml_element.iter('connection'):
        graph.add_edge(
            connection.attrib['source'],
            connection.attrib['target'],
            relation=connection.attrib.get('relation', 'none'),
        )

    # Connect the stand-alone toporefs to the containing topoi, and group the
    # sequenced ones. Each toporef is filed exactly once, so a chain never
    # repeats its members or wraps around from its last member to its first.
    sequences = {}
    for toporef in xml_element.iter('toporef'):
        sequence = toporef.attrib.get('sequence')
        if sequence is None:
            _link_toporef_to_container(toporef, graph)
        else:
            sequences.setdefault(sequence, []).append(toporef)

    # Connect the toporef sequences to one another
    for members in sequences.values():
        # Only the head of a sequence hangs off the containing topos
        _link_toporef_to_container(members[0], graph)
        for previous, current in zip(members, members[1:]):
            graph.add_edge(
                _toporef_label(previous),
                _toporef_label(current),
                relation=current.attrib.get('relation', 'none'),
            )
            
        


def syuzhet_graph(xml_element, graph):
    """
    Populate a graph with the spatial nodes of an XML element marked up using
    CLAYE, connected sequentially as they appear in the text.
    Corresponds (loosely) with the syuzhet or story order of the text.

    Every <topos> becomes (or extends) a node named by its framename, carrying
    its chronotope type and an accumulated stripped text length. Each topos
    after the first is joined to the one before it by an edge.

    Params:
    xml_element: an lxml XML element
    graph: a NetworkX graph object, modified in place

    Returns nothing; the graph passed in is populated.
    """
    # Read every topos up front as (framename, chronotope type, text length)
    visits = [
        (topos.attrib['framename'], topos.attrib['type'], len(''.join(topos.itertext()).strip()))
        for topos in xml_element.iter('topos')
    ]
    if not visits:
        return

    # The opening topos has no predecessor, so it only contributes a node
    first_name, first_type, first_length = visits[0]
    graph.add_node(first_name, chronotope=first_type, length=first_length)
    previous = first_name

    for name, chronotope_type, text_length in visits[1:]:
        try:
            # Revisited setting: extend the length already recorded for it
            graph.nodes[name]['length'] += text_length
        except KeyError:
            graph.add_node(name, chronotope=chronotope_type, length=text_length)

        graph.add_edge(previous, name)
        previous = name

            
def topoi_graph(xml_element, graph):
    """
    Populate a graph with the topoi found under an XML element and the
    connections between them, including attributes.

    Each <topos> becomes (or extends) a node named by its framename. A new node
    records the chronotope type and the stripped text length of the topos; a
    repeated framename adds its text length to the existing total. Each
    <connection> becomes an edge from its source to its target, labelled with
    its relation.

    Params:
    xml_element: an lxml XML element; it and its descendants are searched
    graph: a NetworkX graph object, modified in place

    Returns nothing. A connection lacking a source, target, or relation
    attribute is not added; its attributes are printed instead.
    """

    for topos in xml_element.iter('topos'):
        framename = topos.attrib['framename']
        text_length = len(''.join(topos.itertext()).strip())
        try:
            graph.nodes[framename]['length'] += text_length
        except KeyError:
            graph.add_node(framename, chronotope=topos.attrib['type'], length=text_length)

    for connection in xml_element.iter('connection'):
        attributes = connection.attrib
        try:
            source = attributes['source']
            target = attributes['target']
            relation = attributes['relation']
        except KeyError:
            # Incomplete markup: report it and move on. Only the attribute
            # lookups are guarded, so unrelated errors are no longer hidden.
            print(attributes)
            continue
        graph.add_edge(source, target, relation=relation)



def chronotope_graph(xml_element, graph):
    """
    Populate a graph with the topoi of an XML element marked up using CLAYE
    and their associated chronotopes.

    For every <topos>, its type becomes a node tagged node_type='chronotope',
    its framename becomes a node tagged node_type='setting', and an edge joins
    the chronotope to the setting.

    Params:
    xml_element: an lxml XML element
    graph: a NetworkX graph object, modified in place

    Returns nothing; the graph passed in is populated.
    """

    for topos in xml_element.iter('topos'):
        attributes = topos.attrib

        chronotope_name = attributes['type']
        graph.add_node(chronotope_name, node_type='chronotope')

        setting_name = attributes['framename']
        graph.add_node(setting_name, node_type='setting')

        graph.add_edge(chronotope_name, setting_name)


def chronotope_graph_ii(xml_element, graph):
    """
    Populate a graph with the chronotope archetypes of an XML element marked up
    using CLAYE and the connections between them.

    Each distinct <topos> type becomes a node whose length is the total stripped
    text length of every topos of that type. Each <connection> is translated
    from framenames to chronotope types and added as an edge between the two
    types.

    Params:
    xml_element: an lxml XML element
    graph: a NetworkX graph object, modified in place

    Returns nothing. A connection is skipped when it lacks a source or target
    attribute, or when either one names a framename that no topos defines.
    """

    # framename -> chronotope type, used to translate connection endpoints
    topoi = {}
    for topos in xml_element.iter('topos'):
        chronotope = topos.attrib['type']
        text_length = len(''.join(topos.itertext()).strip())
        try:
            graph.nodes[chronotope]['length'] += text_length
        except KeyError:
            graph.add_node(chronotope, length=text_length)
        topoi[topos.attrib['framename']] = chronotope

    for connection in xml_element.iter('connection'):
        try:
            source_chronotope = topoi[connection.attrib['source']]
            target_chronotope = topoi[connection.attrib['target']]
        except KeyError:
            # Endpoint missing or not a known topos: nothing to connect.
            # Only the lookups are guarded, so other errors still surface.
            continue
        graph.add_edge(source_chronotope, target_chronotope)

        
def chronotope_graph_iii(xml_element, graph):
    """
    Populate a graph with the chronotope archetypes of an XML element marked up
    using CLAYE, their connections, and their associated toporefs.

    Each distinct <topos> type becomes a node whose length is the total stripped
    text length of every topos of that type. Each <connection> is translated
    from framenames to chronotope types and added as an edge. Every <toporef>
    found anywhere inside a topos is linked from that topos's chronotope type to
    a node named by the toporef's quoted text.

    Params:
    xml_element: an lxml XML element
    graph: a NetworkX graph object, modified in place

    Returns nothing. Connections with a missing or unknown endpoint are skipped,
    toporefs with no text are skipped, and a toporef without a relation
    attribute gets relation='none'.
    """
    # framename -> chronotope type, used to translate connection endpoints
    topoi = {}
    for topos in xml_element.iter('topos'):
        chronotope = topos.attrib['type']
        text_length = len(''.join(topos.itertext()).strip())
        try:
            graph.nodes[chronotope]['length'] += text_length
        except KeyError:
            graph.add_node(chronotope, length=text_length)
        topoi[topos.attrib['framename']] = chronotope

    for connection in xml_element.iter('connection'):
        try:
            source_chronotope = topoi[connection.attrib['source']]
            target_chronotope = topoi[connection.attrib['target']]
        except KeyError:
            # Endpoint missing or not a known topos: nothing to connect
            continue
        graph.add_edge(source_chronotope, target_chronotope)

    for topos in xml_element.iter('topos'):
        chronotope = topos.attrib['type']
        for toporef in topos.iter('toporef'):
            # Each toporef is judged on its own, so one incomplete toporef
            # no longer discards the toporefs that follow it in the same topos.
            if toporef.text is None:
                continue
            graph.add_edge(
                chronotope,
                '"' + toporef.text + '"',
                relation=toporef.attrib.get('relation', 'none'),
            )

In [None]:
# 6: Complete

complete = nx.DiGraph()

complete_graph(root, complete)

# Open with 'w' so that re-running the cell replaces the file. Appending would
# stack a second GEXF document after the first, which is not valid XML.
with open('files/graphs/' + complete_output_file_name, 'w', encoding='utf-8') as output_file:
    for line in nx.readwrite.gexf.generate_gexf(complete):
        output_file.write(line)

In [None]:
# 7: Syuzhet

# Start from an empty graph so that re-running the cell does not add the text
# lengths on top of those from the previous run.
syuzhet = nx.DiGraph()

syuzhet_graph(root, syuzhet)

# Open with 'w' so that re-running the cell replaces the file. Appending would
# stack a second GEXF document after the first, which is not valid XML.
with open('files/graphs/' + syuzhet_output_file_name, 'w', encoding='utf-8') as output_file:
    for line in nx.readwrite.gexf.generate_gexf(syuzhet):
        output_file.write(line)

In [None]:
# 8: Topoi

# Start from an empty graph so that re-running the cell does not add the text
# lengths on top of those from the previous run.
topoi = nx.DiGraph()

topoi_graph(root, topoi)

# Open with 'w' so that re-running the cell replaces the file. Appending would
# stack a second GEXF document after the first, which is not valid XML.
with open('files/graphs/' + topoi_output_file_name, 'w', encoding='utf-8') as output_file:
    for line in nx.readwrite.gexf.generate_gexf(topoi):
        output_file.write(line)

In [None]:
# 9: Chronotopes I

# Start from an empty graph, as the other output cells do, so the result does
# not depend on what earlier runs left in it. This graph is a plain nx.Graph.
chronotope = nx.Graph()

chronotope_graph(root, chronotope)

# Open with 'w' so that re-running the cell replaces the file. Appending would
# stack a second GEXF document after the first, which is not valid XML.
with open('files/graphs/' + chronotope_output_file_name, 'w', encoding='utf-8') as output_file:
    for line in nx.readwrite.gexf.generate_gexf(chronotope):
        output_file.write(line)

In [None]:
# 10: Chronotopes II

chronotope_ii = nx.DiGraph()

chronotope_graph_ii(root, chronotope_ii)

# Open with 'w' so that re-running the cell replaces the file. Appending would
# stack a second GEXF document after the first, which is not valid XML.
with open('files/graphs/' + chronotope_II_output_file_name, 'w', encoding='utf-8') as output_file:
    for line in nx.readwrite.gexf.generate_gexf(chronotope_ii):
        output_file.write(line)

In [None]:
# 11: Chronotopes III

chronotope_iii = nx.DiGraph()

chronotope_graph_iii(root, chronotope_iii)

# Open with 'w' so that re-running the cell replaces the file. Appending would
# stack a second GEXF document after the first, which is not valid XML.
with open('files/graphs/' + chronotope_III_output_file_name, 'w', encoding='utf-8') as output_file:
    for line in nx.readwrite.gexf.generate_gexf(chronotope_iii):
        output_file.write(line)