# Documentation on time analysis
The main goal of the time/space analysis was to answer these questions:
- when is the screenplay mostly set? Day or night?
- are the scenes mostly shot inside or outside?
- are the scenes that are shot inside/outside mostly set during the day or night?

This analysis gives an idea of the prevalent time and space settings by extracting the stage directions that the director wrote in the screenplay.

The first step was to locate all the places in the text and tag them in the **XML-TEI document**. Stage directions are tagged with ```<stage>```; the time at which the scene takes place is marked with the attribute type="time", and the setting with type="environment".

The libraries needed to extract information from the TEI document and to visualize the data graphically are [xml.etree](https://docs.python.org/3/library/xml.etree.elementtree.html#) and [matplotlib](https://matplotlib.org/stable/tutorials/index).

**xml.etree** is used to progressively search the hierarchical structure of the XML-TEI document. In this case we need to search the TEI document for the ```<stage>``` tags that carry the two specific attributes; their content is stored in lists.
`time_analysis_list` is a list of tuples, each tuple contains the information of time setting and related environment.
We iterate over the list and use `time_analysis_dict` to count the occurrences of each tuple; these counts become the values of the dictionary.

**matplotlib** is a library used to create graphs based on the data that we have extracted before. (Note: the code cells that follow in this document import bokeh and plotly for plotting.)
In this case we wanted to represent our data with a pie chart that shows the percentage of each space/time setting.
The values of the dictionary, i.e. the occurrence counts, are stored in a list and then automatically converted into percentage values.
The keys are used as labels. 




In [2]:
from xml.etree import ElementTree as ET
import pandas as pd
from math import pi
from bokeh.palettes import Category20c
from bokeh.plotting import figure, output_file, show
from bokeh.transform import cumsum
from bokeh.models import ColumnDataSource, CustomJS, OpenURL, TapTool, Div


# Function to analyze XML data and create a pie chart
def timeanalysis(xml_file_path):
    """Show a pie chart of the (time, environment) stage-direction pairs.

    The TEI file is parsed, the text of every ``<stage type="time">`` and
    ``<stage type="environment">`` element is collected, the two sequences
    are paired in document order, and the number of occurrences of each
    distinct pair is drawn as a bokeh pie chart.

    Args:
        xml_file_path: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        None. The chart is displayed with ``show``. Nothing is displayed when
        the XML cannot be parsed or when no pair is found; a message is
        printed instead.
    """
    try:
        root = ET.parse(xml_file_path).getroot()
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")
        return

    tei_ns = "{http://www.tei-c.org/ns/1.0}"
    time_stage_elements = root.findall(f".//{tei_ns}stage[@type='time']")
    env_stage_elements = root.findall(f".//{tei_ns}stage[@type='environment']")

    # NOTE(review): the two lists are paired purely by position over the whole
    # document; if one kind of stage direction is missing somewhere, later
    # pairs shift and zip() silently drops the surplus -- confirm the markup
    # always provides both.
    time_analysis_dict = {}
    for time_stage, env in zip(time_stage_elements, env_stage_elements):
        pair = ((time_stage.text or "").strip(), (env.text or "").strip())
        time_analysis_dict[pair] = time_analysis_dict.get(pair, 0) + 1

    # Without any pair the angle computation and the palette lookup below
    # have nothing to work on, so stop before building the chart.
    if not time_analysis_dict:
        print("No time/environment stage directions found; nothing to plot.")
        return

    data = pd.DataFrame({
        'pair': list(time_analysis_dict.keys()),
        'value': list(time_analysis_dict.values()),
    })
    data['angle'] = data['value'] / data['value'].sum() * 2 * pi

    # Category20c only provides palettes for 3 to 20 colours. Clamp the lookup
    # to that range, then trim (fewer than 3 pairs) or cycle (more than 20) so
    # that every slice receives a colour instead of raising KeyError.
    base_palette = Category20c[min(max(len(data), 3), 20)]
    data['color'] = [base_palette[i % len(base_palette)] for i in range(len(data))]

    p = figure(height=350, title="Pie Chart", toolbar_location=None,
               tools="hover", tooltips="@pair: @value", x_range=(-0.5, 1.0))

    source = ColumnDataSource(data=data)

    # Each wedge spans from the running total of the previous angles to the
    # running total including its own angle.
    p.wedge(x=0, y=1, radius=0.4,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field='pair', source=source)

    show(p)
# Script entry point: run the pie-chart analysis only when this code is
# executed directly, not when it is imported.
if __name__ == "__main__":
    # Absolute path to the XML-TEI transcription on the author's machine;
    # change it to the location of your own copy of the file.
    xml_file_path = "C:/Users/crosi/Documents/GitHub/metascript/eyes-wide-shut-1999-transcription.xml"
    # Whatever timeanalysis returns is kept in pie_chart.
    pie_chart = timeanalysis(xml_file_path)
    




In [1]:
from xml.etree import ElementTree as ET
import plotly.graph_objects as go

def timeanalysis(xml_file_path):
    """Plot the time and environment setting of every scene as two line traces.

    Every ``<div type="scene">`` of the TEI file contributes exactly one point
    per trace: its ``n`` attribute on the x-axis, and on the y-axes the text
    of the first ``<stage type="time">`` / ``<stage type="environment">``
    element found inside it. A scene that lacks one of the two stage
    directions contributes ``None`` for it, so the x and y sequences always
    have the same length and each value stays attached to its own scene.

    Args:
        xml_file_path: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML
            (the error from ``ElementTree.parse`` is not caught here).
    """
    tree = ET.parse(xml_file_path)
    root = tree.getroot()

    tei_ns = "{http://www.tei-c.org/ns/1.0}"

    def first_stage_text(scene, stage_type):
        # Text of the first <stage> of the given type inside the scene,
        # stripped of surrounding whitespace; None when it is absent or empty.
        node = scene.find(f".//{tei_ns}stage[@type='{stage_type}']")
        if node is None or node.text is None:
            return None
        return node.text.strip()

    n_list = []
    time_list = []
    env_list = []

    # One entry per scene in all three lists keeps them aligned by index.
    for scene in root.findall(f'.//{tei_ns}div[@type="scene"]'):
        n_list.append(scene.get('n'))
        time_list.append(first_stage_text(scene, 'time'))
        env_list.append(first_stage_text(scene, 'environment'))

    # Create the traces; each one is bound to its own y-axis
    trace1 = go.Scatter(x=n_list, y=time_list, mode='lines', name='Time', yaxis='y1')
    trace2 = go.Scatter(x=n_list, y=env_list, mode='lines', name='Environment', yaxis='y2')

    # Create layout with the time axis on the left and the environment axis
    # on the right, overlaid on the same plot area
    layout = go.Layout(
        title='Time and Environment Analysis',
        xaxis=dict(title='Scene'),
        yaxis=dict(title='Time', side='left', showgrid=False),
        yaxis2=dict(title='Environment', side='right', overlaying='y', showgrid=False)
    )

    # Create a figure with the traces and layout
    fig = go.Figure(data=[trace1, trace2], layout=layout)

    # Show the figure
    fig.show()

# Run the scene-by-scene analysis on the transcription. The path below is an
# absolute location on the author's machine: replace it with the path to your
# own XML file. The call is not wrapped in a __main__ guard, so it executes
# whenever this code is run or imported.
timeanalysis('C:/Users/crosi/Documents/GitHub/metascript/eyes-wide-shut-1999-transcription.xml')

