# Introduction

This Jupyter Notebook generates visualizations that provide a starting point for analyzing gender relations and gender distribution in a selected drama from the German corpus of the DraCor dataset. <br><br>
To generate the visualization, execute the following 3 code cells in order.

# Step 1: Preparation
In this code cell, the required libraries are imported and helper functions are defined; you only need to execute the cell.

In [10]:
import pandas as pd
import altair as alt
import ipywidgets as widgets
import networkx as nx
import nx_altair as nxa

def set_character_name(graph):
    """Copy every node's ``label`` attribute into a ``Name`` attribute.

    The node attribute dictionaries are updated in place; the same graph
    object that was passed in is returned.

    Raises:
        KeyError: if a node has no ``label`` attribute.
    """
    for _node_id, attributes in graph.nodes.items():
        attributes['Name'] = attributes['label']
    return graph

def relation_name_mapping():
    """Return a DataFrame pairing each relation key with its display name.

    Columns:
        Relation: the relation identifier (e.g. ``'parent_of'``).
        Relation_Display: the human-readable label (e.g. ``'Parent-child'``).
    """
    display_names = {
        'parent_of': 'Parent-child',
        'lover_of': 'Lovers',
        'related_with': 'Related',
        'associated_with': 'Associated',
        'siblings': 'Siblings',
        'spouses': 'Spouses',
        'friends': 'Friends',
    }
    # The dict's insertion order defines the row order of the table.
    table = pd.DataFrame({'Relation': list(display_names)})
    table['Relation_Display'] = table['Relation'].map(display_names)
    return table

def gender_name_mapping():
    """Return a DataFrame pairing each gender key with its display name.

    Columns:
        Gender: the gender identifier (``'MALE'``, ``'FEMALE'``, ``'UNKNOWN'``).
        Gender_Display: the human-readable label.

    Rows are ordered male, female, unknown.
    """
    display_names = {
        'MALE': 'Male',
        'FEMALE': 'Female',
        'UNKNOWN': 'Unknown',
    }
    # The dict's insertion order defines the row order of the table.
    table = pd.DataFrame({'Gender': list(display_names)})
    table['Gender_Display'] = table['Gender'].map(display_names)
    return table

def get_words_by_gender(nodes):
    """Sum the ``'Number of spoken words'`` of all nodes, grouped by gender.

    Args:
        nodes: a mapping from node id to that node's attribute dictionary.
            Each attribute dictionary must contain ``'Gender'``.

    Returns:
        A list ``[male_words, female_words, unknown_words]``. Nodes whose
        gender is none of ``'MALE'``, ``'FEMALE'`` or ``'UNKNOWN'`` are
        ignored.
    """
    totals = {'MALE': 0, 'FEMALE': 0, 'UNKNOWN': 0}
    for node_id in nodes:
        attributes = nodes[node_id]
        gender = attributes['Gender']
        if gender in totals:
            totals[gender] += attributes['Number of spoken words']
    return [totals['MALE'], totals['FEMALE'], totals['UNKNOWN']]

def chunked_title(title, max_length=70):
    """Split a title into lines of at most ``max_length`` characters.

    The title is split on whitespace and the words are greedily packed
    into lines, joined by single spaces. A word is never broken, so a
    single word longer than ``max_length`` ends up on a line of its own
    that exceeds the limit.

    Args:
        title: the text to wrap.
        max_length: maximum number of characters per line (default 70).

    Returns:
        A list of lines. It always contains at least one element; an empty
        or whitespace-only title yields ``['']``.
    """
    lines = []
    current_words = []
    current_length = 0  # length of " ".join(current_words)
    for word in title.split():
        # Length of the current line if this word were appended to it.
        separator = 1 if current_words else 0
        projected_length = current_length + separator + len(word)
        # Only start a new line when the current one already holds a word;
        # otherwise an over-long first word would emit an empty line.
        if current_words and projected_length > max_length:
            lines.append(" ".join(current_words))
            current_words = [word]
            current_length = len(word)
        else:
            current_words.append(word)
            current_length = projected_length
    lines.append(" ".join(current_words))
    return lines

# Step 2: Choose a play 
- After the following code is executed, a dropdown menu appears below from which any drama can be selected.<br><br>
- You can also search for a specific drama by entering the first letters of the desired drama on the keyboard.<br><br>
- If the visualization has already been generated and you would like to select a different drama, select the appropriate drama from the dropdown menu and **execute Step 3 again**.

In [18]:
# Load the metadata table of the German corpus.
german_metadata = pd.read_csv('data/german_metadata.csv', encoding='utf-8')

# Build a title -> id lookup and order it alphabetically by title.
# NOTE(review): titles are dict keys, so plays sharing the same title
# collapse into a single entry (the last id in the file wins).
dropdown_items = dict(zip(german_metadata['title'], german_metadata['id']))
dropdown_items = {title: dropdown_items[title] for title in sorted(dropdown_items)}

# The widget shows the titles; its value is the id of the selected play.
dropdown = widgets.Dropdown(options=dropdown_items, description='Select play:')

# Last expression of the cell: renders the dropdown below the cell.
dropdown

Dropdown(description='Select play:', options={'Abellino': 'ger000275', 'Agnes Bernauer': 'ger000043', 'Agnes B…

# Step 3: Visualize data

Executing this code block generates the visualization of gender distribution and gender relations for the chosen play and displays it below the code block. The chart is also saved to `final_chart.html`. <br>There are two filters on the left-hand side, and additional information is displayed when you hover over the elements of the visualization with the mouse.

In [27]:
# Id of the play currently selected in the dropdown from Step 2.
play_id = dropdown.value

# None is a singleton, so it is compared by identity rather than equality.
if play_id is not None:
    ############################## Network Chart ##############################
    # parse the two graphs stored for the selected play
    relations_graph = nx.read_graphml(f"data/relations/{play_id}.graphml")
    cooccurence_graph = nx.read_graphml(f"data/cooccurence/{play_id}.graphml")

    # add Name attribute to nodes (used as the node tooltip below)
    relations_graph = set_character_name(relations_graph)

    # define the graph layout
    layout = nx.shell_layout(relations_graph)

    # draw base graph with nx_altair
    base = nxa.draw_networkx(
        relations_graph,
        pos=layout,
        node_tooltip=['Name'],
        node_color='lightgray',
        edge_color='Relation',
        width=4
    )

    # get the edge layer
    edges = base.layer[0]
    # get the node layer
    nodes = base.layer[1]

    # define relation filter
    relation = relation_name_mapping()
    # NOTE(review): toggle is the string "true", not the boolean True —
    # presumably a Vega expression so that every click toggles; confirm.
    relation_selection = alt.selection_point(fields=['Relation'], toggle="true")
    # selected relations keep their colour, all others turn light gray
    relation_color = alt.condition(
        relation_selection,
        alt.Color('Relation:N', legend=None),
        alt.value('lightgray')
    )
    # clickable list of relation types that drives relation_selection
    relation_filter = alt.Chart(
        relation,
        title=alt.TitleParams('Filter relation', anchor='start')
    ).mark_rect(cursor='pointer').encode(
        y=alt.Y('Relation_Display', title=''),
        color=relation_color
    ).add_params(relation_selection)

    # encode relation as edge color and add relationship filter
    edges = edges.encode(color=relation_color,tooltip='Relation').transform_filter(relation_selection)

    # define gender filter
    gender = gender_name_mapping()
    gender_selection = alt.selection_point(fields=['Gender'], toggle="true")
    # selected genders keep their colour, all others turn light gray
    gender_color = alt.condition(
        gender_selection,
        alt.Color('Gender:N', legend=None),
        alt.value('lightgray')
    )
    gender_shape = alt.Shape('Gender:N', legend=None)
    # clickable list of genders that drives gender_selection
    gender_filter = alt.Chart(
        gender,
        title=alt.TitleParams('Filter gender', anchor='start')
    ).mark_point(
        size=300,
        cursor='pointer',
        filled=True,
        opacity=1
    ).encode(
        y=alt.Y('Gender_Display', title=''),
        color=gender_color,
        shape=gender_shape
    ).add_params(gender_selection)

    # encode gender as node shape+color and add gender filter
    # (nodes are only recoloured by the selection; no transform_filter is applied)
    nodes = nodes.encode(
        color=gender_color,
        fill=gender_color,
        shape=gender_shape
    ).add_params(gender_selection)

    # layer network chart
    network_chart = (edges + nodes).properties(
        width=400,
        height=400
    )
    # both filters stacked vertically, placed to the left of the network
    network_chart_with_filters = ((gender_filter & relation_filter) | network_chart)

    ############################## Pie Charts ##############################

    # count characters by gender
    # reset_index() puts the matching metadata row at label 0 for .loc below
    play_metadata = german_metadata[german_metadata["id"] == play_id].reset_index()
    speakers = play_metadata[['num_of_speakers_male', 'num_of_speakers_female', 'num_of_speakers_unknown']]
    # column order male, female, unknown matches the row order of `gender`
    gender['Characters'] = speakers.loc[0,:].values.tolist()
    gender_distribution_pie_chart= alt.Chart(gender, title='Number of characters by gender').mark_arc().encode(
        theta='Characters',
        color=alt.Color('Gender:N', legend=None),
        tooltip=['Characters']
    ).properties(
        width=200,
        height=200
    )

    # aggregate spoken words by gender (returned as [male, female, unknown])
    gender['Spoken words'] = get_words_by_gender(cooccurence_graph.nodes)
    spoken_words_pie_chart = alt.Chart(gender, title='Number of spoken words by gender').mark_arc().encode(
        theta='Spoken words',
        color=alt.Color('Gender:N', legend=None),
        tooltip=['Spoken words']
    ).properties(
        width=200,
        height=200
    )

    stacked_pie_charts = (gender_distribution_pie_chart & spoken_words_pie_chart)

    ############################## Final Chart ##############################

    # the title is passed as a list of lines so that long play titles wrap
    title = chunked_title(f"Gender distribution and relations in {dropdown.label}")
    final_chart = (network_chart_with_filters | stacked_pie_charts)
    final_chart = final_chart.configure_view(
            strokeWidth=0 # remove border
    ).configure_axis(
        domainOpacity=0 # remove axis
    ).properties(
        title=alt.TitleParams(
            title,
            anchor='middle',
            fontSize=20
        )
    )
    # also write the chart to an HTML file in the working directory
    final_chart.save('final_chart.html')
else:
    # nothing is selected, so show a plain message instead of a chart
    final_chart = 'no plays available'

# Last expression of the cell: renders the chart (or the message) below it.
final_chart

Source: German Drama Corpus, provided by the Drama Corpus (DraCor) Project, https://dracor.org/ger, licensed under CC0. As of 06.02.2024.

Fischer, Frank, et al. (2019). Programmable Corpora: Introducing DraCor, an Infrastructure for the Research on European Drama. In Proceedings of DH2019: "Complexities", Utrecht University, doi:10.5281/zenodo.4284002.