In [None]:
! pip install nx_altair

In [None]:
! pip install pydracor

In [21]:
import pandas as pd
import altair as alt
import networkx as nx
import nx_altair as nxa
from pydracor import *

# DraCor API client; used below to look up the available plays.
dracor = DraCor()

# Step 0: Preparation
Import the libraries and instantiate the DraCor API client.

# Step 1: Available German plays
Choose a play you are interested in from the list.

In [None]:
# Map of play id -> play title for every play known to DraCor.
plays = dracor.play_id_to_play_title()

# Keep only the plays whose id contains 'ger0' (e.g. 'ger000311').
available_german_plays = {
    candidate_id: title
    for candidate_id, title in plays.items()
    if 'ger0' in candidate_id
}
available_german_plays

# Step 2: Fetch data
Request the metadata and the network graphs for the chosen play from the DraCor API.

In [23]:
# Play to analyse. Alternatives that can be swapped in:
#   Play('ger000088')  - Emilia Galotti
#   Play('ger000378')  - Napoleon oder Die hundert Tage, 259 Characters
#   Play('ger000201')  - Faust zwei, 189 Characters
play = Play('ger000311')  # Maria Stuart

# Workaround: networkX cannot mix directed and undirected edges in one graph and
# nx_altair's arrows look broken, so the relations graph is forced to be undirected
# by rewriting the GraphML before parsing it.
relations_graphml = play.relations_graphml().replace(
    'directed="true"', 'directed="false"'
)
G = nx.parse_graphml(relations_graphml)

# Metadata of the whole German corpus as a DataFrame.
german_corpus = Corpus('ger')
german_metadata = pd.DataFrame(german_corpus.metadata())

# Narrow the corpus metadata down to the row(s) of the chosen play;
# reset_index() makes the matching row addressable as row 0.
play_id = play.id
play_metadata = german_metadata.loc[german_metadata["id"] == play_id].reset_index()

# Sum the number of spoken words per gender over all nodes of the co-occurrence graph.
cooccurence_graphml = play.graphml()
cooccurence_graph = nx.parse_graphml(cooccurence_graphml)

words_by_gender = {'MALE': 0, 'FEMALE': 0, 'UNKNOWN': 0}
for _, attributes in cooccurence_graph.nodes(data=True):
    speaker_gender = attributes['Gender']
    # Nodes with any other gender value are ignored.
    if speaker_gender in words_by_gender:
        words_by_gender[speaker_gender] += attributes['Number of spoken words']

male_words = words_by_gender['MALE']
female_words = words_by_gender['FEMALE']
unknown_words = words_by_gender['UNKNOWN']

# Order is male, female, unknown.
spoken_words_list = [male_words, female_words, unknown_words]

# Step 3: Visualize data
Draw charts about gender distribution and relations for the chosen play.

In [24]:
############################## Network Chart ##############################

# Node positions, computed with networkx's shell layout.
layout = nx.shell_layout(G)

# Options for the base chart: nodes start out light gray with a label/gender
# tooltip, edges are colored by their 'Relation' attribute.
draw_options = dict(
    pos=layout,
    width=2,
    edge_color='Relation',
    node_color='lightgray',
    node_tooltip=['label', 'Gender'],
)

# Base graph drawn with nx_altair.
base = nxa.draw_networkx(G, **draw_options)

In [25]:
# Split the base chart into its two layers: edges first, nodes second.
edges, nodes = base.layer[0], base.layer[1]

In [32]:
!pip install --upgrade altair



In [30]:
# Requires Altair 5 (alt.selection_point / add_params); with Altair 4, as
# preinstalled on Colab, this cell fails with an AttributeError.

# Relationship filter: one clickable row per relation type.
relation = pd.DataFrame({
    'Relation': [
        'parent_of',
        'lover_of',
        'related_with',
        'associated_with',
        'siblings',
        'spouses',
        'friends',
    ]
})

# Point selection on the 'Relation' field.
relation_selection = alt.selection_point(fields=['Relation'], toggle="true")

# Selected relations keep their color, everything else turns light gray.
relation_color = alt.condition(
    relation_selection,
    alt.Color('Relation:N', legend=None),
    alt.value('lightgray'),
)

relation_filter = (
    alt.Chart(relation, title=alt.TitleParams('Filter relation', anchor='start'))
    .mark_rect(cursor='pointer')
    .encode(y=alt.Y('Relation', title=''), color=relation_color)
    .add_params(relation_selection)
)

AttributeError: module 'altair' has no attribute 'selection_point'

In [None]:
# Human-readable display label for each raw relation name.
relation_name_mapping = dict(
    parent_of='Parent',
    lover_of='Lover',
    related_with='Related',
    associated_with='Associated',
    siblings='Siblings',
    spouses='Spouse',
    friends='Friend',
)

In [None]:
# Add a display column holding the readable label of every relation.
display_labels = relation['Relation'].map(relation_name_mapping)
relation['Relation_Display'] = display_labels

# Rebuild the relation filter so that its rows show the readable labels;
# color and selection are still driven by the raw 'Relation' field.
relation_filter = (
    alt.Chart(relation, title=alt.TitleParams('Filter relation', anchor='start'))
    .mark_rect(cursor='pointer')
    .encode(
        y=alt.Y('Relation_Display', title=''),
        color=relation_color,
    )
    .add_params(relation_selection)
)


In [27]:
# Color the edges by relation and show only the relations picked in the filter.
# Start from the untouched edge layer of `base` (not from `edges` itself) so that
# re-running this cell does not stack another filter on top of the previous one.
edges = (
    base.layer[0]
    .encode(color=relation_color)
    .transform_filter(relation_selection)
)

NameError: name 'relation_color' is not defined

In [28]:
# Gender filter: one clickable point per gender value.
gender = pd.DataFrame({'Gender': ['MALE','FEMALE', 'UNKNOWN']})

# Point selection on the 'Gender' field.
gender_selection = alt.selection_point(fields=['Gender'], toggle="true")

# Selected genders keep their color, everything else turns light gray.
gender_color = alt.condition(
    gender_selection,
    alt.Color('Gender:N', legend=None),
    alt.value('lightgray'),
)
# Each gender additionally gets its own point shape.
gender_shape = alt.Shape('Gender:N', legend=None)

gender_filter = (
    alt.Chart(gender, title=alt.TitleParams('Filter gender', anchor='start'))
    .mark_point(filled=True, opacity=1, size=300, cursor='pointer')
    .encode(
        y=alt.Y('Gender', title=''),
        color=gender_color,
        shape=gender_shape,
    )
    .add_params(gender_selection)
)

AttributeError: module 'altair' has no attribute 'selection_point'

In [None]:
# Encode gender as node shape and color, and attach the gender selection.
# Start from the untouched node layer of `base` (not from `nodes` itself) so that
# re-running this cell does not attach the selection parameter a second time.
nodes = (
    base.layer[1]
    .encode(
        color=gender_color,
        fill=gender_color,
        shape=gender_shape,
    )
    .add_params(gender_selection)
)


In [None]:
# Network chart: node layer drawn on top of the edge layer, 400x400.
network_chart = (edges + nodes).properties(height=400, width=400)

# Both filters stacked vertically, placed to the left of the network chart.
filter_column = alt.vconcat(gender_filter, relation_filter)
network_chart_with_filters = alt.hconcat(filter_column, network_chart)

In [None]:
############################## Pie Charts ##############################

# Number of characters per gender, taken from the play metadata.
# Column order is male, female, unknown.
speaker_count_columns = [
    'num_of_speakers_male',
    'num_of_speakers_female',
    'num_of_speakers_unknown',
]
speakers = play_metadata[speaker_count_columns]
gender['Characters'] = speakers.loc[0].values.tolist()

gender_distribution_pie_chart = (
    alt.Chart(gender, title='Gender distribution')
    .mark_arc()
    .encode(
        theta='Characters',
        color=alt.Color('Gender:N', legend=None),
        tooltip=['Gender', 'Characters'],
    )
    .properties(height=200, width=200)
)

In [None]:
# Number of spoken words per gender, as aggregated in spoken_words_list.
gender['Spoken words'] = spoken_words_list

spoken_words_pie_chart = (
    alt.Chart(gender, title='Number of spoken words by gender')
    .mark_arc()
    .encode(
        theta='Spoken words',
        color=alt.Color('Gender:N', legend=None),
        tooltip=['Gender', 'Spoken words'],
    )
    .properties(height=200, width=200)
)

# Both pie charts stacked vertically.
stacked_pie_charts = alt.vconcat(gender_distribution_pie_chart, spoken_words_pie_chart)


In [None]:
############################## Final Chart ##############################

# Network chart with its filters on the left, pie charts on the right.
final_chart = (network_chart_with_filters | stacked_pie_charts)

# Overall title naming the chosen play.
chart_title = alt.TitleParams(
    'Gender distribution and relations in "{}"'.format(play.title),
    anchor='middle',
    fontSize=20,
)

# Last expression of the cell, so the configured chart is displayed.
(
    final_chart
    .configure_view(strokeWidth=0)      # remove border
    .configure_axis(domainOpacity=0)    # remove axis
    .properties(title=chart_title)
)