Step 1: Define file paths

In [73]:
# Relative path of the folder used by this notebook for local files.
# NOTE(review): no visible cell below reads this variable — confirm it is still needed.
directory: str = "../working-with-data-frames/"

Step 2: Install additional packages

In [73]:
# install packages that are not part of Python's standard distribution

!pip install pandas
!pip install openpyxl
!pip install holoviews
!pip install bokeh

Step 3: Read input file and create dataframe

In [77]:
## Packages for reading EXCEL workbooks into dataframes

import pandas as pd
import openpyxl  # not called directly in this cell; pandas relies on it to parse .xlsx files

# Location of the EXCEL workbook (sample factoid list in the DigiKAR GitHub repository)
infile1 = "https://github.com/ieg-dhr/DigiKAR/raw/main/Sample%20Data/FactoidList-Erfassung-Jahns_OpenRefine.xlsx"

# Load the workbook into a dataframe
input_df = pd.read_excel(io=infile1)
# Optional: call display(input_df) here to inspect the loaded dataframe

  warn("Workbook contains no default style, apply openpyxl's default")


Step 4: Plot chord diagram with Holoviews

In [110]:
# Create a chord diagram with holoviews

import holoviews as hv
from holoviews import opts, dim
import bokeh

# Load the bokeh extension for holoviews
hv.extension('bokeh')

# Define the output size used when rendering the diagram
hv.output(size=150)

# Prepare the "connections" table required for chord diagrams:
# group the input df by 'event_type' and 'place_name' and count the rows per combination
result_df = input_df.groupby(['event_type', 'place_name']).size().reset_index(name='weights')

# Sort "weights" in descending order and keep the 10 most frequent combinations
top_10_df = result_df.sort_values(by='weights', ascending=False).head(10)

display(top_10_df)

# Build the chord diagram directly from the connections table, with "weights"
# as the value dimension of each connection.
# No separate node table is passed, so 'index' in the options below refers to
# the node dimension that HoloViews derives from the connections themselves.
chord = hv.Chord(top_10_df, vdims = "weights")

# Set parameters for the chord diagram and plot
# (the options call is the last expression of the cell, so the diagram is displayed)
chord.opts(cmap = 'Category20',  # categorical color scheme
          edge_cmap = 'Category20',  # categorical color scheme
          labels = 'index',  # labels based on the index
          node_color = hv.dim('index').str(), # node colour based on index
          edge_color = hv.dim('event_type').str(), # edge colour based on the source, here: event_type)
          width = 300,
          height = 300)


Unnamed: 0,event_type,place_name,weights
130,Funktionsausübung,Wetzlar,67
291,Präsentation,Wetzlar,64
2,Aufschwörung,Wetzlar,53
418,erfolglose Bewerbung,Wetzlar,53
256,Praktikum,Wetzlar,49
97,Funktionsausübung,Koblenz,45
404,Vokation,Wetzlar,43
103,Funktionsausübung,Mainz,40
107,Funktionsausübung,München,31
389,Tod,Wetzlar,31


For sample diagrams, check the following GitHub directory: https://github.com/ieg-dhr/DigiKAR/tree/main/NonMap_Visualisations

Code based on the HoloViews documentation and adapted for the DigiKAR project by Monika Barget, January 2024.