In [None]:
import pandas as pd
import plotly.graph_objects as go
from IPython.display import Image

#### Figure 1. Number of batches grouped by Krippendorff’s 𝛼 ranges

In [None]:
# Load the table behind Figure 1 (one row per Krippendorff's alpha range).
fig_1_data = pd.read_csv(filepath_or_buffer="figure_1.csv")

In [None]:
# Pull the plotted columns out of the Figure 1 table as plain lists:
# x-axis labels, the two stacked series, and the 'total' column used as bar label.
categories, crowd, exp, batches = (
    list(fig_1_data[column])
    for column in ('K alpha range', 'crowdworkers', 'experts', 'total')
)

# First trace of the stack: crowd workers, solid black.
crowd_bar = go.Bar(
    name="Crowdsource workers",
    x=categories,
    y=crowd,
    marker={'color': 'rgb(0,0,0)'},
)

# Second trace of the stack: experts, gray. It also carries the 'total' values
# as text drawn outside the bar.
expert_bar = go.Bar(
    name='Expert annotators',
    x=categories,
    y=exp,
    text=batches,
    textposition='outside',
    marker={'color': 'gray'},
)

fig_1 = go.Figure(data=[crowd_bar, expert_bar])

# White canvas, no margins, Arial 14 pt, vertical legend anchored by its top-left corner.
fig_1.update_layout(
    barmode='stack',
    width=600,
    bargap=0.1,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
    font={'family': "Arial", 'size': 14, 'color': "black"},
    legend={
        'traceorder': "normal",
        'orientation': "v",
        'yanchor': "top",
        'y': 1,
        'xanchor': "left",
        'x': 0.7,
    },
)

# Thin black outline on every bar, hover disabled, 14 pt for the outside labels.
fig_1.update_traces(
    selector={'type': 'bar'},
    marker_line_width=0.5,
    marker_line_color="black",
    hoverinfo='skip',
    outsidetextfont_size=14,
)

# Hide the y axis and its grid lines.
fig_1.update_yaxes(visible=False, showgrid=False)

fig_1.show()

#### Figure 2: Number of samples grouped by percentage agreement, incl. majority votes of expert annotators

In [None]:
# Load the table behind Figure 2 (expert annotators, one row per agreement category).
fig_2_data = pd.read_csv(filepath_or_buffer="figure_2.csv")

In [None]:
# Pull the plotted columns out of the Figure 2 table as plain lists:
# x-axis labels, the five stacked series, and the per-category sample count.
categories, omstr, niet, idk, ocr, no_majority, num_of_samples = (
    list(fig_2_data[column])
    for column in ('category', 'omstr', 'niet', 'idk', 'ocr',
                   'no_majority', 'num_of_samples')
)

# Traces are stacked in list order. The figure is black-and-white only, so the
# series are told apart by fill (black / white / gray) and by hatch pattern.
fig_2_traces = [
    go.Bar(
        name="Contentious",
        x=categories,
        y=omstr,
        textposition='auto',
        marker={'color': 'rgb(0,0,0)'},
    ),
    go.Bar(
        name="Non-contentious",
        x=categories,
        y=niet,
        marker={'color': 'rgb(255, 255, 255)',
                'pattern': {'shape': ""}},
    ),
    go.Bar(
        name="I don't know",
        x=categories,
        y=idk,
        marker={'color': 'rgb(255, 255, 255)',
                'pattern': {'shape': ".",
                            'size': 6,
                            'solidity': 0.2,
                            'fgcolor': 'black',
                            'fgopacity': 1}},
    ),
    go.Bar(
        name='Bad OCR',
        x=categories,
        y=ocr,
        marker={'color': 'rgb(255, 255, 255)',
                'pattern': {'shape': "/",
                            'size': 6,
                            'solidity': 0.3,
                            'fgcolor': 'black',
                            'fgopacity': 1}},
    ),
    go.Bar(
        name='No majority',
        x=categories,
        y=no_majority,
        # The last trace carries the total number of samples per category as its label.
        text=num_of_samples,
        textposition='outside',
        marker={'color': 'gray'},
    ),
]

fig_2 = go.Figure(data=fig_2_traces)

# White canvas, no margins, Arial 14 pt, vertical legend anchored by its top-left corner.
fig_2.update_layout(
    barmode='stack',
    width=600,
    bargap=0.1,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
    font={'family': "Arial", 'size': 14, 'color': "black"},
    legend={
        'traceorder': "normal",
        'orientation': "v",
        'yanchor': "top",
        'y': 1,
        'xanchor': "left",
        'x': 0.75,
    },
)

# Thin black outline on every bar, hover disabled, 14 pt for the outside labels.
fig_2.update_traces(
    selector={'type': 'bar'},
    marker_line_width=0.5,
    marker_line_color="black",
    hoverinfo='skip',
    outsidetextfont_size=14,
)

# Hide the y axis and its grid lines.
fig_2.update_yaxes(visible=False, showgrid=False)

fig_2.show()

#### Figure 3: Number of samples grouped by percentage agreement, incl. majority votes of crowd annotators (samples annotated by only one participant (batch 50) and control samples are excluded)

In [None]:
# Load the table behind Figure 3 (crowd annotators, one row per agreement category).
fig_3_data = pd.read_csv(filepath_or_buffer="figure_3.csv")

In [None]:
# Pull the plotted columns out of the Figure 3 table as plain lists:
# x-axis labels, the five stacked series, and the per-category sample count.
categories, omstr, niet, idk, ocr, no_majority, num_of_samples = (
    list(fig_3_data[column])
    for column in ('category', 'omstr', 'niet', 'idk', 'ocr',
                   'no_majority', 'num_of_samples')
)

# Traces are stacked in list order. The figure is black-and-white only, so the
# series are told apart by fill (black / white / gray) and by hatch pattern.
fig_3_traces = [
    go.Bar(
        name="Contentious",
        x=categories,
        y=omstr,
        marker={'color': 'rgb(0,0,0)'},
    ),
    go.Bar(
        name="Non-contentious",
        x=categories,
        y=niet,
        marker={'color': 'rgb(255, 255, 255)',
                'pattern': {'shape': ""}},
    ),
    go.Bar(
        name="I don't know",
        x=categories,
        y=idk,
        marker={'color': 'rgb(255, 255, 255)',
                'pattern': {'shape': ".",
                            'size': 4,
                            'solidity': 0.2,
                            'fgcolor': 'black',
                            'fgopacity': 1}},
    ),
    go.Bar(
        name='Bad OCR',
        x=categories,
        y=ocr,
        marker={'color': 'white',
                'pattern': {'shape': "/",
                            'size': 4,
                            'solidity': 0.2,
                            'fgcolor': 'black',
                            'fgopacity': 1}},
    ),
    go.Bar(
        name='No majority',
        x=categories,
        y=no_majority,
        # The last trace carries the total number of samples per category as its label.
        text=num_of_samples,
        textposition='outside',
        marker={'color': 'gray'},
    ),
]

fig_3 = go.Figure(data=fig_3_traces)

# White canvas, no margins, Arial 14 pt, vertical legend anchored by its top-left corner.
fig_3.update_layout(
    barmode='stack',
    width=620,
    bargap=0.1,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
    font={'family': "Arial", 'size': 14, 'color': "black"},
    legend={
        'traceorder': "normal",
        'orientation': "v",
        'yanchor': "top",
        'y': 1,
        'xanchor': "left",
        'x': 0.75,
    },
)

# Thin black outline on every bar, hover disabled, 14 pt for the outside labels.
fig_3.update_traces(
    selector={'type': 'bar'},
    marker_line_width=0.5,
    marker_line_color="black",
    hoverinfo='skip',
    outsidetextfont_size=14,
)

# Hide the y axis and its grid lines.
fig_3.update_yaxes(visible=False, showgrid=False)

fig_3.show()

#### Figure 4: The proportion of majority votes of 91 target words. The number of samples per target word is on the right (samples annotated by only one participant (batch 50) are excluded)

In [None]:
# Load the table behind Figure 4 (one row per target word).
# The rows in the CSV were sorted beforehand: first by 'niet' (non-contentious)
# ascending, then by 'omstr' (contentious) descending.
fig_4_data = pd.read_csv(filepath_or_buffer="figure_4.csv")

In [None]:
# Pull the plotted columns out of the Figure 4 table as plain lists:
# target words (y axis), the five stacked series, and the per-word sample count.
categories, omstr, niet, idk, ocr, no_majority, text_marker = (
    list(fig_4_data[column])
    for column in ('target', 'omstr', 'niet', 'idk', 'ocr',
                   'no_majority', 'n_samples')
)

# Horizontal bars, stacked in list order. The series are told apart by fill
# (black / white / gray) and by hatch pattern.
fig_4_traces = [
    go.Bar(
        name="Contentious",
        orientation='h',
        x=omstr,
        y=categories,
        marker={'color': 'rgb(0,0,0)'},
    ),
    go.Bar(
        name="Non-contentious",
        orientation='h',
        x=niet,
        y=categories,
        marker={'color': 'rgb(255, 255, 255)'},
    ),
    go.Bar(
        name="I don't know",
        orientation='h',
        x=idk,
        y=categories,
        marker={'color': 'rgb(255, 255, 255)',
                'pattern': {'shape': "/",
                            'size': 4,
                            'solidity': 0.4,
                            'fgcolor': 'black',
                            'fgopacity': 1}},
    ),
    go.Bar(
        name='Bad OCR',
        orientation='h',
        x=ocr,
        y=categories,
        marker={'color': 'white',
                'pattern': {'shape': "x",
                            'size': 6,
                            'solidity': 0.4,
                            'fgcolor': 'black',
                            'fgopacity': 1}},
    ),
    go.Bar(
        name='No majority',
        orientation='h',
        x=no_majority,
        y=categories,
        # The last trace carries the number of samples per target word as its label.
        text=text_marker,
        textposition='outside',
        marker={'color': 'gray'},
    ),
]

fig_4 = go.Figure(data=fig_4_traces)

# Tall, narrow canvas with no margins and small (8 pt) Arial text; the legend is
# laid out horizontally and positioned slightly above and left of the plot area.
fig_4.update_layout(
    barmode='stack',
    width=520,
    height=1180,
    bargap=0.2,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    xaxis_title="",
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
    font={'family': "Arial", 'size': 8, 'color': "black"},
    legend={
        'traceorder': "normal",
        'orientation': "h",
        'itemsizing': 'constant',
        'itemwidth': 30,
        'yanchor': "top",
        'y': 1.03,
        'xanchor': "left",
        'x': -0.03,
        'font': {'family': "Arial", 'size': 8, 'color': "black"},
    },
)

# Thin black outline on every bar, hover disabled, 8 pt bar labels.
fig_4.update_traces(
    selector={'type': 'bar'},
    marker_line_width=0.5,
    marker_line_color="black",
    hoverinfo='skip',
    textfont_size=8,
)

# Keep the y axis (target words) visible without grid lines; hide the x axis entirely.
fig_4.update_yaxes(visible=True, showgrid=False)
fig_4.update_xaxes(visible=False, showgrid=False)

fig_4.show()

##### Exporting the bar charts as static images

In [None]:
# Render each figure to PNG bytes at 2x scale. The width/height given here are
# the export dimensions passed to to_image for each figure.
# NOTE(review): fig_3 is laid out with width=620 but exported at width=600 — confirm
# which is intended.
# NOTE(review): to_image presumably needs a static-image export engine (e.g. kaleido)
# installed in the kernel environment — confirm.
png_export_options = {'format': "png", 'scale': 2}

fig_1_bytes = fig_1.to_image(width=600, height=360, **png_export_options)
fig_2_bytes = fig_2.to_image(width=600, height=520, **png_export_options)
fig_3_bytes = fig_3.to_image(width=600, height=520, **png_export_options)
fig_4_bytes = fig_4.to_image(width=520, height=1180, **png_export_options)

In [None]:
# Display the static PNG rendering of Figure 1 as the cell output.
Image(data=fig_1_bytes)

In [None]:
# Display the static PNG rendering of Figure 2 as the cell output.
Image(data=fig_2_bytes)

In [None]:
# Display the static PNG rendering of Figure 3 as the cell output.
Image(data=fig_3_bytes)

In [None]:
# Display the static PNG rendering of Figure 4 as the cell output.
Image(data=fig_4_bytes)