In [1]:
import csv
import pandas as pd
import plotly.graph_objects as go
from IPython.display import Image

#### All EN

In [2]:
# Read the English per-lemma statistics table from CSV.
# NOTE(review): this is an absolute, user-specific path; the cell only runs on
# a machine where this exact location exists.
rm_stats_en_csv = '/Users/anesterov/reps/LODlit/rm/rm_stats_by_lemma_en.csv'
stats_by_lemma_en = pd.read_csv(rm_stats_en_csv)

In [15]:
# Cast the per-property count columns (Wikidata, AAT and PWN) to integers.
# Columns that are not listed here keep whatever dtype read_csv gave them.
count_columns_en = [
    "wd_pref", "wd_aliases", "wd_descr",
    "aat_pref", "aat_alt", "aat_scopeNote",
    "pwn_le", "pwn_def", "pwn_examples",
]
stats_by_lemma_en = stats_by_lemma_en.astype(dict.fromkeys(count_columns_en, "int"))

#### All NL

In [20]:
# Read the Dutch per-lemma statistics table from CSV.
# NOTE(review): this is an absolute, user-specific path; the cell only runs on
# a machine where this exact location exists.
rm_stats_nl_csv = '/Users/anesterov/reps/LODlit/rm/rm_stats_by_lemma_nl.csv'
stats_by_lemma_nl = pd.read_csv(rm_stats_nl_csv)

In [22]:
# Cast the per-property count columns (Wikidata, AAT and ODWN) to integers.
# Columns that are not listed here keep whatever dtype read_csv gave them.
count_columns_nl = [
    "wd_pref", "wd_aliases", "wd_descr",
    "aat_pref", "aat_alt", "aat_scopeNote",
    "odwn_le", "odwn_sense_examples", "odwn_sense_definition",
]
stats_by_lemma_nl = stats_by_lemma_nl.astype(dict.fromkeys(count_columns_nl, "int"))

#### Wikidata EN

In [17]:
# Order the English rows by their Wikidata total, smallest first; the chart
# cell that follows reads the columns in this row order.
# The sorted copy is rebound to the same name instead of using inplace=True,
# so the cell is an explicit assignment rather than a hidden mutation.
stats_by_lemma_en = stats_by_lemma_en.sort_values("wd_total", ascending=True)

In [18]:
# Column values for the Wikidata EN chart, taken in the frame's current row order.
lemmas = stats_by_lemma_en['lemma'].tolist()
pref = stats_by_lemma_en['wd_pref'].tolist()
aliases = stats_by_lemma_en['wd_aliases'].tolist()
descr = stats_by_lemma_en['wd_descr'].tolist()

# Per-lemma totals, shown as a text label on the last stacked segment.
text_marker = stats_by_lemma_en['wd_total'].tolist()

# One entry per stacked segment, in stacking order:
# (legend name, values, fill colour, extra go.Bar options).
segment_specs = [
    ("prefLabel", pref, 'rgb(0,0,0)', {}),
    ("aliases", aliases, 'rgb(128, 128, 128)', {}),
    ("description", descr, 'rgb(224, 224, 224)',
     {"text": text_marker, "textposition": 'outside'}),
]

# Horizontal bars: counts on x, lemmas on y; the hover text repeats the count.
bar_chart = go.Figure(data=[
    go.Bar(name=label,
           x=counts,
           y=lemmas,
           hovertext=counts,
           orientation='h',
           marker_color=colour,
           **extra)
    for label, counts, colour, extra in segment_specs
])

# Figure title, anchored to the top-left of the paper area.
title_spec = {
    "text": "Figure X. Wikidata. EN, Lemmas by properties, related matches, absolute",
    "x": 0,
    "y": 1,
    "yanchor": "top",
    "yref": "paper",
    "pad": {"t": -55},
    "font": {"size": 14},
}

# Horizontal legend placed just above the plot area.
legend_spec = {
    "traceorder": "normal",
    "orientation": "h",
    "itemsizing": 'constant',
    "itemwidth": 30,
    "yanchor": "top",
    "y": 1.03,
    "xanchor": "left",
    "x": -0.03,
    "font": {"family": "Arial", "size": 8, "color": "black"},
}

bar_chart.update_layout(
    barmode='stack',
    width=540,
    height=1230,
    bargap=0.2,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    title=title_spec,
    margin={"l": 0, "r": 0, "b": 0, "t": 60, "pad": 1},
    font={"family": "Arial", "size": 7, "color": "black"},
    legend=legend_spec,
)

# No bar outlines, hover shows only the hover text, fixed size for bar labels.
bar_chart.update_traces(
    selector={"type": 'bar'},
    marker={"line": {"width": 0}},
    hoverinfo='text',
    textfont_size=8,
)

# Keep the lemma axis visible, hide the count axis, and drop grid lines on both.
bar_chart.update_yaxes(visible=True, showgrid=False)

# Last expression returns the figure, so the cell renders it.
bar_chart.update_xaxes(visible=False, showgrid=False)

In [19]:
# Save the current `bar_chart` as HTML in the working directory; plotly.js is
# referenced from the CDN instead of being embedded in the file.
bar_chart.write_html(
    'wd_rm_en.html',
    include_plotlyjs="cdn",
)

#### Wikidata NL

In [23]:
# Order the Dutch rows by their Wikidata total, smallest first; the chart
# cell that follows reads the columns in this row order.
# The sorted copy is rebound to the same name instead of using inplace=True,
# so the cell is an explicit assignment rather than a hidden mutation.
stats_by_lemma_nl = stats_by_lemma_nl.sort_values("wd_total", ascending=True)

In [25]:
# Column values for the Wikidata NL chart, taken in the frame's current row order.
lemmas = stats_by_lemma_nl['lemma'].tolist()
pref = stats_by_lemma_nl['wd_pref'].tolist()
aliases = stats_by_lemma_nl['wd_aliases'].tolist()
descr = stats_by_lemma_nl['wd_descr'].tolist()

# Per-lemma totals, shown as a text label on the last stacked segment.
text_marker = stats_by_lemma_nl['wd_total'].tolist()

# One entry per stacked segment, in stacking order:
# (legend name, values, fill colour, extra go.Bar options).
segment_specs = [
    ("prefLabel", pref, 'rgb(0,0,0)', {}),
    ("aliases", aliases, 'rgb(128, 128, 128)', {}),
    ("description", descr, 'rgb(224, 224, 224)',
     {"text": text_marker, "textposition": 'outside'}),
]

# Horizontal bars: counts on x, lemmas on y; the hover text repeats the count.
bar_chart = go.Figure(data=[
    go.Bar(name=label,
           x=counts,
           y=lemmas,
           hovertext=counts,
           orientation='h',
           marker_color=colour,
           **extra)
    for label, counts, colour, extra in segment_specs
])

# Figure title, anchored to the top-left of the paper area.
title_spec = {
    "text": "Figure X. Wikidata. NL, Lemmas by properties, related matches, absolute",
    "x": 0,
    "y": 1,
    "yanchor": "top",
    "yref": "paper",
    "pad": {"t": -55},
    "font": {"size": 14},
}

# Horizontal legend placed just above the plot area.
legend_spec = {
    "traceorder": "normal",
    "orientation": "h",
    "itemsizing": 'constant',
    "itemwidth": 30,
    "yanchor": "top",
    "y": 1.03,
    "xanchor": "left",
    "x": -0.03,
    "font": {"family": "Arial", "size": 8, "color": "black"},
}

bar_chart.update_layout(
    barmode='stack',
    width=540,
    height=1230,
    bargap=0.2,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    title=title_spec,
    margin={"l": 0, "r": 0, "b": 0, "t": 60, "pad": 1},
    font={"family": "Arial", "size": 7, "color": "black"},
    legend=legend_spec,
)

# No bar outlines, hover shows only the hover text, fixed size for bar labels.
bar_chart.update_traces(
    selector={"type": 'bar'},
    marker={"line": {"width": 0}},
    hoverinfo='text',
    textfont_size=8,
)

# Keep the lemma axis visible, hide the count axis, and drop grid lines on both.
bar_chart.update_yaxes(visible=True, showgrid=False)

# Last expression returns the figure, so the cell renders it.
bar_chart.update_xaxes(visible=False, showgrid=False)

In [26]:
# Save the current `bar_chart` as HTML in the working directory; plotly.js is
# referenced from the CDN instead of being embedded in the file.
bar_chart.write_html(
    'wd_rm_nl.html',
    include_plotlyjs="cdn",
)

### AAT EN

In [27]:
# Order the English rows by their AAT total, smallest first; the chart
# cell that follows reads the columns in this row order.
# The sorted copy is rebound to the same name instead of using inplace=True,
# so the cell is an explicit assignment rather than a hidden mutation.
stats_by_lemma_en = stats_by_lemma_en.sort_values("aat_total", ascending=True)

In [29]:
# Column values for the AAT EN chart, taken in the frame's current row order.
lemmas = stats_by_lemma_en['lemma'].tolist()
pref = stats_by_lemma_en['aat_pref'].tolist()
alt = stats_by_lemma_en['aat_alt'].tolist()
scopeNote = stats_by_lemma_en['aat_scopeNote'].tolist()

# Per-lemma totals, shown as a text label on the last stacked segment.
text_marker = stats_by_lemma_en['aat_total'].tolist()

# One entry per stacked segment, in stacking order:
# (legend name, values, fill colour, extra go.Bar options).
segment_specs = [
    ("prefLabel", pref, 'rgb(0,0,0)', {}),
    ("altLabel", alt, 'rgb(128, 128, 128)', {}),
    ("scopeNote", scopeNote, 'rgb(224, 224, 224)',
     {"text": text_marker, "textposition": 'outside'}),
]

# Horizontal bars: counts on x, lemmas on y; the hover text repeats the count.
bar_chart = go.Figure(data=[
    go.Bar(name=label,
           x=counts,
           y=lemmas,
           hovertext=counts,
           orientation='h',
           marker_color=colour,
           **extra)
    for label, counts, colour, extra in segment_specs
])

# Figure title, anchored to the top-left of the paper area.
title_spec = {
    "text": "Figure A. AAT. EN, Lemmas by properties, related matches, absolute",
    "x": 0,
    "y": 1,
    "yanchor": "top",
    "yref": "paper",
    "pad": {"t": -55},
    "font": {"size": 14},
}

# Horizontal legend placed just above the plot area.
legend_spec = {
    "traceorder": "normal",
    "orientation": "h",
    "itemsizing": 'constant',
    "itemwidth": 30,
    "yanchor": "top",
    "y": 1.03,
    "xanchor": "left",
    "x": -0.03,
    "font": {"family": "Arial", "size": 8, "color": "black"},
}

bar_chart.update_layout(
    barmode='stack',
    width=540,
    height=1230,
    bargap=0.2,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    title=title_spec,
    margin={"l": 0, "r": 0, "b": 0, "t": 60, "pad": 1},
    font={"family": "Arial", "size": 7, "color": "black"},
    legend=legend_spec,
)

# No bar outlines, hover shows only the hover text, fixed size for bar labels.
bar_chart.update_traces(
    selector={"type": 'bar'},
    marker={"line": {"width": 0}},
    hoverinfo='text',
    textfont_size=8,
)

# Keep the lemma axis visible, hide the count axis, and drop grid lines on both.
bar_chart.update_yaxes(visible=True, showgrid=False)

# Last expression returns the figure, so the cell renders it.
bar_chart.update_xaxes(visible=False, showgrid=False)

In [30]:
# Save the current `bar_chart` as HTML in the working directory; plotly.js is
# referenced from the CDN instead of being embedded in the file.
bar_chart.write_html(
    'aat_rm_en.html',
    include_plotlyjs="cdn",
)

### AAT NL

In [31]:
# Order the Dutch rows by their AAT total, smallest first; the chart
# cell that follows reads the columns in this row order.
# The sorted copy is rebound to the same name instead of using inplace=True,
# so the cell is an explicit assignment rather than a hidden mutation.
stats_by_lemma_nl = stats_by_lemma_nl.sort_values("aat_total", ascending=True)

In [33]:
# Column values for the AAT NL chart, taken in the frame's current row order.
lemmas = stats_by_lemma_nl['lemma'].tolist()
pref = stats_by_lemma_nl['aat_pref'].tolist()
alt = stats_by_lemma_nl['aat_alt'].tolist()
scopeNote = stats_by_lemma_nl['aat_scopeNote'].tolist()

# Per-lemma totals, shown as a text label on the last stacked segment.
text_marker = stats_by_lemma_nl['aat_total'].tolist()

# One entry per stacked segment, in stacking order:
# (legend name, values, fill colour, extra go.Bar options).
segment_specs = [
    ("prefLabel", pref, 'rgb(0,0,0)', {}),
    ("altLabel", alt, 'rgb(128, 128, 128)', {}),
    ("scopeNote", scopeNote, 'rgb(224, 224, 224)',
     {"text": text_marker, "textposition": 'outside'}),
]

# Horizontal bars: counts on x, lemmas on y; the hover text repeats the count.
bar_chart = go.Figure(data=[
    go.Bar(name=label,
           x=counts,
           y=lemmas,
           hovertext=counts,
           orientation='h',
           marker_color=colour,
           **extra)
    for label, counts, colour, extra in segment_specs
])

# Figure title, anchored to the top-left of the paper area.
title_spec = {
    "text": "Figure B. AAT. NL, Lemmas by properties, related matches, absolute",
    "x": 0,
    "y": 1,
    "yanchor": "top",
    "yref": "paper",
    "pad": {"t": -55},
    "font": {"size": 14},
}

# Horizontal legend placed just above the plot area.
legend_spec = {
    "traceorder": "normal",
    "orientation": "h",
    "itemsizing": 'constant',
    "itemwidth": 30,
    "yanchor": "top",
    "y": 1.03,
    "xanchor": "left",
    "x": -0.03,
    "font": {"family": "Arial", "size": 8, "color": "black"},
}

bar_chart.update_layout(
    barmode='stack',
    width=540,
    height=1230,
    bargap=0.2,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    title=title_spec,
    margin={"l": 0, "r": 0, "b": 0, "t": 60, "pad": 1},
    font={"family": "Arial", "size": 7, "color": "black"},
    legend=legend_spec,
)

# No bar outlines, hover shows only the hover text, fixed size for bar labels.
bar_chart.update_traces(
    selector={"type": 'bar'},
    marker={"line": {"width": 0}},
    hoverinfo='text',
    textfont_size=8,
)

# Keep the lemma axis visible, hide the count axis, and drop grid lines on both.
bar_chart.update_yaxes(visible=True, showgrid=False)

# Last expression returns the figure, so the cell renders it.
bar_chart.update_xaxes(visible=False, showgrid=False)

In [34]:
# Save the current `bar_chart` as HTML in the working directory; plotly.js is
# referenced from the CDN instead of being embedded in the file.
bar_chart.write_html(
    'aat_rm_nl.html',
    include_plotlyjs="cdn",
)

### PWN 

In [35]:
# Order the English rows by their PWN total, smallest first; the chart
# cell that follows reads the columns in this row order.
# The sorted copy is rebound to the same name instead of using inplace=True,
# so the cell is an explicit assignment rather than a hidden mutation.
stats_by_lemma_en = stats_by_lemma_en.sort_values("pwn_total", ascending=True)

In [39]:
# Column values for the PWN chart, taken in the frame's current row order.
lemmas = stats_by_lemma_en['lemma'].tolist()
synset_lemmas = stats_by_lemma_en['pwn_le'].tolist()
definitions = stats_by_lemma_en['pwn_def'].tolist()
examples = stats_by_lemma_en['pwn_examples'].tolist()

# Per-lemma totals, shown as a text label on the last stacked segment.
text_marker = stats_by_lemma_en['pwn_total'].tolist()

# One entry per stacked segment, in stacking order:
# (legend name, values, fill colour, extra go.Bar options).
segment_specs = [
    ("synset lemmas", synset_lemmas, 'rgb(0,0,0)', {}),
    ("definitions", definitions, 'rgb(128, 128, 128)', {}),
    ("examples", examples, 'rgb(224, 224, 224)',
     {"text": text_marker, "textposition": 'outside'}),
]

# Horizontal bars: counts on x, lemmas on y; the hover text repeats the count.
bar_chart = go.Figure(data=[
    go.Bar(name=label,
           x=counts,
           y=lemmas,
           hovertext=counts,
           orientation='h',
           marker_color=colour,
           **extra)
    for label, counts, colour, extra in segment_specs
])

# Figure title, anchored to the top-left of the paper area.
title_spec = {
    "text": "Figure C. PWN. Lemmas by properties, related matches, absolute",
    "x": 0,
    "y": 1,
    "yanchor": "top",
    "yref": "paper",
    "pad": {"t": -55},
    "font": {"size": 14},
}

# Horizontal legend placed just above the plot area.
legend_spec = {
    "traceorder": "normal",
    "orientation": "h",
    "itemsizing": 'constant',
    "itemwidth": 30,
    "yanchor": "top",
    "y": 1.03,
    "xanchor": "left",
    "x": -0.03,
    "font": {"family": "Arial", "size": 8, "color": "black"},
}

bar_chart.update_layout(
    barmode='stack',
    width=540,
    height=1230,
    bargap=0.2,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    title=title_spec,
    margin={"l": 0, "r": 0, "b": 0, "t": 60, "pad": 1},
    font={"family": "Arial", "size": 7, "color": "black"},
    legend=legend_spec,
)

# No bar outlines, hover shows only the hover text, fixed size for bar labels.
bar_chart.update_traces(
    selector={"type": 'bar'},
    marker={"line": {"width": 0}},
    hoverinfo='text',
    textfont_size=8,
)

# Keep the lemma axis visible, hide the count axis, and drop grid lines on both.
bar_chart.update_yaxes(visible=True, showgrid=False)

# Last expression returns the figure, so the cell renders it.
bar_chart.update_xaxes(visible=False, showgrid=False)

In [40]:
# Save the current `bar_chart` as HTML in the working directory; plotly.js is
# referenced from the CDN instead of being embedded in the file.
bar_chart.write_html(
    'pwn_rm.html',
    include_plotlyjs="cdn",
)

### ODWN

In [41]:
# Order the Dutch rows by their ODWN total, smallest first; the chart
# cell that follows reads the columns in this row order.
# The sorted copy is rebound to the same name instead of using inplace=True,
# so the cell is an explicit assignment rather than a hidden mutation.
stats_by_lemma_nl = stats_by_lemma_nl.sort_values("odwn_total", ascending=True)

In [44]:
# Column values for the ODWN chart, taken in the frame's current row order.
lemmas = stats_by_lemma_nl['lemma'].tolist()
le_form = stats_by_lemma_nl['odwn_le'].tolist()
sense_def = stats_by_lemma_nl['odwn_sense_definition'].tolist()
sense_examples = stats_by_lemma_nl['odwn_sense_examples'].tolist()

# Per-lemma totals, shown as a text label on the last stacked segment.
text_marker = stats_by_lemma_nl['odwn_total'].tolist()

# One entry per stacked segment, in stacking order:
# (legend name, values, fill colour, extra go.Bar options).
segment_specs = [
    ("LE form", le_form, 'rgb(0,0,0)', {}),
    ("sense def", sense_def, 'rgb(128, 128, 128)', {}),
    ("sense examples", sense_examples, 'rgb(224, 224, 224)',
     {"text": text_marker, "textposition": 'outside'}),
]

# Horizontal bars: counts on x, lemmas on y; the hover text repeats the count.
bar_chart = go.Figure(data=[
    go.Bar(name=label,
           x=counts,
           y=lemmas,
           hovertext=counts,
           orientation='h',
           marker_color=colour,
           **extra)
    for label, counts, colour, extra in segment_specs
])

# Figure title, anchored to the top-left of the paper area.
title_spec = {
    "text": "Figure D. ODWN. Lemmas by properties, related matches, absolute",
    "x": 0,
    "y": 1,
    "yanchor": "top",
    "yref": "paper",
    "pad": {"t": -55},
    "font": {"size": 14},
}

# Horizontal legend placed just above the plot area.
legend_spec = {
    "traceorder": "normal",
    "orientation": "h",
    "itemsizing": 'constant',
    "itemwidth": 30,
    "yanchor": "top",
    "y": 1.03,
    "xanchor": "left",
    "x": -0.03,
    "font": {"family": "Arial", "size": 8, "color": "black"},
}

bar_chart.update_layout(
    barmode='stack',
    width=540,
    height=1230,
    bargap=0.2,
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
    title=title_spec,
    margin={"l": 0, "r": 0, "b": 0, "t": 60, "pad": 1},
    font={"family": "Arial", "size": 7, "color": "black"},
    legend=legend_spec,
)

# No bar outlines, hover shows only the hover text, fixed size for bar labels.
bar_chart.update_traces(
    selector={"type": 'bar'},
    marker={"line": {"width": 0}},
    hoverinfo='text',
    textfont_size=8,
)

# Keep the lemma axis visible, hide the count axis, and drop grid lines on both.
bar_chart.update_yaxes(visible=True, showgrid=False)

# Last expression returns the figure, so the cell renders it.
bar_chart.update_xaxes(visible=False, showgrid=False)

In [45]:
# Save the current `bar_chart` as HTML in the working directory; plotly.js is
# referenced from the CDN instead of being embedded in the file.
bar_chart.write_html(
    'odwn_rm.html',
    include_plotlyjs="cdn",
)