In [None]:
import os

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

https://wortschatz.uni-leipzig.de/de/download/German#deu_wikipedia_2018

The goal of this notebook is to visualize the gathered results using plots. So far they have only been shown in tables, which is confusing in some cases.

In [None]:
# Output folder for every exported PDF. Kept as a string with a trailing slash
# because the plotting cells build file names by string concatenation.
PDF_FOLDER_CHOSEN = 'plots/'

# savefig does not create missing directories, so make sure the folder exists
# before any plotting cell runs (no-op if it is already there).
os.makedirs(PDF_FOLDER_CHOSEN, exist_ok=True)

In [None]:
# Eight-colour palette as hex strings; the values match the Okabe-Ito
# colour-blind-safe palette.
HEX_COLORS = [
    '#000000',  # black
    '#E69F00',  # orange
    '#56B4E9',  # sky blue
    '#009E73',  # bluish green
    '#F0E442',  # yellow
    '#0072B2',  # blue
    '#D55E00',  # vermillion
    '#CC79A7',  # reddish purple
]
# Same palette converted to matplotlib RGB tuples, in the same order.
RGB_COLORS = list(map(mcolors.hex2color, HEX_COLORS))

# Highlight colour (orange) and default bar colour (sky blue) used by the charts.
BAR_CHART_ORANGE = RGB_COLORS[1]
BAR_CHART_COLOR = RGB_COLORS[2]

## Plot 1: Comparison of the POS-tag-based approaches

In [None]:
# Grouped bar chart: for each category, the amount found by the plain POS-tag
# approach next to the amount found by the POS-tag approach with filter.
categories = ['Non-Opinionated', 'Opinionated', 'Comparative Opinionated', 'Superlative Opinionated']
values1 = [6250, 44578, 8697, 5259]  # plotted as 'POS-Tags'
values2 = [9422, 44070, 7343, 3543]  # plotted as 'POS-Tags + Filter'

fig, ax = plt.subplots()

bar_width = 0.35

# The first bar of each pair sits on the integer position, the second one is
# shifted right by exactly one bar width so the two touch.
bar_positions1 = np.arange(len(categories))
bar_positions2 = bar_positions1 + bar_width

ax.bar(bar_positions1, values1, width=bar_width, label='POS-Tags', color=BAR_CHART_COLOR)
ax.bar(bar_positions2, values2, width=bar_width, label='POS-Tags + Filter', color=BAR_CHART_ORANGE)

ax.set_xlabel('Categories')
ax.set_ylabel('Amount')
# Title spelling fixed ("Comparision" -> "Comparison").
ax.set_title('Comparison of the two POS-Tag based approaches')

# Put each tick in the middle of its pair of bars and rotate the long labels.
ax.set_xticks(bar_positions1 + bar_width / 2)
ax.set_xticklabels(categories, rotation='vertical')

ax.legend()

# Work on the figure object directly instead of pyplot's "current figure".
fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'compare_pos.pdf', format='pdf')

## Plot 2: Data-driven Approach (Opinionated)

In [None]:
# Bar chart comparing the data-driven approaches with the POS-tag approach.
# Values are listed in ascending order; labels use \u2227 (logical AND) and
# \u2228 (logical OR) for combinations of approaches.
values = [41728, 43867, 44070, 44578, 56012, 58587, 58862, 59298]
categories = ['POS-Tags \u2227 Set P', 'POS-Tags \u2227 Set PD' , 'POS-Tags + Filter','POS-Tags', 'Set P', 'Set PD','POS-Tags \u2228 Set P', 'POS-Tags \u2228 Set PD']

# Explicit figure/axes (as in Plot 1) so the bars never land on a leftover
# "current" figure when the cell is run outside the inline backend.
fig, ax = plt.subplots()
barchart = ax.bar(categories, values, color=BAR_CHART_COLOR)
# Highlight the plain POS-tag baseline; looking it up by label keeps the
# highlight correct even if the lists are reordered or extended.
barchart[categories.index('POS-Tags')].set_color(BAR_CHART_ORANGE)

# Add labels and title
ax.set_xlabel('Approaches')
ax.set_ylabel('Amount')
ax.set_title('Data-driven approaches vs POS-Tag approach (Positive)')

# Rotate x-labels so the long approach names do not overlap
ax.tick_params(axis='x', labelrotation=90)

fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'datadriven_positive.pdf', format='pdf')

In [None]:
# Sanity check: show how the escape sequences used in the plot labels render
# (\u2227 is the logical AND sign, \u2228 the logical OR sign).
for caption, symbol in (("Logical AND:", "\u2227"), ("Logical OR:", "\u2228")):
    print(caption, symbol)

## Plot 3: Data-driven Approach (Comparative Opinionated)

In [None]:
# Bar chart comparing the data-driven approaches with the POS-tag approach
# for the comparative case. Values are listed in ascending order; labels use
# \u2227 (logical AND) and \u2228 (logical OR) for combinations of approaches.
values = [6284, 7343, 7405, 8697, 19076, 20275, 21489, 21567]
categories = ['POS-Tags \u2227 Set C','POS-Tags + Filter', 'POS-Tags \u2227 Set CD', 'POS-Tags', 'Set C', 'Set CD', 'POS-Tags \u2228 Set C', 'POS-Tags \u2228 Set CD']

# Explicit figure/axes (as in Plot 1) so the bars never land on a leftover
# "current" figure when the cell is run outside the inline backend.
fig, ax = plt.subplots()
barchart = ax.bar(categories, values, color=BAR_CHART_COLOR)
# Highlight the plain POS-tag baseline; looking it up by label keeps the
# highlight correct even if the lists are reordered or extended.
barchart[categories.index('POS-Tags')].set_color(BAR_CHART_ORANGE)

# Add labels and title
ax.set_xlabel('Approaches')
ax.set_ylabel('Amount')
ax.set_title('Data-driven approaches vs POS-Tag approach (Comparative)')

# Rotate x-labels so the long approach names do not overlap
ax.tick_params(axis='x', labelrotation=90)

fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'datadriven_comparative.pdf', format='pdf')

## Plot 4: Data-driven Approach (Superlative Opinionated)

In [None]:
# Bar chart comparing the data-driven approach with the POS-tag approach for
# the superlative case. Values are listed in ascending order; labels use
# \u2227 (logical AND) and \u2228 (logical OR) for combinations of approaches.
values = [1690, 1973, 3543, 5259, 5542]
categories = ['POS-Tags \u2227 Set S', 'Set S', 'POS-Tags + Filter', 'POS-Tags', 'POS-Tags \u2228 Set S']

# Explicit figure/axes (as in Plot 1) so the bars never land on a leftover
# "current" figure when the cell is run outside the inline backend.
fig, ax = plt.subplots()
barchart = ax.bar(categories, values, color=BAR_CHART_COLOR)
# Highlight the plain POS-tag baseline; looking it up by label keeps the
# highlight correct even if the lists are reordered or extended.
barchart[categories.index('POS-Tags')].set_color(BAR_CHART_ORANGE)

# Add labels and title
ax.set_xlabel('Approaches')
ax.set_ylabel('Amount')
ax.set_title('Data-driven approaches vs POS-Tag approach (Superlative)')

# Rotate x-labels so the long approach names do not overlap
ax.tick_params(axis='x', labelrotation=90)

fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'datadriven_superlative.pdf', format='pdf')

## Plot 5: Rule-based Approach (Comparative Opinionated)

In [None]:
# Bar chart comparing the rule-based approaches with the POS-tag approach for
# the comparative case. Values are listed in non-decreasing order; labels use
# \u2227 (logical AND) and \u2228 (logical OR) for combinations of approaches.
values = [745, 748, 1025, 1533, 1722, 3051, 7343, 8697, 9485, 9485, 10723]
categories = ['POS-Tags \u2227 (Ruleset C \u2227 Set C)', 
              'POS-Tags \u2227 (Ruleset C \u2227 Set CD)', 
              'POS-Tags \u2227 Ruleset C', 
              'Ruleset C \u2227 Set C', 
              'Ruleset C \u2227 Set CD', 
              'Ruleset C',
              'POS-Tags + Filter',
              'POS-Tags',
              'POS-Tags \u2228 (Ruleset C \u2227 Set C)',
              'POS-Tags \u2228 (Ruleset C \u2227 Set CD)', 
              'POS-Tags \u2228 Ruleset C'
             ]

# Explicit figure/axes (as in Plot 1) so the bars never land on a leftover
# "current" figure when the cell is run outside the inline backend.
fig, ax = plt.subplots()
barchart = ax.bar(categories, values, color=BAR_CHART_COLOR)
# Highlight the plain POS-tag baseline; looking it up by label keeps the
# highlight correct even if the lists are reordered or extended.
barchart[categories.index('POS-Tags')].set_color(BAR_CHART_ORANGE)

# Add labels and title
ax.set_xlabel('Approaches')
ax.set_ylabel('Amount')
ax.set_title('Rule-based approaches vs POS-Tag approach (Comparative)')

# Rotate x-labels so the long approach names do not overlap
ax.tick_params(axis='x', labelrotation=90)

fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'rulebased_comparative.pdf', format='pdf')

## Plot 6: Rule-based Approach (Superlative Opinionated)

In [None]:
# Bar chart comparing the rule-based approach with the POS-tag approach for
# the superlative case. Values are listed in ascending order; labels use
# \u2227 (logical AND) and \u2228 (logical OR) for combinations of approaches.
values = [383, 477, 3543, 5259, 5353]
categories = ['POS-Tags \u2227 Ruleset S', 'Ruleset S', 'POS-Tags + Filter', 'POS-Tags', 'POS-Tags \u2228 Ruleset S']

# Explicit figure/axes (as in Plot 1) so the bars never land on a leftover
# "current" figure when the cell is run outside the inline backend.
fig, ax = plt.subplots()
barchart = ax.bar(categories, values, color=BAR_CHART_COLOR)
# Highlight the plain POS-tag baseline; looking it up by label keeps the
# highlight correct even if the lists are reordered or extended.
barchart[categories.index('POS-Tags')].set_color(BAR_CHART_ORANGE)

# Add labels and title
ax.set_xlabel('Approaches')
ax.set_ylabel('Amount')
ax.set_title('Rule-based approaches vs POS-Tag approach (Superlative)')

# Rotate x-labels so the long approach names do not overlap
ax.tick_params(axis='x', labelrotation=90)

fig.tight_layout()
fig.savefig(PDF_FOLDER_CHOSEN + 'rulebased_superlative.pdf', format='pdf')