In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython import display

# Apply the 'fivethirtyeight' Matplotlib style sheet to the figures created below.
plt.style.use('fivethirtyeight')
# Request SVG as the figure format used when figures are rendered in the notebook.
# NOTE(review): IPython.display.set_matplotlib_formats appears to be deprecated
# in recent IPython releases in favour of
# matplotlib_inline.backend_inline.set_matplotlib_formats — confirm against the
# installed version before relying on it.
display.set_matplotlib_formats('svg')

In [None]:
# Path template for the metric tables: the first slot takes the corpus name,
# the second an optional suffix ('' for the base file, '_oov' for its variant).
file_path = '/content/drive/My Drive/TCC_data/metrics/data/evaluate_metrics_{}{}.tsv'

# Maps a label to the DataFrame read from the matching TSV file.
# Insertion order matters: each corpus is immediately followed by its '_oov'
# counterpart, and plot_comparison draws consecutive entries as one pair.
dfs = dict()
for name in ['cod_def_cons', 'constituicao', 'voxforge', 'mozilla']:
    dfs[name] = pd.read_csv(file_path.format(name, ''), sep='\t')
    dfs[name + '_oov'] = pd.read_csv(file_path.format(name, '_oov'), sep='\t')

In [None]:
def plot_comparison(eval, metric, dfs):
    """Draw a bar chart of the mean ``eval_<eval>`` value of every frame in *dfs*.

    Parameters
    ----------
    eval : str
        Suffix of the column to average; the column read from each frame is
        ``'eval_' + eval``. The title-cased value is also used in the chart
        title. (The name shadows the ``eval`` builtin; it is kept so that
        existing keyword callers keep working.)
    metric : str
        Text for the y-axis label; it is lower-cased before being used.
    dfs : dict
        Mapping of bar label -> DataFrame containing the ``eval_<eval>``
        column. Consecutive entries are drawn as a pair: the two bars sit
        close together and share a colour. An odd number of entries is
        accepted; the last bar then simply has no partner.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the bars, on a newly created figure.

    Raises
    ------
    KeyError
        If a frame lacks the ``eval_<eval>`` column.
    """
    col_name = 'eval_{}'.format(eval)

    labels = list(dfs.keys())
    values = [df[col_name].values.mean() for df in dfs.values()]

    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap is available in old and new releases alike.
    palette = plt.get_cmap('Set1').colors
    # Repeat every palette colour twice so both bars of a pair match.
    colors = [c for c in palette for _ in (0, 1)]

    # Pair k starts at k; the second member of a pair is shifted by 0.35.
    # Computing the position per label (instead of per pair) keeps the number
    # of positions equal to the number of values even for an odd count.
    x = np.array([i // 2 + (i % 2) * 0.35 for i in range(len(labels))])

    width = 0.6

    fig, ax = plt.subplots(figsize=(8, 5))

    # Positions are doubled to spread the pairs out; with align='edge' each
    # position is the left edge of its bar.
    bars = ax.bar(x * 2, values, width, align='edge', color=colors)

    ax.set_ylabel(metric.lower())
    ax.set_title('{} Score by Corpus'.format(eval.title()), pad=20)
    ax.set_xticks(x * 2)
    ax.set_xticklabels(labels, rotation=45)

    def autolabel(rects):
        """Attach a text label above each bar in *rects*, displaying its height."""
        for rect in rects:
            height = rect.get_height()
            ax.annotate(
                '{:.03f}'.format(height),
                xy=(rect.get_x() + rect.get_width() / 2, height),
                xytext=(0, 3),  # 3 points vertical offset
                textcoords="offset points",
                ha='center', va='bottom', fontsize=10
            )

    autolabel(bars)

    fig.tight_layout()

    return ax

In [None]:
# One chart per entry: (column suffix handed to plot_comparison, y-axis label).
# The first element is also used as the file name of the saved PNG.
metrics = [
    ('word2vec_cbow_s50', 'cosine similarity'), ('word2vec_skip_s50', 'cosine similarity'),
    ('wang2vec_cbow_s50', 'cosine similarity'), ('wang2vec_skip_s50', 'cosine similarity'),
    ('bleu', 'bleu'), ('meteor', 'meteor'),
    ('word error rate', 'wer %'), ('jaccard_distance', 'jaccard distance')
]

# Destination template for the rendered charts; the slot takes the column suffix.
plot_path = '/content/drive/My Drive/TCC_data/metrics/plots/{}.png'

for eval_name, metric_label in metrics:
    ax = plot_comparison(eval_name, metric_label, dfs)
    # Save through the figure that was just drawn rather than through
    # pyplot's implicit "current figure" state.
    ax.figure.savefig(plot_path.format(eval_name))