# Evaluation
## Setup

In [None]:
import analysis.aggregator as ag
import data.file_handler as fh
from analysis.pipeline_blocks import calc_ari_for_tagged_senses, calc_ari_per_token, add_sense_counts_to_id_map
from data.corpus_handler import CorpusName, CorpusHandler
import plotly.express as px

In [None]:
# Display titles for the DataFrame columns that are plotted; handed to the
# plotly express calls through their `labels=` argument.
LABEL_DICT = dict(
    unique_sense_count='Unique Sense Count',
    total_token_count='Total Token Count',
    unique_token_count='Unique Token Count',
    ari='ARI',
)
# Figure dimensions passed as `width=` / `height=` to the plots.
WIDTH, HEIGHT = 512, 256

## Corpus Evaluation

In [None]:
# Open the SemCor corpus. The second argument is presumably the on-disk cache
# directory -- confirm against CorpusHandler.
corpus = CorpusHandler(CorpusName.SEMCOR, './data/corpus_cache')
# Corpus-level statistics for the tagged senses; the next cell reads its
# count entries by key to build a LaTeX table row.
st = ag.calc_corpus_statistics_for_tagged_senses(corpus)
# Bare expression so the notebook renders the statistics.
st

In [None]:
# Render the corpus statistics as one LaTeX table row: every value becomes a
# "& $value$" cell and the cells are separated by single spaces.
' '.join(
    f"& ${st[column]}$"
    for column in (
        'unique_sense_count',
        'unique_token_count',
        'total_token_count',
        'unique_monosemous_token_count',
        'total_monosemous_token_count',
        'unique_polysemous_token_count',
        'total_polysemous_token_count',
    )
)

In [None]:
# Fetch the corpus's tagged tokens; the baseline cells below overwrite a
# 'sense' column on this object.
tagged_tokens = corpus.get_tagged_tokens()

### Baseline: One cluster/sense per token

In [None]:
# Baseline: give every row (token occurrence) its own sense label by using the
# row position. len() is used instead of Series.count() because count() skips
# missing values: with any NaN token it would produce too few labels and the
# column assignment would fail with a length mismatch.
tagged_tokens['sense'] = range(len(tagged_tokens))
# Display the number of distinct sense labels together with the value returned
# by calc_ari_for_tagged_senses for this labelling.
tagged_tokens.sense.nunique(), calc_ari_for_tagged_senses(corpus, tagged_tokens)

### Baseline: One cluster/sense per unique token

In [None]:
# Baseline: all occurrences of the same token share one sense label, namely
# the integer code that factorize() assigns to that token.
tagged_tokens['sense'] = tagged_tokens['token'].factorize()[0]
# Display the number of distinct sense labels together with the value returned
# by calc_ari_for_tagged_senses for this labelling.
(
    tagged_tokens['sense'].nunique(),
    calc_ari_for_tagged_senses(corpus, tagged_tokens),
)

### Relationship Between Sense Counts and Token Counts

In [None]:
# Fetch the tagged tokens again; the baseline cells above wrote a 'sense'
# column onto the previous object.
tagged_tokens = corpus.get_tagged_tokens()
# Keep only the rows whose `tagged_sense` flag is set, then count the unique
# senses per token.
tagged_tokens_and_counts = ag.count_unique_senses_per_token(
    tagged_tokens.loc[tagged_tokens['tagged_sense']]
)
# Aggregate those per-token counts by sense count for the scatter plot.
token_counts_per_sense_count = ag.count_tokens_per_sense_count(
    tagged_tokens_and_counts
)
token_counts_per_sense_count.head()

In [None]:
# Log-log scatter of unique token count against unique sense count; marker
# size and grey shade both encode the total token count.
fig = px.scatter(
    token_counts_per_sense_count,
    x='unique_sense_count',
    y='unique_token_count',
    color='total_token_count',
    size='total_token_count',
    color_continuous_scale=px.colors.sequential.Greys,
    log_x=True,
    log_y=True,
    labels=LABEL_DICT,
    template='plotly_white',
    width=WIDTH,
    height=WIDTH,  # WIDTH for both dimensions, i.e. a square figure
)
# Thin colour bar with its title on the right and a tick every 10000 tokens.
fig.update_layout(
    coloraxis_colorbar={
        'title_side': 'right',
        'thicknessmode': "fraction",
        'thickness': 0.04,
        'ticks': "outside",
        'dtick': 10000,
    }
)
# Black marker outlines keep the light grey markers visible.
fig.update_traces(marker=dict(line=dict(color='black')))

fig.show()
#fig.write_image('data/plots/fig_semcor_tokens_and_senses.pdf')

In [None]:
# Tokens with more than 50 unique senses, ordered by ascending sense count.
(
    tagged_tokens_and_counts
    .loc[tagged_tokens_and_counts['unique_sense_count'] > 50]
    .sort_values(by='unique_sense_count')
)

## Dictionary Evaluation

In [None]:
# Resolve the results directory that holds the pickled dictionaries.
ABS_PATH = fh.add_and_get_abs_path('./data/results')
# Load the sense dictionary that the cells below evaluate. Without an active
# assignment `dictionary` is never defined and those cells fail with a
# NameError when the notebook is run top to bottom. The SemCor result is
# loaded by default because `corpus` is built from CorpusName.SEMCOR.
dictionary = fh.load_df(ABS_PATH, 'semcor-dist_0.4-dictionary.pkl')
# Alternative input (toy run); swap with the line above to evaluate it instead.
#dictionary = fh.load_df(ABS_PATH, 'toy-linkage_single-dist_0.4-dictionary.pkl')

In [None]:
# Unpack the dictionary on its 'reference_id', 'word_vector_id' and 'sense'
# fields, then collect references and word vectors with ['token', 'sense'] as
# the second argument (presumably the grouping columns -- confirm against the
# aggregator helpers). Requires `dictionary` to be loaded beforehand.
ag.collect_references_and_word_vectors(
    ag.unpack_per_word_vector(
        dictionary,
        ['reference_id', 'word_vector_id', 'sense']),
    ['token', 'sense'])

In [None]:
# Run the per-token ARI calculation on the dictionary, then plot a histogram
# of the 'ari' column for the rows whose `tagged_token` flag is set.
dictionary = calc_ari_per_token(corpus, dictionary)
fig_ari_per_token = px.histogram(
    dictionary.loc[dictionary['tagged_token']],
    x='ari',
    range_x=[-1, 1],
    labels=LABEL_DICT,
    template='plotly_white',
    width=WIDTH,
    height=HEIGHT,
)
fig_ari_per_token.show()

In [None]:
# Add the sense counts to the dictionary, then average the 'ari' column per
# unique sense count over the rows whose `tagged_token` flag is set.
dictionary = add_sense_counts_to_id_map(corpus, dictionary)
ari_per_sense_count = (
    dictionary.loc[dictionary['tagged_token']]
    .groupby(by='unique_sense_count')
    .agg({'ari': 'mean'})
    .reset_index()
)
ari_per_sense_count

In [None]:
# Bar chart of the mean ARI for each unique sense count, with the y axis
# fixed to [-1, 1].
fig_ari_per_sense_count = px.bar(
    ari_per_sense_count,
    x='unique_sense_count',
    y='ari',
    range_y=[-1, 1],
    labels=LABEL_DICT,
    template='plotly_white',
    width=WIDTH,
    height=HEIGHT,
)
fig_ari_per_sense_count.show()