# Evaluation
## Setup

In [None]:
import aggregation.aggregator as ag
import data.file_handler as fh
import data.file_name_generator as fg
from aggregation.pipeline_blocks import add_sense_counts_to_id_map, calc_ari, calc_ari_per_token, calc_silhouette_score_per_token
from clustering.linkage_name import LinkageName
from clustering.metric_name import MetricName
from data.corpus_handler import CorpusName, CorpusHandler
import plotly.express as px

In [None]:
# Display titles keyed by DataFrame column name; handed to plotly express via
# its ``labels=`` argument so axes and colour bars get readable names.
LABEL_DICT = dict(
    unique_sense_count='Unique Sense Count',
    total_token_count='Total Token Count',
    unique_token_count='Unique Token Count',
    ari='ARI',
    silhouette_score='Silhouette Score',
)

# Figure dimensions passed to plotly's ``width``/``height`` arguments (pixels).
WIDTH, HEIGHT = 512, 256

## Corpus Evaluation

In [None]:
# Open the TOY corpus. The second argument is presumably the on-disk cache
# directory used by CorpusHandler -- confirm in data.corpus_handler.
corpus = CorpusHandler(CorpusName.TOY, './data/corpus_cache')
# Corpus-level statistics over the sense-tagged part of the corpus (as computed
# by the aggregator helper); kept in ``st`` for inspection.
st = ag.calc_corpus_statistics_for_tagged_senses(corpus)
# Bare expression: the notebook renders the statistics as the cell output.
st

In [None]:
# Working table of tagged tokens; the baseline cells below overwrite its
# 'sense' column. NOTE(review): if get_tagged_tokens() returns a shared/cached
# object rather than a fresh frame, those writes also change the reference data
# used in the comparisons -- confirm in CorpusHandler.
tagged_tokens = corpus.get_tagged_tokens()

### Complete Disambiguation Baseline: One cluster/sense per token

In [None]:
# Baseline "complete disambiguation": every row gets its own sense id.
# Use len() rather than Series.count(): count() skips missing values, so a
# single NaN token (e.g. a literal "null"/"NA" parsed as missing on load) would
# make the range shorter than the frame and the column assignment would fail
# with a length-mismatch error.
tagged_tokens['sense'] = range(len(tagged_tokens))
# Score the baseline against a freshly fetched copy of the tagged tokens; the
# bare call lets the notebook display the result (presumably the adjusted Rand
# index -- see aggregation.pipeline_blocks.calc_ari).
calc_ari(corpus.get_tagged_tokens(), tagged_tokens)

### No Disambiguation Baseline: One cluster/sense per unique token

In [None]:
# Baseline "no disambiguation": all occurrences of the same token string share
# one sense id. factorize() returns (codes, uniques); the integer codes are
# used as the sense ids.
tagged_tokens['sense'] = tagged_tokens.token.factorize()[0]
# Score the baseline against a freshly fetched set of tagged tokens; the bare
# call lets the notebook display the result.
calc_ari(corpus.get_tagged_tokens(), tagged_tokens)

### Relationship Between Sense Counts and Token Counts

In [None]:
# Re-fetch the tagged tokens (the baseline cells above overwrote the 'sense'
# column of the previous working table).
tagged_tokens = corpus.get_tagged_tokens()

# Keep only the rows selected by the 'tagged_sense' column before counting.
sense_tagged_rows = tagged_tokens[tagged_tokens.tagged_sense]
tagged_tokens_and_counts = ag.count_unique_senses_per_token(sense_tagged_rows)

# Aggregate the per-token counts by sense count (this table feeds the scatter
# plot in the next cell).
token_counts_per_sense_count = ag.count_tokens_per_sense_count(tagged_tokens_and_counts)

# Preview the first rows as the cell output.
token_counts_per_sense_count.head()

In [None]:
# Log-log scatter of unique token count against unique sense count; marker
# colour and size both encode the total token count.
scatter_options = dict(
    x='unique_sense_count',
    y='unique_token_count',
    color='total_token_count',
    size='total_token_count',
    color_continuous_scale=px.colors.sequential.Greys,
    log_x=True,
    log_y=True,
    labels=LABEL_DICT,
    template='plotly_white',
    width=WIDTH,
    # NOTE(review): height is set to WIDTH (square figure) while the other
    # figures in this notebook use HEIGHT -- confirm this is intentional.
    height=WIDTH,
)
fig = px.scatter(token_counts_per_sense_count, **scatter_options)

# Slim colour bar with its title on the right and outside tick marks.
colorbar_style = dict(
    title_side='right',
    thicknessmode="fraction",
    thickness=0.04,
    ticks="outside",
)
fig.update_layout(coloraxis_colorbar=colorbar_style)

# Black marker outlines keep light grey points visible on the white template.
fig.update_traces(marker=dict(line=dict(color='black')))

fig.show()
# Uncomment to export the figure as a PDF:
#fig.write_image(f'data/plots/fig_{corpus.corpus_name}_tokens_and_senses.pdf')

## Dictionary Evaluation

In [None]:
# Absolute path of the results directory (the helper name suggests it also
# registers/creates the path -- confirm in data.file_handler).
ABS_PATH = fh.add_and_get_abs_path('./data/results')

# Select the experiment whose dictionary is evaluated below. Exactly one
# EXPERIMENT_PREFIX assignment must be active; with both commented out the
# load below fails with NameError. The Euclidean variant is enabled because the
# silhouette evaluation further down also uses MetricName.EUCLIDEAN; switch to
# the alternative to evaluate the cosine / distance-threshold experiment.
#EXPERIMENT_PREFIX = fg.gen_experiment_prefix(corpus.corpus_name, MetricName.COSINE, LinkageName.SINGLE, 0.4)
EXPERIMENT_PREFIX = fg.gen_experiment_prefix_no_dist(corpus.corpus_name, MetricName.EUCLIDEAN, LinkageName.SINGLE)

# Load the dictionary DataFrame produced by that experiment.
dictionary = fh.load_df(ABS_PATH, fg.gen_dictionary_file_name(EXPERIMENT_PREFIX))

### Browse Dictionary

In [None]:
# Step 1: unpack the dictionary on the id/sense columns and sort it per token
# id (see aggregation.aggregator for the exact semantics).
unpacked_dictionary = ag.unpack_and_sort_per_token_id(
    dictionary, ['sentence_id', 'token_id', 'sense'])

# Step 2: pack the sentence/token ids again, grouped by token and sense. The
# bare expression is rendered as the cell output for browsing.
ag.pack_sentence_ids_and_token_ids(unpacked_dictionary, ['token', 'sense'])

### ARI per Token and Unique Sense Count

In [None]:
# Add a per-token 'ari' column to the dictionary, scored against the tagged
# tokens of the corpus.
dictionary = calc_ari_per_token(corpus.get_tagged_tokens(), dictionary)

# Histogram of the ARI values, restricted to the rows selected by the
# 'tagged_token' column; the x axis is fixed to [-1, 1].
tagged_entries = dictionary[dictionary.tagged_token]
histogram_options = dict(
    x='ari',
    range_x=[-1, 1],
    labels=LABEL_DICT,
    template='plotly_white',
    width=WIDTH,
    height=HEIGHT,
)
fig_ari_per_token = px.histogram(tagged_entries, **histogram_options)
fig_ari_per_token.show()

In [None]:
# Make sure the dictionary carries the 'unique_sense_count' column; it is only
# added when missing so the cell can be re-run safely.
has_sense_counts = 'unique_sense_count' in dictionary.columns
if not has_sense_counts:
    dictionary = add_sense_counts_to_id_map(corpus.get_tagged_tokens(), dictionary)

# Mean ARI per unique sense count, over the rows selected by 'tagged_token'.
ari_per_sense_count = (
    dictionary[dictionary.tagged_token]
    .groupby(by='unique_sense_count')
    .aggregate({'ari': 'mean'})
    .reset_index()
)

fig_ari_per_sense_count = px.bar(
    ari_per_sense_count,
    x='unique_sense_count',
    y='ari',
    labels=LABEL_DICT,
    template='plotly_white',
    width=WIDTH,
    height=HEIGHT,
)

# Request as many x ticks as the largest sense count so integer sense counts
# can each get their own tick.
max_sense_count = int(ari_per_sense_count.unique_sense_count.max())
fig_ari_per_sense_count.update_xaxes(
    showgrid=False,
    ticks="outside",
    tickson="labels",
    ticklen=4,
    nticks=max_sense_count,
)

fig_ari_per_sense_count.show()

### Silhouette Coefficient per Token and Unique Sense Count

In [None]:
# Load the word-vector matrix stored for this corpus in the results directory.
word_vector_file = fg.gen_word_vec_file_name(corpus.corpus_name)
word_vectors = fh.load_matrix(ABS_PATH, word_vector_file)

# Add a per-token 'silhouette_score' column, computed with the Euclidean metric.
dictionary = calc_silhouette_score_per_token(word_vectors, dictionary, MetricName.EUCLIDEAN)

# Histogram of the silhouette scores, restricted to the rows selected by the
# 'tagged_token' column; the x axis is fixed to [-1, 1].
tagged_entries = dictionary[dictionary.tagged_token]
histogram_options = dict(
    x='silhouette_score',
    range_x=[-1, 1],
    labels=LABEL_DICT,
    template='plotly_white',
    width=WIDTH,
    height=HEIGHT,
)
fig_silhouette_score_per_token = px.histogram(tagged_entries, **histogram_options)
fig_silhouette_score_per_token.show()

In [None]:
# Make sure the dictionary carries the 'unique_sense_count' column; it is only
# added when missing so the cell can be re-run safely.
has_sense_counts = 'unique_sense_count' in dictionary.columns
if not has_sense_counts:
    dictionary = add_sense_counts_to_id_map(corpus.get_tagged_tokens(), dictionary)

# Mean silhouette score per unique sense count, over the rows selected by
# 'tagged_token'; groups whose mean is NaN are dropped before plotting.
silhouette_score_per_sense_count = (
    dictionary[dictionary.tagged_token]
    .groupby(by='unique_sense_count')
    .aggregate({'silhouette_score': 'mean'})
    .dropna()
    .reset_index()
)

fig_silhouette_score_per_sense_count = px.bar(
    silhouette_score_per_sense_count,
    x='unique_sense_count',
    y='silhouette_score',
    labels=LABEL_DICT,
    template='plotly_white',
    width=WIDTH,
    height=HEIGHT,
)

# Request as many x ticks as the largest sense count so integer sense counts
# can each get their own tick.
max_sense_count = int(silhouette_score_per_sense_count.unique_sense_count.max())
fig_silhouette_score_per_sense_count.update_xaxes(
    showgrid=False,
    ticks="outside",
    tickson="labels",
    ticklen=4,
    nticks=max_sense_count,
)

fig_silhouette_score_per_sense_count.show()