### Trial-set evaluation of Substitute Selection (SS) phase 2, applied to the two best models resulting from the Substitute Generation (SG) step and SS phase 1
This notebook corresponds to the results presented in section 4.3 of the thesis. 


In [1]:
from utils import *

#### For model 'roberta-base':

In [2]:
# Language model evaluated in this section; the selection cells below reuse it via
# the objects created here.
model = 'roberta-base'

# Load the data and the frame that every selection cell below receives and rebinds
# (presumably empty at this point, going by the helper's name - confirm in utils).
(data, substitutes_df) = get_data_and_create_empty_df()

# String form of the model id for file names; the recorded export messages below end
# in 'robertabase', which is presumably this value.
model_name_str = get_str_for_file_name(model)

# Objects handed to the selection helpers. lm_model is not referenced by any other
# cell visible in this notebook.
(
    nlp,
    lm_tokenizer,
    lm_model,
    fill_mask,
) = instantiate_spacy_tokenizer_model_pipeline(model)

In [3]:
# Substitute Selection phase 2, option 1: place the substitutes that are synonyms of
# the complex word first, i.e. lemmatized substitutes that share a synset with the
# lemmatized complex word.
# The recorded output shows the predictions being exported to
# './predictions/trial/SS_phase2_option1_SharedSyns_robertabase.tsv'.
# NOTE(review): substitutes_df is both an argument and the rebound result, so re-running
# this cell may not be idempotent - confirm against the helper in utils.
substitutes_df = substitute_selection_phase_2_option_1(data, substitutes_df, lm_tokenizer, fill_mask, model_name_str, nlp)

SS_phase2_option1_SharedSyns_robertabase exported to csv in path './predictions/trial/SS_phase2_option1_SharedSyns_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option1_SharedSyns_robertabase.tsv --output_file ./output/trial/SS_phase2_option1_SharedSyns_robertabase.tsv

In [3]:
# Substitute Selection phase 2, option 2a: place the substitutes that share a 1st-level
# hypernym with the complex word first. The 1-level hypernyms of all possible synsets of
# the complex word and of each substitute are retrieved and compared; substitutes with a
# shared hypernym are prioritized in the resulting list.
# levels=[1] selects the direct (1 level up) hypernyms only.
# The recorded output shows the predictions being exported to
# './predictions/trial/SS_phase2_option2_SharedHyper1_robertabase.tsv'.
# NOTE(review): substitutes_df is both an argument and the rebound result - confirm the
# helper does not depend on what the previous option left in it.
substitutes_df = substitute_selection_phase_2_option_2(data, substitutes_df, lm_tokenizer, fill_mask, model_name_str, nlp, levels=[1])

SS_phase2_option2_SharedHyper1_robertabase exported to csv in path './predictions/trial/SS_phase2_option2_SharedHyper1_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option2_SharedHyper1_robertabase.tsv --output_file ./output/trial/SS_phase2_option2_SharedHyper1_robertabase.tsv

In [4]:
# Substitute Selection phase 2, option 2b: place the substitutes that share an indirect
# hypernym (2 levels up) with the complex word first.
# levels=[2] selects the 2nd-level hypernyms only.
# The recorded output shows the predictions being exported to
# './predictions/trial/SS_phase2_option2_SharedHyper2_robertabase.tsv'.
# NOTE(review): substitutes_df is both an argument and the rebound result - confirm the
# helper does not depend on what the previous option left in it.
substitutes_df = substitute_selection_phase_2_option_2(data, substitutes_df, lm_tokenizer, fill_mask, model_name_str, nlp, levels=[2])

SS_phase2_option2_SharedHyper2_robertabase exported to csv in path './predictions/trial/SS_phase2_option2_SharedHyper2_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option2_SharedHyper2_robertabase.tsv --output_file ./output/trial/SS_phase2_option2_SharedHyper2_robertabase.tsv

In [5]:
# Substitute Selection phase 2, option 2c: place the substitutes that share either a
# direct (1 level up) or an indirect (2 levels up) hypernym with the complex word first.
# levels=[1,2] requests both hypernym levels in a single call.
# The recorded output shows the predictions being exported to
# './predictions/trial/SS_phase2_option2_SharedHyper1_2_robertabase.tsv'.
# NOTE(review): substitutes_df is both an argument and the rebound result - confirm the
# helper does not depend on what the previous option left in it.
substitutes_df = substitute_selection_phase_2_option_2(data, substitutes_df, lm_tokenizer, fill_mask, model_name_str, nlp, levels=[1,2])

SS_phase2_option2_SharedHyper1_2_robertabase exported to csv in path './predictions/trial/SS_phase2_option2_SharedHyper1_2_robertabase.tsv'



In [None]:
python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option2_SharedHyper1_2_robertabase.tsv --output_file ./output/trial/SS_phase2_option2_SharedHyper1_2_robertabase.tsv

In [7]:
# Substitute Selection phase 2, option 3: rank the substitutes by their BERTScore.
# BERTScore is computed with six different scoring models in turn (options 3a-3f).
# Option 3a: BERTScore with bert-base-uncased. `letter` is the option suffix that shows
# up in the exported file name (see the recorded "option3a" output).
score_model, letter = 'bert-base-uncased', 'a'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3a_BSbertbase_robertabase exported to csv in path './predictions/trial/SS_phase2_option3a_BSbertbase_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3a_BSbertbase_robertabase.tsv --output_file ./output/trial/SS_phase2_option3a_BSbertbase_robertabase.tsv

In [8]:
# Option 3b: rank the substitutes by BERTScore computed with bert-large-uncased.
# `letter` is the option suffix that shows up in the exported file name ("option3b").
score_model, letter = 'bert-large-uncased', 'b'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3b_BSbertlarge_robertabase exported to csv in path './predictions/trial/SS_phase2_option3b_BSbertlarge_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3b_BSbertlarge_robertabase.tsv --output_file ./output/trial/SS_phase2_option3b_BSbertlarge_robertabase.tsv

In [9]:
# Option 3c: rank the substitutes by BERTScore computed with google/electra-base-generator.
# `letter` is the option suffix that shows up in the exported file name ("option3c").
score_model, letter = 'google/electra-base-generator', 'c'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3c_BSelectrabase_robertabase exported to csv in path './predictions/trial/SS_phase2_option3c_BSelectrabase_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3c_BSelectrabase_robertabase.tsv --output_file ./output/trial/SS_phase2_option3c_BSelectrabase_robertabase.tsv

In [10]:
# Option 3d: rank the substitutes by BERTScore computed with google/electra-large-generator.
# `letter` is the option suffix that shows up in the exported file name ("option3d").
score_model, letter = 'google/electra-large-generator', 'd'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3d_BSelectralarge_robertabase exported to csv in path './predictions/trial/SS_phase2_option3d_BSelectralarge_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3d_BSelectralarge_robertabase.tsv --output_file ./output/trial/SS_phase2_option3d_BSelectralarge_robertabase.tsv

In [11]:
# Option 3e: rank the substitutes by BERTScore computed with roberta-base, i.e. the same
# model id that this section uses as `model`.
# `letter` is the option suffix that shows up in the exported file name ("option3e").
score_model, letter = 'roberta-base', 'e'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3e_BSrobertabase_robertabase exported to csv in path './predictions/trial/SS_phase2_option3e_BSrobertabase_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3e_BSrobertabase_robertabase.tsv --output_file ./output/trial/SS_phase2_option3e_BSrobertabase_robertabase.tsv

In [12]:
# Option 3f: rank the substitutes by BERTScore computed with roberta-large.
# `letter` is the option suffix that shows up in the exported file name ("option3f").
score_model, letter = 'roberta-large', 'f'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3f_BSrobertalarge_robertabase exported to csv in path './predictions/trial/SS_phase2_option3f_BSrobertalarge_robertabase.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3f_BSrobertalarge_robertabase.tsv --output_file ./output/trial/SS_phase2_option3f_BSrobertalarge_robertabase.tsv

#### For model 'google/electra-large-generator':

In [6]:
# Language model evaluated in this section; the selection cells below reuse it via
# the objects created here.
model = 'google/electra-large-generator'

# Reload the data and start again from a fresh frame, so this section does not build on
# the frame left behind by the roberta-base cells above.
(data, substitutes_df) = get_data_and_create_empty_df()

# String form of the model id for file names; the recorded export messages below end
# in 'electralarge', which is presumably this value.
model_name_str = get_str_for_file_name(model)

# Objects handed to the selection helpers. lm_model is not referenced by any other
# cell visible in this notebook.
(
    nlp,
    lm_tokenizer,
    lm_model,
    fill_mask,
) = instantiate_spacy_tokenizer_model_pipeline(model)

In [14]:
# Substitute Selection phase 2, option 1: place the substitutes that are synonyms of
# the complex word first, i.e. lemmatized substitutes that share a synset with the
# lemmatized complex word.
# The recorded output shows the predictions being exported to
# './predictions/trial/SS_phase2_option1_SharedSyns_electralarge.tsv'.
# NOTE(review): substitutes_df is both an argument and the rebound result, so re-running
# this cell may not be idempotent - confirm against the helper in utils.
substitutes_df = substitute_selection_phase_2_option_1(data, substitutes_df, lm_tokenizer, fill_mask, model_name_str, nlp)

SS_phase2_option1_SharedSyns_electralarge exported to csv in path './predictions/trial/SS_phase2_option1_SharedSyns_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option1_SharedSyns_electralarge.tsv --output_file ./output/trial/SS_phase2_option1_SharedSyns_electralarge.tsv

In [7]:
# Substitute Selection phase 2, option 2a: place the substitutes that share a 1st-level
# hypernym with the complex word first. The 1-level hypernyms of all possible synsets of
# the complex word and of each substitute are retrieved and compared; substitutes with a
# shared hypernym are prioritized in the resulting list.
# levels=[1] selects the direct (1 level up) hypernyms only.
# The recorded output shows the predictions being exported to
# './predictions/trial/SS_phase2_option2_SharedHyper1_electralarge.tsv'.
# NOTE(review): substitutes_df is both an argument and the rebound result - confirm the
# helper does not depend on what the previous option left in it.
substitutes_df = substitute_selection_phase_2_option_2(data, substitutes_df, lm_tokenizer, fill_mask, model_name_str, nlp, levels=[1])

SS_phase2_option2_SharedHyper1_electralarge exported to csv in path './predictions/trial/SS_phase2_option2_SharedHyper1_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option2_SharedHyper1_electralarge.tsv --output_file ./output/trial/SS_phase2_option2_SharedHyper1_electralarge.tsv

In [8]:
# Substitute Selection phase 2, option 2b: place the substitutes that share an indirect
# hypernym (2 levels up) with the complex word first.
# levels=[2] selects the 2nd-level hypernyms only.
# The recorded output shows the predictions being exported to
# './predictions/trial/SS_phase2_option2_SharedHyper2_electralarge.tsv'.
# NOTE(review): substitutes_df is both an argument and the rebound result - confirm the
# helper does not depend on what the previous option left in it.
substitutes_df = substitute_selection_phase_2_option_2(data, substitutes_df, lm_tokenizer, fill_mask, model_name_str, nlp, levels=[2])

SS_phase2_option2_SharedHyper2_electralarge exported to csv in path './predictions/trial/SS_phase2_option2_SharedHyper2_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option2_SharedHyper2_electralarge.tsv --output_file ./output/trial/SS_phase2_option2_SharedHyper2_electralarge.tsv

In [9]:
# Substitute Selection phase 2, option 2c: place the substitutes that share either a
# direct (1 level up) or an indirect (2 levels up) hypernym with the complex word first.
# levels=[1,2] requests both hypernym levels in a single call.
# The recorded output shows the predictions being exported to
# './predictions/trial/SS_phase2_option2_SharedHyper1_2_electralarge.tsv'.
# NOTE(review): substitutes_df is both an argument and the rebound result - confirm the
# helper does not depend on what the previous option left in it.
substitutes_df = substitute_selection_phase_2_option_2(data, substitutes_df, lm_tokenizer, fill_mask, model_name_str, nlp, levels=[1,2])

SS_phase2_option2_SharedHyper1_2_electralarge exported to csv in path './predictions/trial/SS_phase2_option2_SharedHyper1_2_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option2_SharedHyper1_2_electralarge.tsv --output_file ./output/trial/SS_phase2_option2_SharedHyper1_2_electralarge.tsv

In [18]:
# Substitute Selection phase 2, option 3: rank the substitutes by their BERTScore.
# BERTScore is computed with six different scoring models in turn (options 3a-3f).
# Option 3a: BERTScore with bert-base-uncased. `letter` is the option suffix that shows
# up in the exported file name (see the recorded "option3a" output).
score_model, letter = 'bert-base-uncased', 'a'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3a_BSbertbase_electralarge exported to csv in path './predictions/trial/SS_phase2_option3a_BSbertbase_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3a_BSbertbase_electralarge.tsv --output_file ./output/trial/SS_phase2_option3a_BSbertbase_electralarge.tsv

In [19]:
# Option 3b: rank the substitutes by BERTScore computed with bert-large-uncased.
# `letter` is the option suffix that shows up in the exported file name ("option3b").
score_model, letter = 'bert-large-uncased', 'b'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3b_BSbertlarge_electralarge exported to csv in path './predictions/trial/SS_phase2_option3b_BSbertlarge_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3b_BSbertlarge_electralarge.tsv --output_file ./output/trial/SS_phase2_option3b_BSbertlarge_electralarge.tsv

In [20]:
# Option 3c: rank the substitutes by BERTScore computed with google/electra-base-generator.
# `letter` is the option suffix that shows up in the exported file name ("option3c").
score_model, letter = 'google/electra-base-generator', 'c'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3c_BSelectrabase_electralarge exported to csv in path './predictions/trial/SS_phase2_option3c_BSelectrabase_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3c_BSelectrabase_electralarge.tsv --output_file ./output/trial/SS_phase2_option3c_BSelectrabase_electralarge.tsv

In [21]:
# Option 3d: rank the substitutes by BERTScore computed with google/electra-large-generator,
# i.e. the same model id that this section uses as `model`.
# `letter` is the option suffix that shows up in the exported file name ("option3d").
score_model, letter = 'google/electra-large-generator', 'd'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3d_BSelectralarge_electralarge exported to csv in path './predictions/trial/SS_phase2_option3d_BSelectralarge_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3d_BSelectralarge_electralarge.tsv --output_file ./output/trial/SS_phase2_option3d_BSelectralarge_electralarge.tsv

In [22]:
# Option 3e: rank the substitutes by BERTScore computed with roberta-base.
# `letter` is the option suffix that shows up in the exported file name ("option3e").
score_model, letter = 'roberta-base', 'e'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3e_BSrobertabase_electralarge exported to csv in path './predictions/trial/SS_phase2_option3e_BSrobertabase_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3e_BSrobertabase_electralarge.tsv --output_file ./output/trial/SS_phase2_option3e_BSrobertabase_electralarge.tsv

In [23]:
# Option 3f: rank the substitutes by BERTScore computed with roberta-large.
# `letter` is the option suffix that shows up in the exported file name ("option3f").
score_model, letter = 'roberta-large', 'f'
# score_model_name_str is returned alongside the frame but is not read by any other
# cell visible in this notebook.
substitutes_df, score_model_name_str = substitute_selection_phase_2_option_3(
    data,
    substitutes_df,
    lm_tokenizer,
    fill_mask,
    model_name_str,
    nlp,
    score_model,
    letter,
)

SS_phase2_option3f_BSrobertalarge_electralarge exported to csv in path './predictions/trial/SS_phase2_option3f_BSrobertalarge_electralarge.tsv'



python tsar_eval.py --gold_file ./data/trial/tsar2022_en_trial_gold_no_noise.tsv --predictions_file ./predictions/trial/SS_phase2_option3f_BSrobertalarge_electralarge.tsv --output_file ./output/trial/SS_phase2_option3f_BSrobertalarge_electralarge.tsv

#### Results
Based on the accumulated scores, the best model per strategy, resulting from the SS step phase 2, proceeds to the SR step (refer to table 4.12 in section 4.3.4 of the thesis):

- Regarding shared synsets: SS_phase2_option1_SharedSyns_robertabase (accum. score: 5.3215). Model name in thesis: RB_Syns-shared.
- Regarding shared hypernyms: SS_phase2_option2_SharedHyper2_robertabase (option 2b; accum. score: 5.8015). Model name in thesis: RB_Hyper2-shared.
- Regarding BERTScore similarity scores: SS_phase2_option3f_BSrobertalarge_robertabase (accum. score: 5.2656). Model name in thesis: RB_BSrl.