## TEST set: Substitute Ranking (SR) step with hypernym relations:

In [1]:
import pandas as pd
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag, word_tokenize

# initialize the WordNet lemmatizer
# NOTE(review): `lemmatizer` is not referenced by any cell visible in this part of the
# notebook (the ranking cells take their lemmas from spaCy) — confirm it is still needed.
lemmatizer = WordNetLemmatizer()


In [2]:
from nltk.corpus import wordnet as wn
import spacy
# spaCy pipeline shared by the ranking cells below, which read `.lemma_` from the first
# token of the complex word and of each substitute.
nlp = spacy.load("en_core_web_sm")

### for model  SS_bsRobertalarge_robertabase:

### Substitute Ranking option 1a: rank first the substitutes that share a lemma with a first-level (direct) hypernym of the complex word:

In [3]:
# Substitute Ranking (SR), option 1a: move the substitutes that share a lemma name with a
# first-level (direct) WordNet hypernym of the complex word to the front of the list.
# The original relative order is kept inside both groups.
input_path = './predictions/test/SS_phase2_option3f_BSrobertalarge_robertabase.tsv'
output_name = "SR_option1aShared1first_robertabase"
output_path = f"./predictions/test/{output_name}.tsv"

# read the predictions file into a df (no header row: sentence, complex word, substitutes)
pred_df = pd.read_csv(input_path, sep='\t', header=None)

ranked_rows = []
for row in pred_df.itertuples(index=False, name=None):
    sentence, complex_word = row[0], row[1]
    substitutes = row[2:12]

    # step a: lemmatize the complex word (first spaCy token only) and collect the lemma
    # names of the first-level hypernyms of all of its synsets.
    complex_word_lemma = nlp(complex_word)[0].lemma_
    hypernyms_1 = [h for syn in wn.synsets(complex_word_lemma) for h in syn.hypernyms()]
    # built once per row: this set does not depend on the substitute being checked
    hypernym_lemmas = {lemma for h in hypernyms_1 for lemma in h.lemma_names()}

    # steps b + c: look up the lemma names of every substitute's synsets and split the
    # substitutes on whether they share at least one name with the hypernyms.
    hypernym_substitutes = []
    other_substitutes = []
    for substitute in substitutes:
        if isinstance(substitute, str) and substitute:
            # only the first token is lemmatized, so a multi-word substitute is looked
            # up by its first word
            substitute_lemma = nlp(substitute)[0].lemma_
            substitute_lemmas = {
                lemma
                for syn in wn.synsets(substitute_lemma)
                for lemma in syn.lemma_names()
            }
        else:
            # missing cell (read_csv yields NaN when a row has fewer substitutes):
            # nothing to look up, and nlp() would raise on a non-string
            substitute_lemmas = set()

        if hypernym_lemmas & substitute_lemmas:
            hypernym_substitutes.append(substitute)  # keep the original surface form
        else:
            other_substitutes.append(substitute)

    # step d: hypernym-related substitutes first, all remaining ones after them
    ranked_rows.append([sentence, complex_word] + hypernym_substitutes + other_substitutes)

# Rebuild the frame in one go instead of writing into pred_df while iterating over it.
pred_df = pd.DataFrame(ranked_rows, index=pred_df.index)

# export the dataframe to tsv for evaluation
pred_df.to_csv(output_path, sep="\t", index=False, header=False)
print(f"{output_name} exported to csv in path '{output_path}'\n")
    

SR_option1aShared1first_robertabase exported to csv in path './predictions/test/SR_option1aShared1first_robertabase.tsv'}



In [None]:
python tsar_eval.py --gold_file ./data/test/tsar2022_en_test_gold_no_noise.tsv --predictions_file ./predictions/test/SR_option1aShared1first_robertabase.tsv --output_file ./output/test/SR_option1aShared1first_robertabase.tsv

### Substitute Ranking option 1b: rank first the substitutes that share a lemma with a second-level hypernym of the complex word:

In [4]:
# Substitute Ranking (SR), option 1b: move the substitutes that share a lemma name with a
# second-level WordNet hypernym of the complex word (a hypernym of one of its direct
# hypernyms) to the front of the list. The original relative order is kept inside both groups.
input_path = './predictions/test/SS_phase2_option3f_BSrobertalarge_robertabase.tsv'
output_name = "SR_option1bShared2first_robertabase"
output_path = f"./predictions/test/{output_name}.tsv"

# read the predictions file into a df (no header row: sentence, complex word, substitutes)
pred_df = pd.read_csv(input_path, sep='\t', header=None)

ranked_rows = []
for row in pred_df.itertuples(index=False, name=None):
    sentence, complex_word = row[0], row[1]
    substitutes = row[2:12]

    # step a: lemmatize the complex word (first spaCy token only), walk two levels up
    # the hypernym hierarchy from all of its synsets and collect the lemma names of the
    # second-level hypernyms.
    complex_word_lemma = nlp(complex_word)[0].lemma_
    hypernyms_1 = [h for syn in wn.synsets(complex_word_lemma) for h in syn.hypernyms()]
    hypernyms_2 = [h2 for h1 in hypernyms_1 for h2 in h1.hypernyms()]
    # built once per row: this set does not depend on the substitute being checked
    hypernym_lemmas = {lemma for h in hypernyms_2 for lemma in h.lemma_names()}

    # steps b + c: look up the lemma names of every substitute's synsets and split the
    # substitutes on whether they share at least one name with the 2nd-level hypernyms.
    hypernym_substitutes = []
    other_substitutes = []
    for substitute in substitutes:
        if isinstance(substitute, str) and substitute:
            # only the first token is lemmatized, so a multi-word substitute is looked
            # up by its first word
            substitute_lemma = nlp(substitute)[0].lemma_
            substitute_lemmas = {
                lemma
                for syn in wn.synsets(substitute_lemma)
                for lemma in syn.lemma_names()
            }
        else:
            # missing cell (read_csv yields NaN when a row has fewer substitutes):
            # nothing to look up, and nlp() would raise on a non-string
            substitute_lemmas = set()

        if hypernym_lemmas & substitute_lemmas:
            hypernym_substitutes.append(substitute)  # keep the original surface form
        else:
            other_substitutes.append(substitute)

    # step d: hypernym-related substitutes first, all remaining ones after them
    ranked_rows.append([sentence, complex_word] + hypernym_substitutes + other_substitutes)

# Rebuild the frame in one go instead of writing into pred_df while iterating over it.
pred_df = pd.DataFrame(ranked_rows, index=pred_df.index)

# export the dataframe to tsv for evaluation
pred_df.to_csv(output_path, sep="\t", index=False, header=False)
print(f"{output_name} exported to csv in path '{output_path}'\n")
    

SR_option1bShared2first_robertabase exported to csv in path './predictions/test/SR_option1bShared2first_robertabase.tsv'}



In [None]:
python tsar_eval.py --gold_file ./data/test/tsar2022_en_test_gold_no_noise.tsv --predictions_file ./predictions/test/SR_option1bShared2first_robertabase.tsv --output_file ./output/test/SR_option1bShared2first_robertabase.tsv

### Substitute Ranking option 1c: rank first the substitutes that share a lemma with either a first-level or a second-level hypernym of the complex word:

In [5]:
# Substitute Ranking (SR), option 1c: move the substitutes that share a lemma name with a
# first-level OR a second-level WordNet hypernym of the complex word to the front of the
# list. The original relative order is kept inside both groups.
input_path = './predictions/test/SS_phase2_option3f_BSrobertalarge_robertabase.tsv'
output_name = "SR_option1cShared1+2first_robertabase"
output_path = f"./predictions/test/{output_name}.tsv"

# read the predictions file into a df (no header row: sentence, complex word, substitutes)
pred_df = pd.read_csv(input_path, sep='\t', header=None)

ranked_rows = []
for row in pred_df.itertuples(index=False, name=None):
    sentence, complex_word = row[0], row[1]
    substitutes = row[2:12]

    # step a: lemmatize the complex word (first spaCy token only) and collect the lemma
    # names of both the first-level and the second-level hypernyms of all of its synsets.
    complex_word_lemma = nlp(complex_word)[0].lemma_
    hypernyms_1 = [h for syn in wn.synsets(complex_word_lemma) for h in syn.hypernyms()]
    hypernyms_2 = [h2 for h1 in hypernyms_1 for h2 in h1.hypernyms()]
    # built once per row: this set does not depend on the substitute being checked
    hypernym_lemmas = {lemma for h in hypernyms_1 + hypernyms_2 for lemma in h.lemma_names()}

    # steps b + c: look up the lemma names of every substitute's synsets and split the
    # substitutes on whether they share at least one name with those hypernyms.
    hypernym_substitutes = []
    other_substitutes = []
    for substitute in substitutes:
        if isinstance(substitute, str) and substitute:
            # only the first token is lemmatized, so a multi-word substitute is looked
            # up by its first word
            substitute_lemma = nlp(substitute)[0].lemma_
            substitute_lemmas = {
                lemma
                for syn in wn.synsets(substitute_lemma)
                for lemma in syn.lemma_names()
            }
        else:
            # missing cell (read_csv yields NaN when a row has fewer substitutes):
            # nothing to look up, and nlp() would raise on a non-string
            substitute_lemmas = set()

        if hypernym_lemmas & substitute_lemmas:
            hypernym_substitutes.append(substitute)  # keep the original surface form
        else:
            other_substitutes.append(substitute)

    # step d: hypernym-related substitutes first, all remaining ones after them
    ranked_rows.append([sentence, complex_word] + hypernym_substitutes + other_substitutes)

# Rebuild the frame in one go instead of writing into pred_df while iterating over it.
pred_df = pd.DataFrame(ranked_rows, index=pred_df.index)

# export the dataframe to tsv for evaluation
pred_df.to_csv(output_path, sep="\t", index=False, header=False)
print(f"{output_name} exported to csv in path '{output_path}'\n")

SR_option1cShared1+2first_robertabase exported to csv in path './predictions/test/SR_option1cShared1+2first_robertabase.tsv'}



python tsar_eval.py --gold_file ./data/test/tsar2022_en_test_gold_no_noise.tsv --predictions_file ./predictions/test/SR_option1cShared1+2first_robertabase.tsv --output_file ./output/test/SR_option1cShared1+2first_robertabase.tsv

### for model SS_bsRobertalarge_electralarge:


### Substitute Ranking option 1a: rank first the substitutes that share a lemma with a first-level (direct) hypernym of the complex word:

In [8]:
# Substitute Ranking (SR), option 1a: move the substitutes that share a lemma name with a
# first-level (direct) WordNet hypernym of the complex word to the front of the list.
# The original relative order is kept inside both groups.
input_path = './predictions/test/SS_phase2_option3f_BSrobertalarge_electralarge.tsv'
output_name = "SR_option1aShared1first_electralarge"
output_path = f"./predictions/test/{output_name}.tsv"

# read the predictions file into a df (no header row: sentence, complex word, substitutes)
pred_df = pd.read_csv(input_path, sep='\t', header=None)

ranked_rows = []
for row in pred_df.itertuples(index=False, name=None):
    sentence, complex_word = row[0], row[1]
    substitutes = row[2:12]

    # step a: lemmatize the complex word (first spaCy token only) and collect the lemma
    # names of the first-level hypernyms of all of its synsets.
    complex_word_lemma = nlp(complex_word)[0].lemma_
    hypernyms_1 = [h for syn in wn.synsets(complex_word_lemma) for h in syn.hypernyms()]
    # built once per row: this set does not depend on the substitute being checked
    hypernym_lemmas = {lemma for h in hypernyms_1 for lemma in h.lemma_names()}

    # steps b + c: look up the lemma names of every substitute's synsets and split the
    # substitutes on whether they share at least one name with the hypernyms.
    hypernym_substitutes = []
    other_substitutes = []
    for substitute in substitutes:
        if isinstance(substitute, str) and substitute:
            # only the first token is lemmatized, so a multi-word substitute is looked
            # up by its first word
            substitute_lemma = nlp(substitute)[0].lemma_
            substitute_lemmas = {
                lemma
                for syn in wn.synsets(substitute_lemma)
                for lemma in syn.lemma_names()
            }
        else:
            # missing cell (read_csv yields NaN when a row has fewer substitutes):
            # nothing to look up, and nlp() would raise on a non-string
            substitute_lemmas = set()

        if hypernym_lemmas & substitute_lemmas:
            hypernym_substitutes.append(substitute)  # keep the original surface form
        else:
            other_substitutes.append(substitute)

    # step d: hypernym-related substitutes first, all remaining ones after them
    ranked_rows.append([sentence, complex_word] + hypernym_substitutes + other_substitutes)

# Rebuild the frame in one go instead of writing into pred_df while iterating over it.
pred_df = pd.DataFrame(ranked_rows, index=pred_df.index)

# export the dataframe to tsv for evaluation
pred_df.to_csv(output_path, sep="\t", index=False, header=False)
print(f"{output_name} exported to csv in path '{output_path}'\n")
    

SR_option1aShared1first_electralarge exported to csv in path './predictions/test/SR_option1aShared1first_electralarge.tsv'}



python tsar_eval.py --gold_file ./data/test/tsar2022_en_test_gold_no_noise.tsv --predictions_file ./predictions/test/SR_option1aShared1first_electralarge.tsv --output_file ./output/test/SR_option1aShared1first_electralarge.tsv

### Substitute Ranking option 1b: rank first the substitutes that share a lemma with a second-level hypernym of the complex word:

In [9]:
# Substitute Ranking (SR), option 1b: move the substitutes that share a lemma name with a
# second-level WordNet hypernym of the complex word (a hypernym of one of its direct
# hypernyms) to the front of the list. The original relative order is kept inside both groups.
input_path = './predictions/test/SS_phase2_option3f_BSrobertalarge_electralarge.tsv'
output_name = "SR_option1bShared2first_electralarge"
output_path = f"./predictions/test/{output_name}.tsv"

# read the predictions file into a df (no header row: sentence, complex word, substitutes)
pred_df = pd.read_csv(input_path, sep='\t', header=None)

ranked_rows = []
for row in pred_df.itertuples(index=False, name=None):
    sentence, complex_word = row[0], row[1]
    substitutes = row[2:12]

    # step a: lemmatize the complex word (first spaCy token only), walk two levels up
    # the hypernym hierarchy from all of its synsets and collect the lemma names of the
    # second-level hypernyms.
    complex_word_lemma = nlp(complex_word)[0].lemma_
    hypernyms_1 = [h for syn in wn.synsets(complex_word_lemma) for h in syn.hypernyms()]
    hypernyms_2 = [h2 for h1 in hypernyms_1 for h2 in h1.hypernyms()]
    # built once per row: this set does not depend on the substitute being checked
    hypernym_lemmas = {lemma for h in hypernyms_2 for lemma in h.lemma_names()}

    # steps b + c: look up the lemma names of every substitute's synsets and split the
    # substitutes on whether they share at least one name with the 2nd-level hypernyms.
    hypernym_substitutes = []
    other_substitutes = []
    for substitute in substitutes:
        if isinstance(substitute, str) and substitute:
            # only the first token is lemmatized, so a multi-word substitute is looked
            # up by its first word
            substitute_lemma = nlp(substitute)[0].lemma_
            substitute_lemmas = {
                lemma
                for syn in wn.synsets(substitute_lemma)
                for lemma in syn.lemma_names()
            }
        else:
            # missing cell (read_csv yields NaN when a row has fewer substitutes):
            # nothing to look up, and nlp() would raise on a non-string
            substitute_lemmas = set()

        if hypernym_lemmas & substitute_lemmas:
            hypernym_substitutes.append(substitute)  # keep the original surface form
        else:
            other_substitutes.append(substitute)

    # step d: hypernym-related substitutes first, all remaining ones after them
    ranked_rows.append([sentence, complex_word] + hypernym_substitutes + other_substitutes)

# Rebuild the frame in one go instead of writing into pred_df while iterating over it.
pred_df = pd.DataFrame(ranked_rows, index=pred_df.index)

# export the dataframe to tsv for evaluation
pred_df.to_csv(output_path, sep="\t", index=False, header=False)
print(f"{output_name} exported to csv in path '{output_path}'\n")

SR_option1bShared2first_electralarge exported to csv in path './predictions/test/SR_option1bShared2first_electralarge.tsv'}



In [None]:
python tsar_eval.py --gold_file ./data/test/tsar2022_en_test_gold_no_noise.tsv --predictions_file ./predictions/test/SR_option1bShared2first_electralarge.tsv --output_file ./output/test/SR_option1bShared2first_electralarge.tsv

### Substitute Ranking option 1c: rank first the substitutes that share a lemma with either a first-level or a second-level hypernym of the complex word:

In [10]:
# Substitute Ranking (SR), option 1c: move the substitutes that share a lemma name with a
# first-level OR a second-level WordNet hypernym of the complex word to the front of the
# list. The original relative order is kept inside both groups.
input_path = './predictions/test/SS_phase2_option3f_BSrobertalarge_electralarge.tsv'
output_name = "SR_option1cShared1+2first_electralarge"
output_path = f"./predictions/test/{output_name}.tsv"

# read the predictions file into a df (no header row: sentence, complex word, substitutes)
pred_df = pd.read_csv(input_path, sep='\t', header=None)

ranked_rows = []
for row in pred_df.itertuples(index=False, name=None):
    sentence, complex_word = row[0], row[1]
    substitutes = row[2:12]

    # step a: lemmatize the complex word (first spaCy token only) and collect the lemma
    # names of both the first-level and the second-level hypernyms of all of its synsets.
    complex_word_lemma = nlp(complex_word)[0].lemma_
    hypernyms_1 = [h for syn in wn.synsets(complex_word_lemma) for h in syn.hypernyms()]
    hypernyms_2 = [h2 for h1 in hypernyms_1 for h2 in h1.hypernyms()]
    # built once per row: this set does not depend on the substitute being checked
    hypernym_lemmas = {lemma for h in hypernyms_1 + hypernyms_2 for lemma in h.lemma_names()}

    # steps b + c: look up the lemma names of every substitute's synsets and split the
    # substitutes on whether they share at least one name with those hypernyms.
    hypernym_substitutes = []
    other_substitutes = []
    for substitute in substitutes:
        if isinstance(substitute, str) and substitute:
            # only the first token is lemmatized, so a multi-word substitute is looked
            # up by its first word
            substitute_lemma = nlp(substitute)[0].lemma_
            substitute_lemmas = {
                lemma
                for syn in wn.synsets(substitute_lemma)
                for lemma in syn.lemma_names()
            }
        else:
            # missing cell (read_csv yields NaN when a row has fewer substitutes):
            # nothing to look up, and nlp() would raise on a non-string
            substitute_lemmas = set()

        if hypernym_lemmas & substitute_lemmas:
            hypernym_substitutes.append(substitute)  # keep the original surface form
        else:
            other_substitutes.append(substitute)

    # step d: hypernym-related substitutes first, all remaining ones after them
    ranked_rows.append([sentence, complex_word] + hypernym_substitutes + other_substitutes)

# Rebuild the frame in one go instead of writing into pred_df while iterating over it.
pred_df = pd.DataFrame(ranked_rows, index=pred_df.index)

# export the dataframe to tsv for evaluation
pred_df.to_csv(output_path, sep="\t", index=False, header=False)
print(f"{output_name} exported to csv in path '{output_path}'\n")

SR_option1cShared1+2first_electralarge exported to csv in path './predictions/test/SR_option1cShared1+2first_electralarge.tsv'}



In [None]:
python tsar_eval.py --gold_file ./data/test/tsar2022_en_test_gold_no_noise.tsv --predictions_file ./predictions/test/SR_option1cShared1+2first_electralarge.tsv --output_file ./output/test/SR_option1cShared1+2first_electralarge.tsv