# About the model

The model is adapted from the baseline, and uses the same gloss method as described in [UAlberta at SemEval 2022 Task 2: Leveraging Glosses and Translations for Multilingual Idiomaticity Detection](https://arxiv.org/abs/2205.14084). 

### Input

For 'sentence1': the target sentence, without context and with the MWE highlighted using [SEP] tokens as in [HIT at SemEval-2022 Task 2: Pre-trained Language Model for Idioms Detection](https://aclanthology.org/2022.semeval-1.28/). For 'sentence2': 2 glosses for each word in the MWE. For PT and GA, the glosses are looked up for the English translation of each word (obtained with Google Translate).


### Evaluation
The model achieves a macro-F1 score of 0.7558 on the zero-shot dev data and 0.8817 on the one-shot dev data. The model is trained for 9 epochs.

### Future Work
Future work could still investigate the effect of: more epochs, more data, using glosses in the source language, using more or fewer glosses, looking at several translations of a word rather than only the best one according to Google, using other translators, and using a different WordNet.

# Setup

In [None]:
# setup

# fetch the two repositories used below: the task data is read from
# SemEval_2022_Task2-idiomaticity/ and the training script is run from
# AStitchInLanguageModels/
!git clone https://github.com/H-TayyarMadabushi/SemEval_2022_Task2-idiomaticity.git
!git clone https://github.com/H-TayyarMadabushi/AStitchInLanguageModels.git
# third-party packages needed by the later cells
!pip install transformers
!pip install datasets
!pip install -U deep-translator

# re-run the site initialisation
# NOTE(review): presumably so the freshly installed packages become importable
# without restarting the runtime — TODO confirm
import site
site.main()


Cloning into 'SemEval_2022_Task2-idiomaticity'...
remote: Enumerating objects: 123, done.[K
remote: Counting objects: 100% (123/123), done.[K
remote: Compressing objects: 100% (106/106), done.[K
remote: Total 123 (delta 48), reused 61 (delta 15), pack-reused 0[K
Receiving objects: 100% (123/123), 2.50 MiB | 10.25 MiB/s, done.
Resolving deltas: 100% (48/48), done.
Cloning into 'AStitchInLanguageModels'...
remote: Enumerating objects: 1030, done.[K
remote: Counting objects: 100% (17/17), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 1030 (delta 11), reused 4 (delta 4), pack-reused 1013[K
Receiving objects: 100% (1030/1030), 79.59 MiB | 15.84 MiB/s, done.
Resolving deltas: 100% (394/394), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 21.6 MB/s 
Collecting huggingfa

In [None]:
# helper functions

import os
import csv
import re

from pathlib import Path

def load_csv( path, delimiter=',' ) :
    """Read a delimited text file and separate its first row from the rest.

    Args:
        path: Location of the UTF-8 encoded file to read.
        delimiter: Field separator handed to ``csv.reader`` (default ``','``).

    Returns:
        A ``(header, data)`` tuple. ``header`` is the first row as a list of
        strings, or ``None`` when the file contains no rows; ``data`` is a
        list with every remaining row.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # stored inside quoted fields come back exactly as they were written.
    with open( path, encoding='utf-8', newline='' ) as csvfile:
        reader = csv.reader( csvfile, delimiter=delimiter )
        header = next( reader, None )
        data   = list( reader )
    return header, data

def write_csv( data, location ) :
    """Write rows to a comma-separated, UTF-8 encoded file and report the path.

    Args:
        data: Iterable of rows (each an iterable of cell values); the caller
            is expected to include the header row if one is wanted.
        location: Path of the file to create or overwrite.

    Returns:
        None. Prints ``Wrote <location>`` once the file has been written.
    """
    # newline='' stops the text layer from translating the row terminators the
    # csv writer emits (otherwise some platforms end up with blank rows).
    with open( location, 'w', encoding='utf-8', newline='' ) as csvfile:
        writer = csv.writer( csvfile )
        writer.writerows( data )
    print( "Wrote {}".format( location ) )
    return

# cut a piece of text at every single space or hyphen
def split(text):
    """Return the list of pieces of *text* found between spaces and hyphens.

    Every individual space or hyphen is a cut point, so two adjacent
    separators yield an empty string between them.
    """
    pieces = re.split(r'[ -]', text)
    return pieces

# Preprocess

In [None]:
# translate

from deep_translator import GoogleTranslator 

# store translations for every word inside an MWE whose language is not the target
def batch_translate(header, data, target='EN'):
    """Translate the individual MWE words of all rows not in the target language.

    Args:
        header: Column names of ``data``; must contain 'Language' and 'MWE'
            whenever ``data`` is non-empty.
        data: Rows whose 'MWE' cell is cut into words with ``split``.
        target: Language code the words are translated into (default 'EN').
            Rows whose 'Language' equals this value are left out.

    Returns:
        A dict mapping each source language code (as found in the data) to a
        dict ``{source word: translated word}``. Languages equal to ``target``
        have no entry.
    """
    print('Batch translating...')
    # An insertion-ordered dict per language de-duplicates the words in
    # constant time while keeping the order in which they were first seen.
    pending = {}
    for elem in data:
        lang = elem[header.index('Language')]
        MWE_split = split(elem[header.index('MWE')])
        if lang != target:
            pending.setdefault(lang, {}).update(dict.fromkeys(MWE_split))

    translate = {}
    for lang, unique_words in pending.items():
        words = list(unique_words)
        # one batched request per source language
        # NOTE(review): assumes GoogleTranslator takes (source, target) positionally — confirm
        translation = GoogleTranslator(lang.lower(), target.lower()).translate_batch(words)
        translate[lang] = {word: translation[i] for i, word in enumerate(words)}
    print('Finished translating')
    return translate


In [None]:
# gloss 
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.corpus import wordnet as wn


# fetch up to n_gloss WordNet definitions of a word, padded with '' to a fixed length
def get_gloss(word, n_gloss):
    """Return the first ``n_gloss`` capitalised WordNet definitions of *word*.

    When WordNet lists fewer synsets than requested, the result is filled up
    with empty strings so that it still has ``n_gloss`` entries.
    """
    definitions = []
    for synset in wn.synsets(word)[:n_gloss]:
        definitions.append(synset.definition().capitalize())
    missing = n_gloss - len(definitions)
    return definitions + [''] * missing

# fetch the first n definitions for each word of an MWE
def get_glosses(MWE, lang, translations, n_gloss):
    """Return one gloss list (see ``get_gloss``) per word of *MWE*.

    For any language other than 'EN' each word is first replaced by its entry
    in ``translations[lang]``; a word without an entry raises ``KeyError``.
    """
    words = split(MWE)
    if lang != 'EN':
        lookup = translations[lang]
        words = [lookup[word] for word in words]

    glosses = []
    for word in words:
        glosses.append(get_gloss(word, n_gloss))
    return glosses

# flatten a list of lists of strings and glue the items together with '. '
def collapse(glosses):
    """Return all strings of the nested list *glosses* joined by ``'. '``."""
    flat = []
    for sublist in glosses:
        flat.extend(sublist)
    return '. '.join(flat)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


In [None]:
# preprocess
import re

def _mark_idiom( sentence, mwe ) :
    """Surround every literal occurrence of *mwe* in *sentence* with [SEP] tokens."""
    # re.escape keeps an MWE that contains regex metacharacters from being
    # interpreted as a pattern; the raw string keeps \g<1> a valid group reference.
    return re.sub( f"({re.escape( mwe )})", r'[SEP]\g<1>[SEP]', sentence )


def _build_row( label, elem, header, translations, include_context, include_idiom, sep_idiom, include_gloss, n_gloss ) :
    """Turn one input row into ``[label, sentence1]`` or ``[label, sentence1, sentence2]``."""
    mwe       = elem[ header.index( 'MWE'    ) ]
    sentence1 = elem[ header.index( 'Target' ) ]
    if include_context :
        sentence1 = ' '.join( [ elem[ header.index( 'Previous' ) ], sentence1, elem[ header.index( 'Next' ) ] ] )
    if sep_idiom :
        sentence1 = _mark_idiom( sentence1, mwe )

    this_row = [ label, sentence1 ]
    # The sentence2 column follows the flags, not the content, so a row whose
    # gloss string happens to be empty still lines up with the header.
    if include_idiom or include_gloss :
        sentence2 = mwe if include_idiom else ''
        if include_gloss :
            if include_idiom :
                sentence2 += '.\n'
            glosses = get_glosses( mwe, elem[ header.index( 'Language' ) ], translations, n_gloss=n_gloss )
            sentence2 += collapse( glosses )
        this_row.append( sentence2 )
    return this_row


def _get_train_data( data_location, file_name, include_context, include_idiom, sep_idiom, include_gloss, n_gloss=2 ) :
    """Build the training table from one labelled training CSV.

    Args:
        data_location: Directory containing ``file_name``.
        file_name: Training CSV; the columns used are 'Language', 'MWE',
            'Target', 'Label' and, with ``include_context``, 'Previous'/'Next'.
        include_context: Join 'Previous', 'Target' and 'Next' into sentence1
            instead of using 'Target' alone.
        include_idiom: Put the MWE text at the start of sentence2.
        sep_idiom: Wrap occurrences of the MWE in sentence1 with [SEP].
        include_gloss: Add the collapsed glosses of the MWE words to sentence2.
        n_gloss: Number of glosses requested per MWE word.

    Returns:
        A list of rows whose first entry is the header ``['label',
        'sentence1']`` (plus ``'sentence2'`` when ``include_idiom`` or
        ``include_gloss`` is set), followed by one row per input row.
    """
    header, data = load_csv( os.path.join( data_location, file_name ) )

    translations = batch_translate( header, data, target='EN' )

    out_header = [ 'label', 'sentence1' ]
    if include_idiom or include_gloss :
        out_header.append( 'sentence2' )

    # ['DataID', 'Language', 'MWE', 'Setting', 'Previous', 'Target', 'Next', 'Label']
    out_data = [
        _build_row(
            elem[ header.index( 'Label' ) ], elem, header, translations,
            include_context, include_idiom, sep_idiom, include_gloss, n_gloss
        )
        for elem in data
    ]
    return [ out_header ] + out_data


def _get_dev_eval_data( data_location, input_file_name, gold_file_name, include_context, include_idiom, sep_idiom, include_gloss, n_gloss=2 ) :
    """Build the dev/eval table from an input CSV and an optional gold CSV.

    Args:
        data_location: Directory containing both files.
        input_file_name: CSV with the sentences; the columns used are 'ID',
            'Language', 'MWE', 'Target' and, with ``include_context``,
            'Previous'/'Next'.
        gold_file_name: CSV with 'ID' and 'Label' columns, row-aligned with
            the input file, or ``None`` when no gold labels exist; every row
            then gets the placeholder label ``1``.
        include_context, include_idiom, sep_idiom, include_gloss, n_gloss:
            Same meaning as in ``_get_train_data``.

    Returns:
        A list of rows: the header followed by one row per input row, in the
        same layout as ``_get_train_data``.

    Raises:
        AssertionError: If the gold file has a different number of rows than
            the input file, or if the IDs of a row pair differ.
    """
    input_headers, input_data = load_csv( os.path.join( data_location, input_file_name ) )
    gold_header  = gold_data = None
    if gold_file_name is not None :
        gold_header  , gold_data  = load_csv( os.path.join( data_location, gold_file_name  ) )
        assert len( input_data ) == len( gold_data )

    translations = batch_translate( input_headers, input_data, target='EN' )

    # ['ID', 'Language', 'MWE', 'Previous', 'Target', 'Next']
    # ['ID', 'DataID', 'Language', 'Label']

    out_header = [ 'label', 'sentence1' ]
    if include_idiom or include_gloss :
        out_header.append( 'sentence2' )

    out_data = list()
    for index, elem in enumerate( input_data ) :
        label = 1
        if gold_file_name is not None :
            this_input_id = elem[ input_headers.index( 'ID' ) ]
            this_gold_id  = gold_data[ index ][ gold_header.index( 'ID' ) ]
            assert this_input_id == this_gold_id

            label = gold_data[ index ][ gold_header.index( 'Label' ) ]

        out_data.append(
            _build_row(
                label, elem, input_headers, translations,
                include_context, include_idiom, sep_idiom, include_gloss, n_gloss
            )
        )

    return [ out_header ] + out_data


def create_data(input_location, output_location):
    """Preprocess the task CSVs and write the ZeroShot and OneShot data sets.

    Reads 'train_zero_shot.csv', 'train_one_shot.csv', 'dev.csv',
    'dev_gold.csv' and 'eval.csv' from ``input_location`` and writes
    'train.csv', 'dev.csv' and 'eval.csv' into both
    ``<output_location>/ZeroShot`` and ``<output_location>/OneShot``.
    The one-shot training set is the one-shot rows followed by the zero-shot
    rows; the dev and eval sets are the same for both settings.

    Every input file is processed once and reused, because each preprocessing
    pass triggers a batch translation.

    Args:
        input_location: Directory holding the task CSV files.
        output_location: Directory under which the two setting folders are
            created if they do not exist yet.

    Raises:
        AssertionError: If the two training tables have different headers.
    """
    # all tables use the same preprocessing: no context, MWE marked with [SEP],
    # glosses in sentence2
    options = dict(
        include_context = False,
        include_idiom   = False,
        sep_idiom       = True,
        include_gloss   = True
    )

    zero_dir = os.path.join( output_location, 'ZeroShot' )
    one_dir  = os.path.join( output_location, 'OneShot' )
    os.makedirs( zero_dir, exist_ok=True )
    os.makedirs( one_dir, exist_ok=True )

    # zero shot data
    train_zero_data = _get_train_data(
        data_location = input_location,
        file_name     = 'train_zero_shot.csv',
        **options
    )
    write_csv( train_zero_data, os.path.join( zero_dir, 'train.csv' ) )

    dev_data = _get_dev_eval_data(
        data_location   = input_location,
        input_file_name = 'dev.csv',
        gold_file_name  = 'dev_gold.csv',
        **options
    )
    write_csv( dev_data, os.path.join( zero_dir, 'dev.csv' ) )

    eval_data = _get_dev_eval_data(
        data_location   = input_location,
        input_file_name = 'eval.csv',
        gold_file_name  = None , ## Don't have gold evaluation file -- submit to CodaLab
        **options
    )
    write_csv( eval_data, os.path.join( zero_dir, 'eval.csv' ) )

    # one shot data
    train_one_data = _get_train_data(
        data_location = input_location,
        file_name     = 'train_one_shot.csv',
        **options
    )

    assert train_zero_data[0] == train_one_data[0] ## Headers
    train_data = train_one_data + train_zero_data[1:]
    write_csv( train_data, os.path.join( one_dir, 'train.csv' ) )

    # dev and eval tables are identical for both settings, so reuse them
    write_csv( dev_data, os.path.join( one_dir, 'dev.csv' ) )
    write_csv( eval_data, os.path.join( one_dir, 'eval.csv' ) )

In [None]:
outpath = 'Data'

# both per-setting output folders have to exist before the CSV files are written
Path( outpath, 'ZeroShot' ).mkdir( parents=True, exist_ok=True )
Path( outpath, 'OneShot'  ).mkdir( parents=True, exist_ok=True )

# preprocess the SubTaskA data of the cloned task repository into outpath
create_data( 'SemEval_2022_Task2-idiomaticity/SubTaskA/Data/', outpath )

Batch translating...
Finished translating
Wrote Data/ZeroShot/train.csv
Batch translating...
Finished translating
Wrote Data/ZeroShot/dev.csv
Batch translating...
Finished translating
Wrote Data/ZeroShot/eval.csv
Batch translating...
Finished translating
Batch translating...
Finished translating
Wrote Data/OneShot/train.csv
Batch translating...
Finished translating
Wrote Data/OneShot/dev.csv
Batch translating...
Finished translating
Wrote Data/OneShot/eval.csv


# Zero Shot


## Train

In [None]:
# train zero shot
# fine-tunes bert-base-multilingual-cased on the ZeroShot data, evaluating and
# saving once per epoch and keeping the checkpoint with the best "f1"
!python /content/AStitchInLanguageModels/Dataset/Task2/Utils/run_glue_f1_macro.py \
    --model_name_or_path 'bert-base-multilingual-cased' \
    --do_train \
    --do_eval \
    --max_seq_length 128 \
    --per_device_train_batch_size 32 \
    --learning_rate 2e-5 \
    --num_train_epochs 9 \
    --output_dir models/ZeroShot/0/ \
    --seed 0 \
    --train_file      Data/ZeroShot/train.csv \
    --validation_file Data/ZeroShot/dev.csv \
    --evaluation_strategy "epoch" \
    --save_strategy "epoch" \
    --load_best_model_at_end \
    --metric_for_best_model "f1" \
    --save_total_limit 1

INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=True,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for

## Save model

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')


Mounted at /content/gdrive


In [None]:
## Create save path
!mkdir -p /content/gdrive/MyDrive/ColabData/SemEval2022Task2/TaskA/ZeroShot/0/
## Copy saved model.
!cp -r /content/models/ZeroShot/0/* /content/gdrive/MyDrive/ColabData/SemEval2022Task2/TaskA/ZeroShot/0/

In [None]:
## Bring back saved model here. 
#!mkdir -p /content/models/ZeroShot/0/
# !cp -r /content/gdrive/MyDrive/ColabData/SemEval2022Task2/TaskA/ZeroShot/0/* /content/models/ZeroShot/0/

## Evaluate on dev

In [None]:
# fix run_glue_f1_macro
# remove_columns_() doesn't exist anymore so change to remove_columns()

from pathlib import Path

# patch the script in place; the replacement is a no-op when the script has
# already been patched, so the cell can safely be re-run
script_path = Path( '/content/AStitchInLanguageModels/Dataset/Task2/Utils/run_glue_f1_macro.py' )
script_text = script_path.read_text( encoding='utf-8' )
script_path.write_text( script_text.replace( 'remove_columns_(', 'remove_columns(' ), encoding='utf-8' )

In [None]:
# eval on dev data
# predicts with the saved zero-shot model on Data/ZeroShot/dev.csv
# NOTE(review): --do_train is not passed (the logged arguments show do_train=False),
# so the learning-rate / epoch flags are presumably unused here — TODO confirm
!python /content/AStitchInLanguageModels/Dataset/Task2/Utils/run_glue_f1_macro.py \
    --model_name_or_path '/content/models/ZeroShot/0' \
    --do_predict \
    --max_seq_length 128 \
    --per_device_train_batch_size 32 \
    --learning_rate 2e-5 \
    --num_train_epochs 9 \
    --output_dir models/ZeroShot/0/eval-dev/ \
    --seed 0 \
    --train_file      Data/ZeroShot/train.csv \
    --validation_file Data/ZeroShot/dev.csv \
    --test_file       Data/ZeroShot/dev.csv \
    --evaluation_strategy "epoch" \
    --save_strategy "epoch" \
    --load_best_model_at_end \
    --metric_for_best_model "f1" \
    --save_total_limit 1

INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=True,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for

In [None]:
# create submission file for dev data

def insert_to_submission_file( submission_format_file, input_file, prediction_format_file, setting ) :
    """Copy model predictions into the 'Label' column of a submission table.

    The rows of the submission file whose 'Setting' equals ``setting`` must
    form one contiguous run; they receive the predictions in file order.
    Rows of other settings are returned unchanged.

    Args:
        submission_format_file: CSV with at least 'Setting' and 'Label' columns.
        input_file: CSV the predictions were made for; only its row count is
            used, as a sanity check against the prediction file.
        prediction_format_file: Tab-separated file with a 'prediction' column.
        setting: Value of the 'Setting' column to fill in.

    Returns:
        The submission table as a list of rows, header first.

    Raises:
        AssertionError: If input and prediction files differ in row count.
        IndexError: If the setting has more rows than there are predictions.
        ValueError: If the rows of the setting are not contiguous, or if
            predictions are left over after the last row of the setting.
    """
    submission_header, submission_content = load_csv( submission_format_file )
    _input_header    , input_data         = load_csv( input_file             )
    prediction_header, prediction_data    = load_csv( prediction_format_file, '\t' )

    assert len( input_data ) == len( prediction_data )

    ## submission_header ['ID', 'Language', 'Setting', 'Label']
    ## input_header      ['label', 'sentence1' ]
    ## prediction_header ['index', 'prediction']

    setting_col    = submission_header.index( 'Setting'    )
    label_col      = submission_header.index( 'Label'      )
    prediction_col = prediction_header.index( 'prediction' )

    used           = 0      # number of predictions already written
    started_insert = False
    for elem in submission_content :
        if elem[ setting_col ] != setting :
            if not started_insert :
                continue
            if used == len( prediction_data ) :
                # run finished and every prediction placed: nothing left to do
                break
            raise ValueError( "Update should be contiguous ... something wrong." )
        started_insert = True
        if used == len( prediction_data ) :
            raise IndexError(
                "Setting '{}' has more rows than the {} available predictions.".format( setting, used )
            )
        elem[ label_col ] = prediction_data[ used ][ prediction_col ]
        used += 1

    if used != len( prediction_data ) :
        # same mismatch as the non-contiguous case, detected at end of file
        raise ValueError(
            "Only {} of {} predictions fit setting '{}'.".format( used, len( prediction_data ), setting )
        )

    return [ submission_header ] + submission_content

# NOTE(review): the prediction file is presumably the one written by the
# --do_predict run into its --output_dir (models/ZeroShot/0/eval-dev/) — confirm
params = {
    'submission_format_file' : '/content/SemEval_2022_Task2-idiomaticity/SubTaskA/Data/dev_submission_format.csv' ,
    'input_file'             : '/content/SemEval_2022_Task2-idiomaticity/SubTaskA/Data/dev.csv'                   ,
    'prediction_format_file' : '/content/models/ZeroShot/0/eval-dev/test_results_None.txt'                        ,
    }
# only the rows of this setting get their Label filled in
params[ 'setting' ] = 'zero_shot'

updated_data = insert_to_submission_file( **params )

!mkdir -p outputs

write_csv( updated_data, 'outputs/zero_shot_dev_formated.csv' ) 

# run eval script for dev data

# the evaluator module lives in the cloned task repository
import sys
sys.path.append( '/content/SemEval_2022_Task2-idiomaticity/SubTaskA/' ) 
from SubTask1Evaluator import evaluate_submission


submission_file = 'outputs/zero_shot_dev_formated.csv'
gold_file       = '/content/SemEval_2022_Task2-idiomaticity/SubTaskA/Data/dev_gold.csv'

results = evaluate_submission( submission_file, gold_file )
%reload_ext google.colab.data_table
import pandas as pd
# first entry of results supplies the column names, the remaining entries the rows
df = pd.DataFrame(data=results[1:], columns=results[0])
df

Wrote outputs/zero_shot_dev_formated.csv


Unnamed: 0,Settings,Languages,F1 Score (Macro)
0,zero_shot,EN,0.749062
1,zero_shot,PT,0.745726
2,zero_shot,"EN,PT",0.755833
3,one_shot,EN,"(None, None, None)"
4,one_shot,PT,"(None, None, None)"
5,one_shot,"EN,PT","(None, None, None)"


# One Shot

## Train

In [None]:
# train one shot
# fine-tunes bert-base-multilingual-cased on the OneShot data, evaluating and
# saving once per epoch and keeping the checkpoint with the best "f1"
!python /content/AStitchInLanguageModels/Dataset/Task2/Utils/run_glue_f1_macro.py \
    --model_name_or_path 'bert-base-multilingual-cased' \
    --do_train \
    --do_eval \
    --max_seq_length 128 \
    --per_device_train_batch_size 32 \
    --learning_rate 2e-5 \
    --num_train_epochs 9 \
    --output_dir models/OneShot/1/ \
    --seed 1 \
    --train_file      Data/OneShot/train.csv \
    --validation_file Data/OneShot/dev.csv \
    --evaluation_strategy "epoch" \
    --save_strategy "epoch" \
    --load_best_model_at_end \
    --metric_for_best_model "f1" \
    --save_total_limit 1

INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=True,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for

## Save model

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
## Create save path
!mkdir -p /content/gdrive/MyDrive/ColabData/SemEval2022Task2/TaskA/OneShot/1/
## Copy saved model.
!cp -r /content/models/OneShot/1/* /content/gdrive/MyDrive/ColabData/SemEval2022Task2/TaskA/OneShot/1/

In [None]:
## Bring back saved model here. 
#!mkdir -p /content/models/OneShot/1/
# !cp -r /content/gdrive/MyDrive/ColabData/SemEval2022Task2/TaskA/OneShot/1/* /content/models/OneShot/1/

## Evaluate on dev

In [None]:
# eval on dev data
# predicts with the saved one-shot model on Data/OneShot/dev.csv
# NOTE(review): --do_train is not passed (the logged arguments show do_train=False),
# so the learning-rate / epoch flags are presumably unused here — TODO confirm
!python /content/AStitchInLanguageModels/Dataset/Task2/Utils/run_glue_f1_macro.py \
    --model_name_or_path '/content/models/OneShot/1' \
    --do_predict \
    --max_seq_length 128 \
    --per_device_train_batch_size 32 \
    --learning_rate 2e-5 \
    --num_train_epochs 20 \
    --output_dir models/OneShot/1/eval-dev/ \
    --seed 1 \
    --train_file      Data/OneShot/train.csv \
    --validation_file Data/OneShot/dev.csv \
    --test_file       Data/OneShot/dev.csv \
    --evaluation_strategy "epoch" \
    --save_strategy "epoch" \
    --load_best_model_at_end \
    --metric_for_best_model "f1" \
    --save_total_limit 1

INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=True,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for

In [None]:
# create submission file for dev data

# the zero-shot output file is used as the submission template here, so the
# file written below carries the labels of both settings
params = {
    'submission_format_file' : '/content/outputs/zero_shot_dev_formated.csv' ,
    'input_file'             : '/content/SemEval_2022_Task2-idiomaticity/SubTaskA/Data/dev.csv'                   ,
    'prediction_format_file' : '/content/models/OneShot/1/eval-dev/test_results_None.txt'                        ,
    }
# only the rows of this setting get their Label filled in
params[ 'setting' ] = 'one_shot'

updated_data = insert_to_submission_file( **params )
write_csv( updated_data, 'outputs/both_dev_formated.csv' ) 

# run eval script

# the evaluator module lives in the cloned task repository
import sys
sys.path.append( '/content/SemEval_2022_Task2-idiomaticity/SubTaskA/' ) 
from SubTask1Evaluator import evaluate_submission


submission_file = 'outputs/both_dev_formated.csv'
gold_file       = '/content/SemEval_2022_Task2-idiomaticity/SubTaskA/Data/dev_gold.csv'

results = evaluate_submission( submission_file, gold_file )
%reload_ext google.colab.data_table
import pandas as pd
# first entry of results supplies the column names, the remaining entries the rows
df = pd.DataFrame(data=results[1:], columns=results[0])
df

Wrote outputs/both_dev_formated.csv


Unnamed: 0,Settings,Languages,F1 Score (Macro)
0,zero_shot,EN,0.749062
1,zero_shot,PT,0.745726
2,zero_shot,"EN,PT",0.755833
3,one_shot,EN,0.879909
4,one_shot,PT,0.874036
5,one_shot,"EN,PT",0.881671
