## Build Environment

In [92]:
!pip install -r requirements.txt

# Train a model

To train a model you should specify the arguments, for example:

```bash
python run_metonymy_resolution.py \
--data_dir ../data \
--train_file conll_train.json \
--predict_file conll_test.json \
--output_dir ../output \
--do_train  \
--do_eval \
--do_mask
```

# You can also use our pretrained model

Download it from https://drive.google.com/file/d/1PCXkEFyK5OALQbF_64J6jSGO0IUjbYuf/view?usp=sharing

Unzip it and place the contents in ``model_folder`` (your local path).

This is a pretrained bert-base-uncased model, trained using ./data/conll_train.json.

# Load pretrained model from ``model_folder``

In [2]:
import numpy as np
import sys
sys.path.append("./src")
from utils_metonymy import *

# Classes used below to load the saved model and its tokenizer.
# NOTE(review): both names are presumably provided by the star import of
# utils_metonymy above — confirm.
model_class = BertForWordClassification
tokenizer_class = BertTokenizer

In [3]:
model_folder = './output' # directory where the model has been saved

# Load the model and its tokenizer from the same directory.
model = model_class.from_pretrained(model_folder)
tokenizer = tokenizer_class.from_pretrained(model_folder, do_lower_case=True)

In [84]:
def single_detect(example):
    """Classify one target span as literal or metonymic and print the result.

    Args:
        example: dict with key ``"sentence"`` (list of token strings) and key
            ``"pos"`` (``[start, end]`` token indices; ``end`` is exclusive,
            because the target is taken as ``sentence[start:end]``).

    Returns:
        None. The target word, the full sentence and the predicted label are
        printed to stdout.

    Relies on the notebook-level ``model``, ``tokenizer`` and
    ``convert_single_example_to_input``.
    """
    # Imported locally so the function does not depend on `copy` happening to
    # leak into the namespace through a star import.
    import copy

    tokens = example["sentence"]
    start, end = example["pos"][0], example["pos"][1]
    tgt_word = " ".join(tokens[start:end])
    sent = " ".join(tokens)

    # The converter receives a deep copy, so the caller's dict is never the
    # object handed to it (presumably the converter may modify its argument).
    inputs = convert_single_example_to_input(copy.deepcopy(example), tokenizer)

    model.zero_grad()
    model.eval()

    # First element of the model output holds the logits.
    logits = model(**inputs)[0]

    # Highest-scoring class index per row; only the first row is reported.
    preds = np.argmax(logits.detach().cpu().numpy(), axis=1)

    label_map = {0: 'literal', 1: 'metonymy'}

    print('------------------------------')
    print(f'Target word:   {tgt_word}')
    print(f'Sentence:      {sent}')
    print(f'Prediction:    {label_map[preds[0]]}')

# Input example & Run the model on the input example

In [85]:
# 'pos' is the [start, end) token range of the target word:
# sentence[2:3] selects 'ROMANIA'.
example = {'sentence': ['SOCCER', '-', 'ROMANIA', 'BEAT', 'LITHUANIA', 'IN', 'UNDER-21', 'MATCH.'],
           'pos': [2, 3]}
single_detect(example)

------------------------------
Target word:   ROMANIA
Sentence:      SOCCER - ROMANIA BEAT LITHUANIA IN UNDER-21 MATCH.
Prediction:    metonymy


# End-to-End Metonymy Resolution

In [91]:
!pip install -U spacy
!python -m spacy download en_core_web_sm

In [20]:
import spacy
# Load the en_core_web_sm pipeline; end2end_detect uses it to tokenize the
# sentence and to find named entities.
nlp = spacy.load("en_core_web_sm")

In [87]:
def end2end_detect(sentence):
    """Find every GPE entity in ``sentence`` and classify each one.

    The sentence is run through the notebook-level spaCy pipeline ``nlp``.
    For each named entity labelled ``GPE`` an example dict is built and passed
    to ``single_detect``, which prints the prediction.

    Args:
        sentence: raw input text.

    Returns:
        None. Results are printed by ``single_detect``.
    """
    doc = nlp(sentence)
    token_text = [token.text for token in doc]

    # Iterating over doc.ents (instead of tracking IOB tags token by token)
    # also covers an entity that ends the sentence, and two entities that
    # directly follow each other. ent.start / ent.end are token indices with
    # an exclusive end, which is the slice convention single_detect expects.
    examples = [
        {'sentence': token_text, 'pos': [ent.start, ent.end]}
        for ent in doc.ents
        if ent.label_ == 'GPE'
    ]

    for example in examples:
        single_detect(example)

In [88]:
# Example with a multi-token place name ("Los Angeles").
sentence = "Los Angeles lost in the semi-final."
end2end_detect(sentence)

------------------------------
Target word:   Los Angeles
Sentence:      Los Angeles lost in the semi - final .
Prediction:    metonymy


In [89]:
# Example with two place names in one sentence; each is reported separately.
sentence = "Moscow talks to Beijing."
end2end_detect(sentence)

------------------------------
Target word:   Moscow
Sentence:      Moscow talks to Beijing .
Prediction:    metonymy
------------------------------
Target word:   Beijing
Sentence:      Moscow talks to Beijing .
Prediction:    metonymy


In [90]:
# Example where the place name is predicted as literal.
sentence = "I used to live in Germany."
end2end_detect(sentence)

------------------------------
Target word:   Germany
Sentence:      I used to live in Germany .
Prediction:    literal
