# SemEval 2022 Dev set EDA: Graph-Parser Baseline

- Introduction: [graph-parser GitHub](https://github.com/jerbarnes/semeval22_structured_sentiment/tree/master/baselines/graph_parser)
- Best F1:

|-|opener_en|darmstadt_unis|mpqa|
|:---------:|:---------:|:----------:|:--------:|
|  epoch  |  #100   |    #63   |  #100  | 
|    F1   |  0.5308 |   0.3135 | 0.3671 |

In [1]:
import os

# Directory that contains the `semeval22_structured_sentiment` checkout; all
# later paths in this notebook are relative to it.
# Defaults to the original author's location; set the SEMEVAL_HOME environment
# variable to run the notebook on another machine without editing this cell.
os.chdir(os.environ.get('SEMEVAL_HOME', '/share/home/qwe9887476/'))

In [4]:
import json

# Paths are relative to the current working directory.
root_dir = './semeval22_structured_sentiment/'
prediction_dir = f'{root_dir}/baselines/graph_parser/experiments'
golden_dir = f'{root_dir}/data'

# dataset name -> parsed JSON content of its dev file (filled by the loop below).
PREDICTIONS = {'darmstadt_unis':None, 'mpqa':None, 'opener_en':None}
GOLDS = {'darmstadt_unis':None, 'mpqa':None, 'opener_en':None}

for DATASET in PREDICTIONS.keys():
    predpath = f'{prediction_dir}/{DATASET}/head_final/dev.conllu.json'
    goldpath = f'{golden_dir}/{DATASET}/dev.json'
    # Read explicitly as UTF-8: the sentences contain non-ASCII characters
    # (e.g. '·'), and the platform's locale-default encoding may not be UTF-8.
    with open(predpath, 'r', encoding='utf-8') as f:
        PREDICTIONS[DATASET] = json.load(f)
    with open(goldpath, 'r', encoding='utf-8') as f2:
        GOLDS[DATASET] = json.load(f2)

In [5]:
# dataset name -> (aligned PRED/GOLD pairs keyed by sent_id, list of sent_ids);
# populated from pred_align() output and read by show_random().
ALLDEVS = {}

In [6]:
# match by sent-id
from collections import defaultdict
def pred_align(preds, golds):
    '''
    Pair predictions with gold annotations by their 'sent_id'.

    preds: list of dicts
    golds: list of dicts
    Every dict must contain a 'sent_id' key.

    Returns (SENTID, ids):
      SENTID: defaultdict mapping sent_id -> {'PRED': dict or None, 'GOLD': dict or None};
              a side stays None when that sent_id does not occur in the
              corresponding input list.
      ids:    list of every sent_id seen, in first-seen order.
    '''
    from itertools import zip_longest

    SENTID = defaultdict(lambda: {'PRED':None, 'GOLD':None})
    # zip_longest instead of zip: plain zip stops at the shorter list and would
    # silently drop the remaining entries when preds and golds differ in length.
    for p, g in zip_longest(preds, golds):
        if p is not None:
            SENTID[p['sent_id']]['PRED'] = p
        if g is not None:
            SENTID[g['sent_id']]['GOLD'] = g
    return SENTID, list(SENTID.keys())

In [7]:
def show_random(dataset_name, sent_id = None):
    '''
    Fetch one aligned (prediction, gold) pair of a dataset from ALLDEVS.

    dataset_name: string, a key of ALLDEVS
    sent_id: optional string; when given (truthy) that sentence is returned,
             otherwise a sentence id is drawn at random from the dataset.

    Returns (pred, gold, sent_id) for the selected sentence.
    Raises KeyError if dataset_name is not in ALLDEVS or sent_id is not part
    of that dataset; raises ValueError on a random draw from an empty dataset.
    '''
    from random import randint
    try:
        PGPAIRS, ids = ALLDEVS[dataset_name]
    except KeyError:
        # Fail with a clear message instead of printing and then crashing
        # later on the unbound `ids`.
        raise KeyError(f'No {dataset_name} in ALLDEVS.') from None
    dev_size = len(ids)

    if sent_id:
        # Membership test first: indexing a defaultdict with an unknown id
        # would silently insert an empty PRED/GOLD entry.
        if sent_id not in PGPAIRS:
            raise KeyError(f'No sent_id {sent_id} in {dataset_name}.')
        rsid = sent_id
    else:
        # Only draw when no id was pinned.
        rsid = ids[randint(0, dev_size-1)]

    print(f'[INFO] dev set size: {dev_size}')
    print(f'[INFO] sent id: {rsid}')
    pred, gold = PGPAIRS[rsid]['PRED'], PGPAIRS[rsid]['GOLD']
    return pred, gold, rsid

In [9]:
# Dataset names to align, listed in the order of the sections below.
keys = ('opener_en', 'darmstadt_unis', 'mpqa')

In [10]:
# Align predictions with gold annotations per dataset and register the result
# in ALLDEVS as (pairs keyed by sent_id, list of sent_ids).
for key in keys:
    PGPAIRS, ids = pred_align(PREDICTIONS[key], GOLDS[key])
    ALLDEVS[key] = (PGPAIRS, ids)

## opener_en

### example 1 (long text): correct targets and expressions

In [11]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'opener_en'
pred, gold, sent_id = show_random(key, sent_id='opener_en/kaf/hotel/english00180_d5ec496d5858acfbab77283dd350b0ec-4')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 249
[INFO] sent id: opener_en/kaf/hotel/english00180_d5ec496d5858acfbab77283dd350b0ec-4


In [12]:
gold

{'sent_id': 'opener_en/kaf/hotel/english00180_d5ec496d5858acfbab77283dd350b0ec-4',
 'text': 'We had a groundfloor room so access for my scootmobile was no problem and parking of the scootmobile was also sorted without any fuss · a great team , staff and management .',
 'opinions': [{'Source': [['We'], ['0:2']],
   'Target': [['groundfloor room'], ['9:25']],
   'Polar_expression': [['no problem'], ['59:69']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [['We'], ['0:2']],
   'Target': [['access for my scootmobile'], ['29:54']],
   'Polar_expression': [['no problem'], ['59:69']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [[], []],
   'Target': [['management'], ['161:171']],
   'Polar_expression': [['great'], ['138:143']],
   'Polarity': 'Positive',
   'Intensity': 'Strong'},
  {'Source': [[], []],
   'Target': [['team'], ['144:148']],
   'Polar_expression': [['great'], ['138:143']],
   'Polarity': 'Positive',
   'Intensity': 'Strong'},
 

In [13]:
pred

{'sent_id': 'opener_en/kaf/hotel/english00180_d5ec496d5858acfbab77283dd350b0ec-4',
 'text': 'We had a groundfloor room so access for my scootmobile was no problem and parking of the scootmobile was also sorted without any fuss · a great team , staff and management .',
 'opinions': [{'Source': [[], []],
   'Target': [['parking of the scootmobile'], ['74:100']],
   'Polar_expression': [['sorted without any fuss'], ['110:133']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [['We'], ['0:2']],
   'Target': [['groundfloor room', 'access'], ['9:25', '29:35']],
   'Polar_expression': [['no problem'], ['59:69']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [['We'], ['0:2']],
   'Target': [['groundfloor', 'access for my scootmobile'], ['9:20', '29:54']],
   'Polar_expression': [['no problem'], ['59:69']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [[], []],
   'Target': [['team'], ['144:148']],
   'Polar_expression': [['g

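### example 2: correct targets and expressions

> **Note:** the `gold` / `pred` outputs shown below (execution counts 14–15) were captured before the selection cell was last run (execution count 29) and still display example 1's sentence (`english00180…-4`), not the requested `english00163…-3`. Re-run the two display cells to refresh them.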

In [29]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'opener_en'
pred, gold, sent_id = show_random(key, sent_id='opener_en/kaf/hotel/english00163_f2de7a0618bb010682e1d91c2e554c63-3')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 249
[INFO] sent id: opener_en/kaf/hotel/english00163_f2de7a0618bb010682e1d91c2e554c63-3


In [14]:
gold

{'sent_id': 'opener_en/kaf/hotel/english00180_d5ec496d5858acfbab77283dd350b0ec-4',
 'text': 'We had a groundfloor room so access for my scootmobile was no problem and parking of the scootmobile was also sorted without any fuss · a great team , staff and management .',
 'opinions': [{'Source': [['We'], ['0:2']],
   'Target': [['groundfloor room'], ['9:25']],
   'Polar_expression': [['no problem'], ['59:69']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [['We'], ['0:2']],
   'Target': [['access for my scootmobile'], ['29:54']],
   'Polar_expression': [['no problem'], ['59:69']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [[], []],
   'Target': [['management'], ['161:171']],
   'Polar_expression': [['great'], ['138:143']],
   'Polarity': 'Positive',
   'Intensity': 'Strong'},
  {'Source': [[], []],
   'Target': [['team'], ['144:148']],
   'Polar_expression': [['great'], ['138:143']],
   'Polarity': 'Positive',
   'Intensity': 'Strong'},
 

In [15]:
pred

{'sent_id': 'opener_en/kaf/hotel/english00180_d5ec496d5858acfbab77283dd350b0ec-4',
 'text': 'We had a groundfloor room so access for my scootmobile was no problem and parking of the scootmobile was also sorted without any fuss · a great team , staff and management .',
 'opinions': [{'Source': [[], []],
   'Target': [['parking of the scootmobile'], ['74:100']],
   'Polar_expression': [['sorted without any fuss'], ['110:133']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [['We'], ['0:2']],
   'Target': [['groundfloor room', 'access'], ['9:25', '29:35']],
   'Polar_expression': [['no problem'], ['59:69']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [['We'], ['0:2']],
   'Target': [['groundfloor', 'access for my scootmobile'], ['9:20', '29:54']],
   'Polar_expression': [['no problem'], ['59:69']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [[], []],
   'Target': [['team'], ['144:148']],
   'Polar_expression': [['g

### example 3: no target, wrong expression

In [16]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'opener_en'
pred, gold, sent_id = show_random(key, sent_id='opener_en/kaf/hotel/english00016_1bf091a4517b245ddc05ed2efd135973-7')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 249
[INFO] sent id: opener_en/kaf/hotel/english00016_1bf091a4517b245ddc05ed2efd135973-7


In [17]:
gold

{'sent_id': 'opener_en/kaf/hotel/english00016_1bf091a4517b245ddc05ed2efd135973-7',
 'text': "But what I really didn 't like was the music in the lobby bar en all the other bars , way to old !",
 'opinions': [{'Source': [['I'], ['9:10']],
   'Target': [['the music'], ['35:44']],
   'Polar_expression': [["really didn 't like"], ['11:30']],
   'Polarity': 'Negative',
   'Intensity': 'Strong'}]}

In [18]:
pred

{'sent_id': 'opener_en/kaf/hotel/english00016_1bf091a4517b245ddc05ed2efd135973-7',
 'text': "But what I really didn 't like was the music in the lobby bar en all the other bars , way to old !",
 'opinions': [{'Source': [[], []],
   'Target': [[], []],
   'Polar_expression': [['way to old !'], ['86:98']],
   'Polarity': 'Negative',
   'Intensity': 'Standard'}]}

## darmstadt_unis

### example 1: correct target but wrong polarity

In [90]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'darmstadt_unis'
pred, gold, sent_id = show_random(key, sent_id='University_of_Phoenix_Online_174_11-02-2004-1')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 232
[INFO] sent id: University_of_Phoenix_Online_174_11-02-2004-1


In [91]:
gold

{'sent_id': 'University_of_Phoenix_Online_174_11-02-2004-1',
 'text': 'In general I have been pleased with UOP instruction and technical support is excellent , but their response times with the newsgroups are often unacceptable .',
 'opinions': [{'Source': [[], []],
   'Target': [['UOP'], ['36:39']],
   'Polar_expression': [['pleased'], ['23:30']],
   'Polarity': 'Positive',
   'Intensity': 'Average'}]}

In [92]:
pred

{'sent_id': 'University_of_Phoenix_Online_174_11-02-2004-1',
 'text': 'In general I have been pleased with UOP instruction and technical support is excellent , but their response times with the newsgroups are often unacceptable .',
 'opinions': [{'Source': [[], []],
   'Target': [[], []],
   'Polar_expression': [['excellent'], ['77:86']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'},
  {'Source': [[], []],
   'Target': [['UOP'], ['36:39']],
   'Polar_expression': [['pleased'], ['23:30']],
   'Polarity': 'Negative',
   'Intensity': 'Standard'}]}

### example 2: wrong targets and opinions

In [93]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'darmstadt_unis'
pred, gold, sent_id = show_random(key, sent_id='DeVry_University_95_05-16-2004-7')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 232
[INFO] sent id: DeVry_University_95_05-16-2004-7


In [94]:
gold

{'sent_id': 'DeVry_University_95_05-16-2004-7',
 'text': 'The school gives students a knowledge base that makes them extremely competitive in the corporate world .',
 'opinions': [{'Source': [[], []],
   'Target': [['students'], ['17:25']],
   'Polar_expression': [['extremely', 'competitive'], ['59:68', '69:80']],
   'Polarity': 'Positive',
   'Intensity': 'Strong'}]}

In [95]:
pred

{'sent_id': 'DeVry_University_95_05-16-2004-7',
 'text': 'The school gives students a knowledge base that makes them extremely competitive in the corporate world .',
 'opinions': [{'Source': [[], []],
   'Target': [['school'], ['4:10']],
   'Polar_expression': [['knowledge'], ['28:37']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'}]}

### example 3: correct!

In [99]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'darmstadt_unis'
pred, gold, sent_id = show_random(key, sent_id='University_of_Maryland_University_College_3_01-03-2008-1')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 232
[INFO] sent id: University_of_Maryland_University_College_3_01-03-2008-1


In [100]:
gold

{'sent_id': 'University_of_Maryland_University_College_3_01-03-2008-1',
 'text': 'Its not perfect , but it works for individuals who are motivated , hardworking and yet are busy with their careers .',
 'opinions': [{'Source': [[], []],
   'Target': [['it'], ['22:24']],
   'Polar_expression': [['works'], ['25:30']],
   'Polarity': 'Positive',
   'Intensity': 'Weak'},
  {'Source': [[], []],
   'Target': [['Its'], ['0:3']],
   'Polar_expression': [['not', 'perfect'], ['4:7', '8:15']],
   'Polarity': 'Positive',
   'Intensity': 'Average'}]}

In [101]:
pred

{'sent_id': 'University_of_Maryland_University_College_3_01-03-2008-1',
 'text': 'Its not perfect , but it works for individuals who are motivated , hardworking and yet are busy with their careers .',
 'opinions': [{'Source': [[], []],
   'Target': [['Its'], ['0:3']],
   'Polar_expression': [['not perfect'], ['4:15']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'}]}

## mpqa

### example 1: partially correct

In [123]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'mpqa'
pred, gold, sent_id = show_random(key, sent_id='20020507/22.11.06-28210-26')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 1997
[INFO] sent id: 20020507/22.11.06-28210-26


In [124]:
gold

{'sent_id': '20020507/22.11.06-28210-26',
 'text': "The opposition leader commended Canada and other countries for invoking sanctions against Mr. Mugabe 's regime .",
 'opinions': [{'Source': [['The opposition leader'], ['0:21']],
   'Target': [["Mr. Mugabe 's regime"], ['90:110']],
   'Polar_expression': [['invoking sanctions against'], ['63:89']],
   'Polarity': 'Negative',
   'Intensity': 'Average'},
  {'Source': [['The opposition leader'], ['0:21']],
   'Target': [["invoking sanctions against Mr. Mugabe 's regime"], ['63:110']],
   'Polar_expression': [['commended'], ['22:31']],
   'Polarity': 'Positive',
   'Intensity': 'Average'},
  {'Source': [['The opposition leader'], ['0:21']],
   'Target': [["Mr. Mugabe 's regime"], ['90:110']],
   'Polar_expression': [['commended'], ['22:31']],
   'Polarity': 'Positive',
   'Intensity': 'Average'}]}

In [125]:
pred

{'sent_id': '20020507/22.11.06-28210-26',
 'text': "The opposition leader commended Canada and other countries for invoking sanctions against Mr. Mugabe 's regime .",
 'opinions': [{'Source': [['The opposition leader'], ['0:21']],
   'Target': [['Mr. Mugabe'], ['90:100']],
   'Polar_expression': [['invoking sanctions against'], ['63:89']],
   'Polarity': 'Negative',
   'Intensity': 'Standard'},
  {'Source': [['The opposition leader'], ['0:21']],
   'Target': [["Mr. Mugabe 's regime"], ['90:110']],
   'Polar_expression': [['invoking sanctions against'], ['63:89']],
   'Polarity': 'Negative',
   'Intensity': 'Standard'},
  {'Source': [['The opposition leader'], ['0:21']],
   'Target': [['Canada and other countries'], ['32:58']],
   'Polar_expression': [['commended'], ['22:31']],
   'Polarity': 'Negative',
   'Intensity': 'Standard'}]}

### example 2: correct! (target of pred is shorter)

In [170]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'mpqa'
pred, gold, sent_id = show_random(key, sent_id='20020304/20.42.01-25605-10')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 1997
[INFO] sent id: 20020304/20.42.01-25605-10


In [171]:
gold

{'sent_id': '20020304/20.42.01-25605-10',
 'text': 'For instance , he denounced as a human rights violation the banning and seizure of satellite dishes in Iran , while the measure has been taken in line with the law .',
 'opinions': [{'Source': [['he'], ['15:17']],
   'Target': [['the banning and seizure of satellite dishes in Iran'],
    ['56:107']],
   'Polar_expression': [['denounced'], ['18:27']],
   'Polarity': 'Negative',
   'Intensity': 'Strong'}]}

In [172]:
pred

{'sent_id': '20020304/20.42.01-25605-10',
 'text': 'For instance , he denounced as a human rights violation the banning and seizure of satellite dishes in Iran , while the measure has been taken in line with the law .',
 'opinions': [{'Source': [['he'], ['15:17']],
   'Target': [['dishes', 'Iran'], ['93:99', '103:107']],
   'Polar_expression': [['denounced'], ['18:27']],
   'Polarity': 'Negative',
   'Intensity': 'Standard'}]}

### example 3: wrong expression

In [233]:
# Pinned sent_id: look up this specific dev sentence rather than a random one.
key = 'mpqa'
pred, gold, sent_id = show_random(key, sent_id='20020304/20.42.01-25605-14')
# To sample a random sentence instead: pred, gold, sent_id = show_random(key)

[INFO] dev set size: 1997
[INFO] sent id: 20020304/20.42.01-25605-14


In [234]:
gold

{'sent_id': '20020304/20.42.01-25605-14',
 'text': 'This means that his annual anti-Iran reports are only dictated to him by Zionist circles and arrogant powers like the United States that lost their illegitimate interests in Iran with the victory of the Islamic Revolution and downfall of their puppet Pahlavi regime .',
 'opinions': [{'Source': [['his'], ['16:19']],
   'Target': [['Iran'], ['32:36']],
   'Polar_expression': [['annual anti-Iran reports'], ['20:44']],
   'Polarity': 'Negative',
   'Intensity': 'Weak'}]}

In [235]:
pred

{'sent_id': '20020304/20.42.01-25605-14',
 'text': 'This means that his annual anti-Iran reports are only dictated to him by Zionist circles and arrogant powers like the United States that lost their illegitimate interests in Iran with the victory of the Islamic Revolution and downfall of their puppet Pahlavi regime .',
 'opinions': [{'Source': [[], []],
   'Target': [['Iran with the victory of the Islamic Revolution and downfall of their puppet Pahlavi regime'],
    ['174:265']],
   'Polar_expression': [['interests'], ['161:170']],
   'Polarity': 'Positive',
   'Intensity': 'Standard'}]}