# Data demo

* How to load the texts
* How to load annotations
* Example analysis

In [1]:
import json
import glob

## Human-annotated data

In [2]:
# Collect the paths of the English gold-annotated text files for one event.
path_gold = 'corpus_gold/'
event = 'Eurovision_Song_Contest_2020'
# glob.glob() returns matches in arbitrary, filesystem-dependent order.
# Sort the paths so that positional indexing into this list selects the
# same file on every machine and on every run.
gold_2020_en_texts = sorted(glob.glob(f'{path_gold}{event}/texts/en/*.json'))
print(len(gold_2020_en_texts))
print(gold_2020_en_texts[:3])

42
['corpus_gold/Eurovision_Song_Contest_2020/texts/en/Q30973589-234.json', 'corpus_gold/Eurovision_Song_Contest_2020/texts/en/Q30973589-25.json', 'corpus_gold/Eurovision_Song_Contest_2020/texts/en/Q30973589-259.json']


In [3]:
# Load the event's Wikidata information from its wikidata.json file.
gold_2020_wiki = f'{path_gold}{event}/wikidata.json'
# Pass the encoding explicitly: without it, open() decodes with the platform's
# locale encoding, which is not UTF-8 on every system (e.g. Windows).
with open(gold_2020_wiki, encoding='utf-8') as infile:
    d_wiki = json.load(infile)

In [21]:
# Wikidata Q-number of the event, read from the 'wikidata_q' field of the
# loaded Wikidata info; the bare expression on the last line displays it.
event_q = d_wiki['wikidata_q']
event_q

'Q30973589'

In [4]:
# Number of entries listed under documents -> 'en' in the Wikidata info.
print(len(d_wiki['documents']['en']))

42


In [5]:
# Load one example document.
# Note: `example_text` holds the *path* of the JSON file; the parsed content
# ends up in `text_d`.

example_text = gold_2020_en_texts[5]

# Pass the encoding explicitly: without it, open() decodes with the platform's
# locale encoding, which is not UTF-8 on every system (e.g. Windows).
with open(example_text, encoding='utf-8') as infile:
    text_d = json.load(infile)

In [6]:
# Title of the example document.
text_d['title']

'Aksel Kankaanranta will represent Finland at the Eurovision Song Contest 2020'

In [7]:
# Preview: the first 300 characters of the example document's text.
print(text_d['text'][:300])

Aksel Kankaanranta will represent Finland at the Eurovision Song Contest 2020.

Aksel Kankaanranta has won the Contest for New Music UMK with the song Looking Back, and will represent Finland at the Eurovision Song Contest in Rotterdam in May 2020. UMK 2021 will also be held as an open contest. The 


### Human and automatic frame annotation

The data have been annotated for semantic roles by human annotators, on top of automatic semantic role labeling (SRL) annotation (Open-SESAME).

In [8]:
# SRL annotation: the value stored under the 'srl' key of the example document.

srl = text_d['srl']
# Displaying `srl` in full is intentionally left out here; a later cell
# prints a single entry instead.

In [18]:
# Show only the first SRL entry: its frame, its annotation source, the
# predicate tokens, and every role that has tokens attached.
for pred_role_d in srl[:1]:
    print('Frame', pred_role_d['frame'])
    print(pred_role_d['source'])
    for tok in pred_role_d['predicate']['token_text']:
        print(tok.strip())
    for role in pred_role_d['roles']:
        role_name = role['role']
        # Roles whose 'token_text' is empty or missing a value print nothing.
        for tok in role['token_text'] or []:
            print(role_name, tok.strip())
    print()

Frame http://premon.fbk.eu/resource/fn17-finish_game
NGZZOnuEsmEAa27KU3IFN-54m58WqHVh
won
http://premon.fbk.eu/resource/fn17-finish_game@game Contest
http://premon.fbk.eu/resource/fn17-finish_game@player Aksel



In [24]:
# Coreference annotation: go through every chain, mark the ones whose
# reference equals the event's Q-number, and print those chains in full.
coref = text_d['coref']

for chain in coref:
    reference = chain['reference']
    event_ref = 'event-ref' if reference == event_q else ''

    print(event_q, reference, event_ref)
    if event_ref:
        print(chain)
    

Q30973589 Q83402937 
Q30973589 Q30973589 event-ref
{'coref_text': {'\nEurovision\n': [[34]], '\nSong\n': [[35]], '\nContest\n': [[36]], '\nin\n': [[37]], '\nRotterdam\n': [[38]]}, 'coref_type': 'event', 'reference': 'Q30973589'}
Q30973589 Q83402937 
Q30973589 Q30973589 event-ref
{'coref_text': {'\nthe\n': [[236]], '\nEurovision\n': [[237]], '\nSong\n': [[238]], '\nContest\n': [[239]]}, 'coref_type': 'event', 'reference': 'Q30973589'}
Q30973589 Q83402937 
Q30973589 Q30973589 event-ref
{'coref_text': {'\nthe\n': [[332]], '\nEurovision\n': [[333]], '\nSong\n': [[334]], '\nContest\n': [[335]]}, 'coref_type': 'event', 'reference': 'Q30973589'}
Q30973589 Q83402937 
