In [31]:
from IPython.core.display import display, HTML
# Emit the contents of the highlight stylesheet and script as HTML output of
# this cell. Each file is read inside a ``with`` block so its handle is closed
# right away instead of being left open until garbage collection.
for asset_path in ('visualization/highlight.css', 'visualization/highlight.js'):
    with open(asset_path) as asset_file:
        display(HTML(asset_file.read()))

import visualization
from termcolor import colored

from collections import defaultdict
import numpy as np
import spacy
# Shared spaCy pipeline; the show_result helpers in this notebook call it to
# tokenize article text.
# NOTE(review): the 'en' shortcut name is not accepted by spaCy v3+, which
# expects a full package name such as 'en_core_web_sm' — confirm which spaCy
# version this notebook targets.
nlp = spacy.load('en')

In [32]:
import sys
sys.path.append("../")
from span_identification.dataset import load_data

# Span Identification

In [None]:
def load_result(file):
    """Read span predictions from a tab-separated file.

    Every non-blank line must contain exactly three tab-separated fields:
    an article id, a span start and a span end. The two offsets are parsed
    with ``int``.

    Args:
        file: Path of the predictions file.

    Returns:
        dict mapping each article id (str) to a list of ``[start, end]``
        integer pairs, in file order.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields
            or an offset is not an integer.
    """
    result = {}
    with open(file, "r") as f:
        for line in f:
            # Blank lines (typically a trailing empty line at the end of the
            # file) carry no prediction; skip them instead of failing on
            # tuple unpacking.
            if not line.strip():
                continue
            article_id, spl, spr = line.split('\t')
            result.setdefault(article_id, []).append([int(spl), int(spr)])
    return result


def show_result(result, articles_id, articles_content, task):
    """Render the predicted spans of each article in ``result``.

    Articles are processed in sorted id order. Each article's text is run
    through the module-level ``nlp`` pipeline, and every character span is
    converted to a pair of token positions: the first token whose ``idx`` is
    at or after the span start, and the last token whose ``idx`` is before
    the span end. The article id is printed in red and the tokens are passed
    to ``visualization.render`` together with the converted spans.

    Args:
        result: Mapping of article id to a list of ``[start, end]`` spans.
        articles_id: Article ids, aligned with ``articles_content``.
        articles_content: Article texts, aligned with ``articles_id``.
        task: Forwarded unchanged to ``visualization.render`` as ``task``.

    Raises:
        KeyError: If an id in ``result`` is not present in ``articles_id``.
        IndexError: If no token satisfies the start or the end condition
            for a span.
    """
    text_by_id = dict(zip(articles_id, articles_content))
    for article_id in sorted(result):
        doc = nlp(text_by_id[article_id])
        token_starts = np.array([tok.idx for tok in doc])
        token_texts = [tok.text for tok in doc]

        token_spans = []
        for char_span in result[article_id]:
            # Positions are computed into fresh locals, so the entries of
            # ``result`` are left untouched.
            first_tok = np.flatnonzero(token_starts >= char_span[0])[0]
            last_tok = np.flatnonzero(token_starts < char_span[1])[-1]
            token_spans.append([first_tok, last_tok])

        if not token_spans:
            continue
        print(colored(article_id, 'red'))
        # ``render`` is given a list of span groups; here there is only one.
        visualization.render(token_texts, [token_spans], task=task)

In [3]:
# Call load_data on the articles folder and the technique-names file (its third
# return value is ignored), read the SI predictions and render them.
# NOTE(review): this cell reads articles from '../datasets/test-articles', while
# the technique-classification cell further down reads
# '../datasets/test/test-articles' — confirm which location is correct.
# NOTE(review): `load_result` and `show_result` are defined again later in this
# notebook with different signatures, so this cell only works when it runs
# before those later definitions are executed.
articles_content, articles_id, _ = load_data('../datasets/test-articles', 
                                             '../tools/data/propaganda-techniques-names-semeval2020task11.txt')
result = load_result('../results/SI_output_test.txt')
show_result(result, articles_id, articles_content, 'SI')

[31m813452859[0m


[31m813494037[0m


[31m813547724[0m


[31m813552066[0m


[31m813601978[0m


[31m813602345[0m


[31m813603860[0m


[31m813623212[0m


[31m813714967[0m


[31m813949697[0m


[31m813953273[0m


[31m813953435[0m


[31m813992175[0m


[31m814251296[0m


[31m814371058[0m


[31m814403543[0m


[31m814403783[0m


[31m814403875[0m


[31m814404002[0m


[31m814427361[0m


[31m814435435[0m


[31m814630609[0m


[31m814777937[0m


[31m815412286[0m


[31m815858385[0m


[31m816460196[0m


[31m816720060[0m


[31m817147979[0m


[31m817176202[0m


[31m817190270[0m


[31m817408115[0m


[31m817449755[0m


[31m818141325[0m


[31m820419869[0m


[31m820791520[0m


[31m821744708[0m


[31m822220578[0m


[31m822942601[0m


[31m824256050[0m


[31m824350729[0m


[31m824658990[0m


[31m824684605[0m


[31m829267754[0m


[31m829815104[0m


[31m830153674[0m


[31m830274102[0m


[31m830359136[0m


[31m830359423[0m


[31m830821478[0m


[31m832269185[0m


[31m832918490[0m


[31m832920387[0m


[31m832926076[0m


[31m832931332[0m


[31m832933796[0m


[31m832940138[0m


[31m832941978[0m


[31m832947554[0m


[31m832947600[0m


[31m832947852[0m


[31m832948083[0m


[31m832956618[0m


[31m832959523[0m


[31m832971448[0m


[31m832984694[0m


[31m833013834[0m


[31m833018464[0m


[31m833021113[0m


[31m833024133[0m


[31m833024696[0m


[31m833028146[0m


[31m833028680[0m


[31m833028932[0m


[31m833032366[0m


[31m833032367[0m


[31m833036176[0m


[31m833036489[0m


[31m833039623[0m


[31m833040400[0m


[31m833041409[0m


[31m833042063[0m


[31m833050243[0m


[31m833052347[0m


[31m833053628[0m


[31m833053676[0m


[31m833067493[0m


# Technique Classification

In [33]:
def load_result(file):
    """Read labelled span predictions from a tab-separated file.

    Every non-blank line must contain exactly four tab-separated fields:
    an article id, a predicted label, a span start and a span end. The two
    offsets are parsed with ``int``.

    Args:
        file: Path of the predictions file.

    Returns:
        ``defaultdict(dict)`` mapping each article id (str) to a dict that
        maps each predicted label (str) to a list of ``[start, end]``
        integer pairs, in file order.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields
            or an offset is not an integer.
    """
    result = defaultdict(dict)
    with open(file, "r") as f:
        for line in f:
            # Blank lines (typically a trailing empty line at the end of the
            # file) carry no prediction; skip them instead of failing on
            # tuple unpacking.
            if not line.strip():
                continue
            article_id, prediction, spl, spr = line.split('\t')
            result[article_id].setdefault(prediction, []).append([int(spl), int(spr)])
    return result


def show_result(result, articles_id, articles_content, top=None):
    """Render the predicted spans of each article, grouped by technique.

    Articles are processed in sorted id order. Each article's text is run
    through the module-level ``nlp`` pipeline, and every character span is
    converted to a pair of token positions: the first token whose ``idx`` is
    at or after the span start, and the last token whose ``idx`` is before
    the span end. Converted spans are collected into one list per entry of
    the module-level ``mapping``, at the position given by
    ``inverse_mapping[label]``. The article id is then printed in red and
    the tokens and grouped spans are passed to ``visualization.render``.

    ``result`` is not modified, so the function can be called repeatedly on
    the same predictions.

    Args:
        result: Mapping of article id to a dict of label -> list of
            ``(start, end)`` pairs.
        articles_id: Article ids, aligned with ``articles_content``.
        articles_content: Article texts, aligned with ``articles_id``.
        top: Maximum number of articles to render; ``None`` renders all.

    Raises:
        KeyError: If an article id is not present in ``articles_id`` or a
            label is not present in ``inverse_mapping``.
        IndexError: If no token satisfies the start or the end condition
            for a span.
    """
    showed = 0
    articles = dict(zip(articles_id, articles_content))
    for article_id in sorted(result):
        # Check the limit before doing any work so that exactly ``top``
        # articles are rendered (and none when ``top`` is 0).
        if top is not None and showed >= top:
            break

        text = articles[article_id]
        nlp_text = nlp(text)
        tokens_idx = np.array([token.idx for token in nlp_text])
        tokens = [token.text for token in nlp_text]

        spans = [[] for _ in range(len(mapping))]
        for cluster, char_spans in result[article_id].items():
            for char_start, char_end in char_spans:
                # Keep the token positions in locals: writing them back into
                # ``result`` would make a second call treat them as
                # character offsets.
                token_start = np.where(tokens_idx >= char_start)[0][0]
                token_end = np.where(tokens_idx < char_end)[0][-1]
                spans[inverse_mapping[cluster]].append([token_start, token_end])

        # ``spans`` always holds one list per technique, so test whether any
        # of those lists is non-empty rather than the outer length.
        if any(spans):
            print(colored(article_id, 'red'))
            visualization.render(tokens, spans, task='TC')
            showed += 1
                
                
# Index -> technique label. show_result uses the index as the position of the
# label's span list.
# NOTE(review): the ordering presumably has to match what
# visualization.render expects for task='TC' — confirm before reordering.
mapping = dict(enumerate([
    'Appeal_to_Authority',
    'Doubt',
    'Repetition',
    'Appeal_to_fear-prejudice',
    'Slogans',
    'Black-and-White_Fallacy',
    'Loaded_Language',
    'Flag-Waving',
    'Name_Calling,Labeling',
    'Whataboutism,Straw_Men,Red_Herring',
    'Causal_Oversimplification',
    'Exaggeration,Minimisation',
    'Bandwagon,Reductio_ad_hitlerum',
    'Thought-terminating_Cliches',
]))

# Reverse lookup: technique label -> index.
inverse_mapping = {label: index for index, label in mapping.items()}

In [35]:
# Call load_data on the articles folder and the technique-names file (its third
# return value is ignored), read the TC predictions and render them.
# NOTE(review): this cell reads articles from '../datasets/test/test-articles',
# while the span-identification cell earlier in the notebook reads
# '../datasets/test-articles' — confirm which location is correct.
articles_content, articles_id, _ = load_data('../datasets/test/test-articles', 
                                             '../tools/data/propaganda-techniques-names-semeval2020task11.txt')
result = load_result('../results/TC_output_test.txt')
show_result(result, articles_id, articles_content)

[31m813452859[0m


[31m813494037[0m


[31m813547724[0m


[31m813552066[0m


[31m813601978[0m


[31m813602345[0m


[31m813603860[0m


[31m813623212[0m


[31m813714967[0m


[31m813949697[0m


[31m813953273[0m


[31m813953435[0m


[31m813992175[0m


[31m814251296[0m


[31m814371058[0m


[31m814403543[0m


[31m814403783[0m


[31m814403875[0m


[31m814404002[0m


[31m814427361[0m


[31m814435435[0m


[31m814630609[0m


[31m814777937[0m


[31m815412286[0m


[31m815858385[0m


[31m816460196[0m


[31m816720060[0m


[31m817147979[0m


[31m817176202[0m


[31m817190270[0m


[31m817408115[0m


[31m817449755[0m


[31m818141325[0m


[31m820419869[0m


[31m820791520[0m


[31m821040551[0m


[31m821744708[0m


[31m822220578[0m


[31m822942601[0m


[31m824256050[0m


[31m824350729[0m


[31m824658990[0m


[31m824684605[0m


[31m829267754[0m


[31m829815104[0m


[31m830153674[0m


[31m830274102[0m


[31m830359136[0m


[31m830359423[0m


[31m830821478[0m


[31m832918490[0m


[31m832920387[0m


[31m832926076[0m


[31m832931332[0m


[31m832933796[0m


[31m832934428[0m


[31m832940138[0m


[31m832941978[0m


[31m832947554[0m


[31m832947600[0m


[31m832947852[0m


[31m832948083[0m


[31m832956618[0m


[31m832959523[0m


[31m832971448[0m


[31m832984694[0m


[31m833013834[0m


[31m833018464[0m


[31m833021113[0m


[31m833024133[0m


[31m833024696[0m


[31m833028146[0m


[31m833028680[0m


[31m833028932[0m


[31m833032366[0m


[31m833032367[0m


[31m833036176[0m


[31m833036489[0m


[31m833039623[0m


[31m833040400[0m


[31m833041409[0m


[31m833042063[0m


[31m833050243[0m


[31m833052347[0m


[31m833053628[0m


[31m833053676[0m


[31m833067493[0m
