# Análise de viés por subjetividade

In [1]:
# -*- coding: utf-8 -*-
from __future__ import print_function
import pandas as pd
import numpy as np
import nltk
import re
from nltk.corpus import stopwords
nltk.download('stopwords')
from gensim.models import KeyedVectors
from scipy.spatial.distance import cosine
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import euclidean_distances
from pyemd import emd
import unidecode

# Character class of everything that is NOT an ASCII letter/digit or one of the listed
# accented Portuguese/German letters; word2lexicon replaces each match with a space.
# "ß" is listed so that German words such as "heißt" are kept whole instead of being
# split into "hei t" before accents are stripped.
PUNCTUATION = u'[^a-zA-Z0-9àáéíóúÀÁÉÍÓÚâêîôÂÊÎÔãõÃÕçÇäöüÄÖÜß]' # define news punctuation
SENTENCE_SIZE_THRESHOLD = 2 # Minimum length of a text, in whitespace-separated words

[nltk_data] Downloading package stopwords to /home/allan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Definindo Léxicos

In [2]:
# Lexicon definitions.
# Each dictionary maps one of the five dimensions scored by wmd_ratings
# (argumentation, modalization, valuation, sentiment, presupposition) to a single
# space-separated string of accent-free entries; multi-word expressions are joined with "_".

# Portuguese
portuguese_dict = dict(
    argumentacao="a_ponto ao_menos apenas ate ate_mesmo incluindo inclusive mesmo nao_mais_que nem_mesmo no_minimo o_unico a_unica pelo_menos quando_menos quando_muito sequer so somente a_par_disso ademais afinal ainda alem alias como e e_nao em_suma enfim mas_tambem muito_menos nao_so nem ou_mesmo por_sinal tambem tampouco assim com_isso como_consequencia consequentemente de_modo_que deste_modo em_decorrencia entao logicamente logo nesse_sentido pois por_causa por_conseguinte por_essa_razao por_isso portanto sendo_assim ou ou_entao ou_mesmo nem como_se de_um_lado por_outro_lado mais_que menos_que tanto quanto tao como desde_que do_contrario em_lugar em_vez enquanto no_caso quando se se_acaso senao de_certa_forma desse_modo em_funcao enquanto isso_e ja_que na_medida_que nessa_direcao no_intuito no_mesmo_sentido ou_seja pois porque que uma_vez_que tanto_que visto_que ainda_que ao_contrario apesar_de contrariamente contudo embora entretanto fora_isso mas mesmo_que nao_obstante nao_fosse_isso no_entanto para_tanto pelo_contrario por_sua_vez porem posto_que todavia",
    modalizacao="achar aconselhar acreditar aparente basico bastar certo claro conveniente crer dever dificil duvida efetivo esperar evidente exato facultativo falar fato fundamental imaginar importante indubitavel inegavel justo limitar logico natural necessario negar obrigatorio obvio parecer pensar poder possivel precisar predominar presumir procurar provavel puder real recomendar seguro supor talvez tem tendo ter tinha tive verdade decidir",
    valoracao="absoluto algum alto amplo aproximado bastante bem bom categorico cerca completo comum consideravel constante definitivo demais elevado enorme escasso especial estrito eventual exagero excelente excessivo exclusivo expresso extremo feliz franco franqueza frequente generalizado geral grande imenso incrivel lamentavel leve maioria mais mal melhor menos mero minimo minoria muito normal ocasional otimo particular pena pequeno pesar pior pleno pobre pouco pouquissimo praticamente prazer preciso preferir principal quase raro razoavel relativo rico rigor sempre significativo simples tanto tao tipico total tremenda usual valer",
    sentimento="abalar abater abominar aborrecer acalmar acovardar admirar adorar afligir agitar alarmar alegrar alucinar amar ambicionar amedrontar amolar animar apavorar apaziguar apoquentar aporrinhar apreciar aquietar arrepender assombrar assustar atazanar atemorizar aterrorizar aticar atordoar atormentar aturdir azucrinar chatear chocar cobicar comover confortar confundir consolar constranger contemplar contentar contrariar conturbar curtir debilitar decepcionar depreciar deprimir desapontar descontentar descontrolar desejar desencantar desencorajar desesperar desestimular desfrutar desgostar desiludir desinteressar deslumbrar desorientar desprezar detestar distrair emocionar empolgar enamorar encantar encorajar endividar enervar enfeiticar enfurecer enganar enraivecer entediar entreter entristecer entusiasmar envergonhar escandalizar espantar estimar estimular estranhar exaltar exasperar excitar execrar fascinar frustar gostar gozar grilar hostilizar idolatrar iludir importunar impressionar incomodar indignar inibir inquietar intimidar intrigar irar irritar lamentar lastimar louvar magoar maravilhar melindrar menosprezar odiar ofender pasmar perdoar preocupar prezar querer recalcar recear reconfortar rejeitar repelir reprimir repudiar respeitar reverenciar revoltar seduzir sensibilizar serenar simpatizar sossegar subestimar sublimar superestimar surpreender temer tolerar tranquilizar transtornar traumatizar venerar",
    pressuposicao="adivinhar admitir agora aguentar ainda antes atentar atual aturar comecar compreender conseguir constatar continuar corrigir deixar demonstrar descobrir desculpar desde desvendar detectar entender enxergar esclarecer escutar esquecer gabar ignorar iniciar interromper ja lembrar momento notar observar olhar ouvir parar perceber perder pressentir prever reconhecer recordar reparar retirar revelar saber sentir tolerar tratar ver verificar",
)

# English lexicon: same five dimensions and the same space-separated format as
# portuguese_dict, entry by entry.
# Fixed two truncated entries in "pressuposicao": "arify" -> "clarify" and
# "owledge" -> "knowledge" (the Portuguese entries at those positions are
# "esclarecer" and "saber").
# NOTE(review): several entries still look untranslated ("e", "atazanar", "azucrinar",
# "irar", "malquerer") - presumably leftovers of an automatic translation; confirm
# against the source lexicon before changing them.
english_dict = dict(
    argumentacao="to_the_point at_least only until even including inclusive even no_more_than not_even at_the_very_least the_only the_only at_least at_the_very_least at_best even alone only besides_that moreover after_all yet beyond moreover as e and_not in_short at_last but_also let_alone not_only nor or by_the_way moreover neither thus therefore as_a_consequence consequently so_that this_way as_a_result then logically therefore that_way for because_of therefore for_this_reason therefore therefore therefore or or or nor as_if on_the_one_hand on_the_other_hand more_than unless not_only so_much how_much so as provided otherwise in_place instead while in_this_case when yourself in_case downside somehow that_way according_to while that_is since to_the_extent_that that_way in_order in_the_same_vein that_is for because who since so_much_so_that since although backwards despite conversely yet although meanwhile otherwise but even_if nevertheless were_it_not_for_that however for_this_purpose on_the_contrary in_turn but since yet",
    modalizacao="think advise believe apparent basic suffice right clear-cut convenient believe duty difficult doubt effective expect evident exact optional speak suit fundamental imagine important undoubted undeniable fair limit logical natural necessary deny mandatory obvious opinion think power possible need predominate presume search likely can real recommend sure suppose perhaps has having have had I_had truth decide",
    valoracao="absolute any tall broad approximate quite_a_lot good good emphatic fence complete common considerable constant final too_much high huge scarce special strict possible exaggeration superb excessive exclusive expressed extreme happy frank frankness frequent widespread general big immense incredible unfortunate light majority more evil better less mere slightest minority a_lot normal occasional great private pity small sorrow worst full poor little very_little practically pleasure precise prefer principal almost rare reasonable relative rich rigour ever significant simple so_much so typical full tremendous usual count",
    sentimento="shake shoot_down abhor annoy soothe coward admire worship afflict stir alarm cheer hallucinate love aspire frighten grind cheer terrify appease upset crowd appreciate quiet repent haunt frighten atazanar frighten terrorize stir stun torment stun azucrinar annoy hatch covet shake comfort confuse comfort constrain contemplate content contradict disturb enjoy weaken let_down depreciate depress disappoint displease get_out_of_hand wish disenchant discourage despair discourage enjoy dislike let_down disinterest dazzle disorientate despise detest distract thrill excite fall_in_love delight encourage indebtedness annoy bewitch infuriate deceive rage bore entertain sadden enthuse embarrass scandalize frighten_away estimate stimulate odd exalt exasperate excite execute fascinate frustrate like enjoy grill harass idolize delude harass impress bother indign inhibit upset intimidate intrigue irar annoy mourn lament praise hurt malquerer marvel sore belittle obsess hate offend amaze forgive worry cherish want repress be_afraid comfort reject repel suppress repudiate respect worship revolt seduce raise_awareness calm sympathize settle_down underestimate sublimate overestimate amaze fear tolerate reassure upset traumatize worship",
    pressuposicao="guess admit now bear yet beforehand look_out current endure commence comprehend get note carry_on correct let demonstrate discover excuse since unravel detect understand see clarify listen forget brag ignore commence interrupt already remember moment notice observe look listen stop perceive miss sense foresee acknowledge recall repair remove unveil knowledge feel tolerate treat see check",
)

# German lexicon: same five dimensions and the same space-separated format as
# portuguese_dict, entry by entry.
# Fixed two truncated entries in "pressuposicao": "twirren" -> "entwirren" and
# "hrnehmen" -> "wahrnehmen" (the Portuguese entries at those positions are
# "desvendar" and "perceber").
# NOTE(review): entries such as "sich[Akk]_bewegen" contain brackets, which the
# token_pattern used in lexicon_rate does not treat as word characters, and a few
# entries look untranslated ("e", "atazanar", "Azukrinar", "irrar") - confirm
# against the source lexicon before changing them.
german_dict = dict(
    argumentacao="auf_den_Punkt_gebracht zumindest nur bis gerade einschließlich inklusive gerade nicht_mehr_als nicht_einmal zumindest die_einzige die_einzige zumindest zumindest bestenfalls gerade allein nur außerdem außerdem immerhin dennoch jenseits außerdem wie e und_nicht kurz_gesagt endlich aber_auch geschweige_denn nicht_nur auch_nicht oder ubrigens außerdem auch_nicht somit daher infolgedessen folglich so_dass auf_diese_Weise infolgedessen dann logischerweise daher auf_diese_Weise fur wegen daher aus_diesem_Grund daher daher daher oder oder oder auch_nicht als_ob einerseits andererseits mehr_als es_sei_denn nicht_nur so_sehr wie_viel so wie bereitgestellt anderweitig an_Ort_und_Stelle stattdessen wahrend in_diesem_Fall wann selbst fur_den_Fall Nachteil irgendwie auf_diese_Weise nach wahrend das_heißt da insoweit_als auf_diese_Weise in_der_Reihenfolge in_gleicher_Weise das_heißt fur denn wer da so_sehr dass da obwohl ruckwarts trotz Andererseits dennoch obwohl in_der_Zwischenzeit anderweitig aber auch_wenn trotzdem ohne_das_ware_es_nicht_so jedoch zu_diesem_Zweck im_Gegenteil wiederum aber da dennoch",
    modalizacao="denken beraten glauben offensichtlich grundlegend ausreichen rechts klar_umrissen praktisch glauben Pflicht schwierig Zweifel effektiv erwarten offensichtlich genau optional sprechen Klage grundlegend sich[Dat]_einbilden wichtig zweifellos unbestreitbar fair Limit logisch naturlich notwendig leugnen obligatorisch offensichtlich Meinung denken Macht moglich Bedarf vorherrschen annehmen Suche wahrscheinlich Dose wirklich empfehlen sicher annehmen vielleicht hat habend haben hatte Ich_hatte Wahrheit entscheiden",
    valoracao="absolut irgendeine hochgewachsen breit ungefahr ziemlich_viel gut gut nachdrucklich Zaun vollstandig allgemein betrachtlich konstant Endspiel zu_viel hoch riesig knapp speziell streng moglich ubertreibung superb ubertrieben exklusiv ausgedruckt extrem glucklich offen Offenheit haufig weit_verbreitet allgemein groß immens unglaublich unglucklich Licht Mehrheit mehr bose besser weniger nur geringste Minderheit oft normal gelegentlich großartig privat Mitleid klein Trauer am_schlimmsten voll schlecht wenig sehr_wenig praktisch Vergnugen prazise bevorzugen Auftraggeber fast selten vernunftig relativ reich Strenge jemals signifikant einfach so_sehr so typisch voll riesig ublich Anzahl",
    sentimento="schutteln abschießen verabscheuen verargern beruhigen Feigling bewundern Anbetung belasten sich[Akk]_bewegen Alarm jubeln halluzinieren Liebe streben erschrecken schleifen jubeln erschrecken besanftigen verargert Menge schatzen ruhig bereuen Lieblingsort erschrecken atazanar erschrecken terrorisieren sich[Akk]_bewegen betauben Qualerei betauben Azukrinar verargern Klappe begehren schutteln Komfort verwirren Komfort einschranken nachdenken Inhalt widersprechen storen genießen abschwachen enttauschen abschreiben niederdrucken enttauschen missfallen außer_Kontrolle_geraten Wunsch desillusionieren entmutigen Verzweiflung entmutigen genießen nicht_mogen enttauschen Desinteresse blendend desorientieren verachten verabscheuen ablenken Nervenkitzel anregen sich[Akk]_verlieben Freude ermutigen Verschuldung verargern verzaubern wutend_machen tauschen Wut Bohrung unterhalten traurig_machen sich[Akk]_begeistern in_Verlegenheit_bringen einen_Skandal_hervorrufen verscheuchen Schatzung anregen merkwurdig verherrlichen verargern anregen ausfuhren faszinieren frustrieren wie genießen Grill belastigen vergottern tauschen belastigen beeindrucken belastigen emporen hemmen verargert einschuchtern Intrige irrar verargern trauern Klage Lob verletzt Verleumder Wunder wund herabsetzen besessen Hass beleidigen verbluffen verzeihen Sorge schatzen wollen unterdrucken Angst_haben Komfort ablehnen abwehren unterdrucken ablehnen Respekt Anbetung Revolte verfuhren Bewusstseinsbildung ruhig mitfuhlen sich[Akk]_beruhigen unterschatzen sublimieren uberbewerten verbluffen Angst tolerieren beruhigen verargert traumatisieren Anbetung",
    pressuposicao="erraten zugeben jetzt Bar dennoch vorher aufpassen aktuell ertragen beginnen verstehen bekommen Hinweis weitermachen richtig lassen demonstrieren entdecken Entschuldigung da entwirren erkennen verstehen sehen klaren zuhoren vergessen prahlen ignorieren beginnen unterbrechen bereits sich[Akk]_erinnern Moment Hinweis beobachten Aussehen zuhoren Haltestelle wahrnehmen verpassen Sinn voraussehen bestatigen Ruckruf Reparatur entfernen enthullen Wissen Gefuhl tolerieren Leckerbissen sehen prufen",
)

In [3]:
# Mapping words in lexicons (Portuguese).
# word2lexicon replaces every key found in the normalised text with its value, so each
# multi-word expression becomes the single underscore-joined token used in portuguese_dict.
# Keys and values both end with one space so that the following word stays separated.
# The mapping is generated from the phrase list, which keeps key and value consistent by
# construction (the hand-written pair 'em vez ':'em_vez' used to swallow the space and
# glue the next word on, e.g. "em vez de" -> "em_vezde").
_PHRASES_POR = (
    'a ponto', 'ao menos', 'ate mesmo', 'nao mais que', 'nem mesmo', 'no minimo',
    'o unico', 'a unica', 'pelo menos', 'quando menos', 'quando muito', 'a par disso',
    'e nao', 'em suma', 'mas tambem', 'muito menos', 'nao so', 'ou mesmo', 'por sinal',
    'com isso', 'como consequencia', 'de modo que', 'deste modo', 'em decorrencia',
    'nesse sentido', 'por causa', 'por conseguinte', 'por essa razao', 'por isso',
    'sendo assim', 'ou entao', 'como se', 'de um lado', 'por outro lado', 'mais que',
    'menos que', 'desde que', 'do contrario', 'em lugar', 'em vez', 'no caso', 'se acaso',
    'de certa forma', 'desse modo', 'em funcao', 'isso e', 'ja que', 'na medida que',
    'nessa direcao', 'no intuito', 'no mesmo sentido', 'ou seja', 'uma vez que',
    'tanto que', 'visto que', 'ainda que', 'ao contrario', 'apesar de', 'fora isso',
    'mesmo que', 'nao obstante', 'nao fosse isso', 'no entanto', 'para tanto',
    'pelo contrario', 'por sua vez', 'posto que',
)
map_lexicons_por = {phrase + ' ': phrase.replace(' ', '_') + ' ' for phrase in _PHRASES_POR}

# Mapping words in lexicons (English).
# word2lexicon replaces every key found in the normalised (lower-cased) text with its
# value, so each multi-word expression becomes the underscore-joined token used in
# english_dict. The mapping is generated from the phrase list so key and value cannot
# drift apart: the hand-written version listed "on the one hand" twice, the second time
# with the value "on_the_other_hand", so "on the one hand" was rewritten to the wrong
# token and "on the other hand" was never mapped at all.
_PHRASES_ENG = (
    "to the point", "at least", "no more than", "not even", "at the very least", "the only",
    "at best", "besides that", "after all", "and not", "in short", "at last", "but also",
    "let alone", "not only", "by the way", "as a consequence", "so that", "this way",
    "as a result", "that way", "because of", "for this reason", "as if", "on the one hand",
    "on the other hand", "more than", "so much", "how much", "in place", "in this case",
    "in case", "according to", "that is", "to the extent that", "in order", "in the same vein",
    "so much so that", "even if", "were it not for that", "for this purpose", "on the contrary",
    "in turn", "i had", "quite a lot", "too much", "a lot", "very little", "shoot down",
    "let down", "get out of hand", "fall in love", "frighten away", "be afraid",
    "raise awareness", "settle down", "look out", "carry on",
)
map_lexicons_eng = {phrase: phrase.replace(" ", "_") for phrase in _PHRASES_ENG}

# Mapping words in lexicons (German).
# word2lexicon lower-cases the text before it looks for the keys, so keys are stored
# lower-cased here (capitalised keys such as "im Gegenteil" could never match). Values
# keep the spelling used in german_dict.
# Also fixed: the fused key "sich[Dat] einbildenIch hatte" is split into its two phrases,
# and the two identity entries whose keys already contained "_" now use the spaced form.
# NOTE(review): keys containing "[", "]" or "ß" still cannot match with the current
# PUNCTUATION pattern, which replaces those characters with spaces before the lookup -
# confirm how these expressions should be normalised.
_PHRASES_GER = (
    "auf den Punkt gebracht", "nicht mehr als", "nicht einmal", "die einzige", "und nicht",
    "kurz gesagt", "aber auch", "geschweige denn", "nicht nur", "auch nicht", "so dass",
    "auf diese Weise", "aus diesem Grund", "als ob", "mehr als", "es sei denn", "so sehr",
    "wie viel", "an Ort und Stelle", "in diesem Fall", "fur den Fall", "das heißt",
    "insoweit als", "in der Reihenfolge", "in gleicher Weise", "in der Zwischenzeit",
    "auch wenn", "ohne das ware es nicht so", "zu diesem Zweck", "im Gegenteil",
    "klar umrissen", "sich[Dat] einbilden", "Ich hatte", "ziemlich viel", "zu viel",
    "weit verbreitet", "am schlimmsten", "sehr wenig", "sich[Akk] bewegen",
    "außer Kontrolle geraten", "nicht mogen", "sich[Akk] verlieben", "wutend machen",
    "traurig machen", "sich[Akk] begeistern", "in Verlegenheit bringen",
    "einen Skandal hervorrufen", "Angst haben", "sich[Akk] beruhigen", "sich[Akk] erinnern",
)
map_lexicons_ger = {phrase.lower(): phrase.replace(" ", "_") for phrase in _PHRASES_GER}

### Carregando Word Embeddings

In [4]:
def set_embeddings(wv_lang):
    """Extract a word -> row-index dictionary and the unit-normalised embedding matrix.

    Parameters
    ----------
    wv_lang : gensim KeyedVectors
        Loaded word vectors. Both the pre-4.0 API (``vocab`` / ``init_sims`` /
        ``vectors_norm``) and the 4.x API (``key_to_index`` / ``get_normed_vectors``)
        are supported.

    Returns
    -------
    (vocab_dict, W) : (dict, numpy.ndarray)
        ``vocab_dict`` maps each word to its row in ``W``; ``W`` holds the
        L2-normalised vectors as float64.

    Notes
    -----
    Keys are kept as text exactly as gensim stores them. They used to be encoded to
    UTF-8 bytes, but check_value looks words up with the text tokens produced by
    CountVectorizer, so encoded keys made non-ASCII words (and, on Python 3, every
    word) fall back to row 0.
    """
    if hasattr(wv_lang, "key_to_index"):
        # gensim >= 4: vocab/init_sims/vectors_norm were replaced by these accessors.
        vocab_dict = dict(wv_lang.key_to_index)
        W = np.double(wv_lang.get_normed_vectors())
    else:
        wv_lang.init_sims()  # populates vectors_norm
        vocab_dict = {word: vocab.index for word, vocab in wv_lang.vocab.items()}
        W = np.double(wv_lang.vectors_norm)
    return vocab_dict, W

In [5]:
# Load model
# Only the German vectors are loaded in this run. The Portuguese and English loads are
# kept below, commented out; enable them to score articles in those languages.
#wv_por = KeyedVectors.load_word2vec_format('embeddings/CoNLL17/portuguese/model.txt', binary=False, unicode_errors='replace')
#vocab_dict_por, W_por = set_embeddings(wv_por)

wv_ger = KeyedVectors.load_word2vec_format(
    'embeddings/CoNLL17/german/model.txt',
    binary=False,  # plain-text word2vec format
    unicode_errors='replace',
)
vocab_dict_ger, W_ger = set_embeddings(wv_ger)

#wv_eng = KeyedVectors.load_word2vec_format('embeddings/CoNLL17/english/model.txt', binary=False, unicode_errors='replace')
#vocab_dict_eng, W_eng = set_embeddings(wv_eng)

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [6]:
# The English embeddings are not loaded in this run; these placeholders keep the
# names that set_parameters reads for its English default defined.
vocab_dict_eng = None
W_eng = None

### Definindo Funções

In [7]:
# Convert word from text into lexicons
def word2lexicon(text, map_lexicons):
    """Normalise ``text`` and join known multi-word expressions into lexicon tokens.

    The text is decoded (when given as bytes), stripped of punctuation, lower-cased and
    accent-stripped; every expression listed in ``map_lexicons`` is then replaced by
    its underscore-joined token.

    Parameters
    ----------
    text : bytes or text
        Raw article body. ``None``/NaN (a missing body) yields an empty string.
    map_lexicons : dict
        Expression -> token mapping, e.g. ``{"at least": "at_least"}``. Surrounding
        spaces and letter case of the keys are ignored.

    Returns
    -------
    The normalised text with the expressions replaced.

    Differences from plain chained ``str.replace``:
    * keys go through the same normalisation as the text, so keys containing capitals
      or accented letters can match the normalised text;
    * expressions only match on word boundaries ("was if" is no longer rewritten to
      "was_if" by the key "as if");
    * longer expressions win over shorter ones they contain, and the text is rewritten
      in a single pass, so the result no longer depends on dictionary order.
    """
    def _normalise(raw):
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8')
        raw = re.sub(PUNCTUATION, " ", raw).lower() # remove punctuation from text
        return unidecode.unidecode(raw) # remove accents

    # A missing body is read by pandas as NaN; NaN is the only value unequal to itself.
    if text is None or text != text:
        return ""
    text = _normalise(text)

    replacements = {}
    for phrase, token in map_lexicons.items():
        key = _normalise(phrase).strip()
        if key:
            replacements.setdefault(key, token.strip())
    if not replacements:
        return text

    # Longest first so that "so much so that" is tried before "so much".
    longest_first = sorted(replacements, key=lambda key: (-len(key), key))
    pattern = re.compile(r"(?<!\w)(?:" + "|".join(map(re.escape, longest_first)) + r")(?!\w)")
    return pattern.sub(lambda match: replacements[match.group(0)], text)

# define settings of languages
def set_parameters(tex_lang):
    """Return the language-specific resources for an article.

    Parameters
    ----------
    tex_lang : str
        Language code of the article: "por" (Portuguese) or "deu" (German). Any other
        value falls back to English.

    Returns
    -------
    (lang, lang_dict, W, vocab_dict, map_lexicons)
        The NLTK stopword language name, the lexicon dictionary, the embedding matrix,
        the word -> row dictionary and the multi-word expression mapping.

    Only the globals of the selected language are read, so embeddings of the other
    languages do not have to be loaded (or stubbed) for this call to work. A NameError
    is still raised when the selected language's own embeddings are not loaded.
    """
    if tex_lang == "por":
        return ("portuguese", portuguese_dict, W_por, vocab_dict_por, map_lexicons_por)

    if tex_lang == "deu":
        return ("german", german_dict, W_ger, vocab_dict_ger, map_lexicons_ger)

    # Default: English
    return ("english", english_dict, W_eng, vocab_dict_eng, map_lexicons_eng)

# function for processing text
def processSentences(text, lang):
    """Remove the stopwords of ``lang`` from ``text``.

    Parameters
    ----------
    text : str
        Text to filter; it is split on whitespace.
    lang : str
        Language name passed to ``stopwords.words``, e.g. "german".

    Returns
    -------
    str
        The remaining words joined by single spaces.
    """
    # A set makes each membership test O(1); the list returned by NLTK was scanned
    # once per word.
    # NOTE(review): in this notebook the text arrives already accent-stripped, so
    # stopwords spelled with accents would presumably not match - confirm whether
    # that is intended.
    stop_words = set(stopwords.words(lang)) # load stop words
    return " ".join(word for word in text.split() if word not in stop_words)

# Compute the validity of the text by SENTENCE_SIZE_THRESHOLD
def is_valid_text(text):
    """Return True when ``text`` has at least SENTENCE_SIZE_THRESHOLD whitespace-separated words."""
    word_count = len(text.split())
    if word_count >= SENTENCE_SIZE_THRESHOLD:
        return True
    return False

# Check if the word is in the vocabulary
def check_value(word, vocab_dict):
    """Return the value stored for ``word`` in ``vocab_dict``, or 0 when the word is absent."""
    return vocab_dict.get(word, 0)

# Compute the earth mover's distance between a lexicon and the text
def lexicon_rate(lexicon, text, W, vocab_dict):
    """Earth mover's distance between the word histograms of ``lexicon`` and ``text``.

    Both strings are turned into bag-of-words counts over their joint vocabulary and
    normalised to sum to 1. The ground distance between two words is the Euclidean
    distance between their rows of ``W``, scaled by the largest distance found
    (presumably the Word Mover's Distance formulation, as the caller's name
    ``wmd_ratings`` suggests).

    Parameters
    ----------
    lexicon : str
        Space-separated lexicon entries.
    text : str
        Pre-processed article text; must contain at least one token.
    W : numpy.ndarray
        Embedding matrix, one row per vocabulary word.
    vocab_dict : dict
        Word -> row index of ``W``. Words missing from it use row 0 (see check_value).

    Returns
    -------
    float
        The distance returned by ``emd``.
    """
    vect = CountVectorizer(token_pattern="(?u)\\b[\\w-]+\\b", strip_accents=None).fit([lexicon, text])
    v_1, v_2 = (row.toarray().ravel().astype(np.double)
                for row in vect.transform([lexicon, text]))

    # get_feature_names() was removed in scikit-learn 1.2; prefer its replacement when
    # the installed version has it.
    feature_names = getattr(vect, "get_feature_names_out", None) or vect.get_feature_names
    W_ = W[[check_value(w, vocab_dict) for w in feature_names()]]
    D_ = euclidean_distances(W_).astype(np.double)

    v_1 /= v_1.sum()
    v_2 /= v_2.sum()

    # All words can map to the same vector (e.g. none of them is in the vocabulary);
    # the distances are then all 0 and dividing by the maximum would yield NaN.
    max_distance = D_.max()
    if max_distance > 0:
        D_ /= max_distance
    return emd(v_1, v_2, D_)

# Compute bias for each lexicon dimension
def wmd_ratings(text, lang_dict, W, vocab_dict):
    """Score ``text`` against the five lexicon dimensions of ``lang_dict``.

    Returns a 5-tuple ordered as (argumentacao, sentimento, valoracao, modalizacao,
    pressuposicao), each value coming from lexicon_rate. When is_valid_text rejects
    the text, every position is -1.
    """
    if not is_valid_text(text):
        return -1, -1, -1, -1, -1

    scores = {}
    for dimension in ("argumentacao", "modalizacao", "valoracao", "sentimento", "pressuposicao"):
        scores[dimension] = lexicon_rate(lang_dict[dimension], text, W, vocab_dict)

    # Return order differs from scoring order: sentiment comes second, modalization fourth.
    return (scores["argumentacao"], scores["sentimento"], scores["valoracao"],
            scores["modalizacao"], scores["pressuposicao"])

### Estimando viés para notícias

In [None]:
import glob
import os

# Read every CSV export in the data folder into one DataFrame.
path = r'EventRegistryData/'
# sorted(): glob returns files in arbitrary order, which would make the row order of
# `news` differ between runs and machines.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # pd.concat would fail with the less helpful "No objects to concatenate".
    raise ValueError("No CSV files found in %r" % path)

li = [pd.read_csv(filename, index_col=None) for filename in all_files]

# ignore_index gives the combined frame a fresh 0..n-1 index.
news = pd.concat(li, axis=0, ignore_index=True)

In [8]:
# Quick run on a small sample: the first 30 German articles.
# news_2 has to be read here; with the line commented out this cell only worked while
# an old `news_2` was still alive in the kernel and failed after Restart & Run All.
#news_1 = pd.read_csv("EventRegistryData/news_Venezuela_por.csv")
news_2 = pd.read_csv("EventRegistryData/news_Venezuela_deu.csv")

#news = news_1.head(1).append(news_2.head(1))
news = news_2.head(30)
n, c = news.shape  # number of articles, number of columns

In [9]:
# One row of scores per article. The frame shares the index of `news` because the
# scoring loop writes rows by news label and the final export concatenates both frames
# on the index; range(0, n) only lined up when news happened to be labelled 0..n-1.
sub = pd.DataFrame(columns=["arg", "sen", "val", "mod", "pre"], index=news.index)

In [20]:
import time
start_time = time.time()
n_articles = len(news)

# `position` counts processed articles from 0. The index label is only used to address
# the row: labels are not guaranteed to run from 0 to n-1, so they cannot drive the
# checkpoint cadence or the progress figure.
for position, (index, article) in enumerate(news.iterrows()):

    # Set news configuration
    lang, lang_dict, W, vocab_dict, map_lexicons = set_parameters(article["lang"])

    # Processing text
    text = word2lexicon(article["body"], map_lexicons)
    text = processSentences(text, lang)

    # Compute news bias
    arg, sen, val, mod, pre = wmd_ratings(text, lang_dict, W, vocab_dict)
    sub.loc[index,] = [arg, sen, val, mod, pre]

    # Checkpoint every 10 articles so a crash does not lose the whole run
    if position % 10 == 0:
        sub.to_csv("subjectivity_temp.csv", index_label=False)

    # position + 1 articles are done at this point, so the last line reads 100.00 %
    print('Index: {0} - Progress: {1:.2f} %'.format(index, float(position + 1) / n_articles * 100), end='\r')
print("--- %s seconds ---" % (time.time() - start_time))

--- 43.6390249729 seconds ---


In [22]:
# Put the five scores next to each article (frames are aligned on their index) and
# export the result without the index column.
news_with_scores = pd.concat([news, sub], axis=1)
news_with_scores.to_csv("news_subjectivities.csv", index=False)