# Claims Analysis

In [None]:
%load_ext autoreload
%autoreload 2

In [40]:
import operator
import gb.hypergraph.symbol as sym
import gb.hypergraph.edge as ed
import gb.nlp.parser as par
import gb.tools.json as json_tools
from gb.clusters.meronomy import Meronomy

### Constants

In [2]:
# Cut-off compared against the `prob` of a parsed word: edge2str keeps an atom
# only when its word's prob is strictly below this value.
# NOTE(review): presumably a log-probability, so lower means rarer -- confirm.
MAX_PROB = -12

### Start parser

In [3]:
# Single parser instance, reused below by the Meronomy and by edge2str.
parser = par.Parser()

### Read data & build full edges list

In [4]:
# Load the raw records; each one carries its hyperedge as a string under 'edge'.
edge_data = json_tools.read('../all.json')

# Parse every edge string and strip namespaces from the resulting edge.
full_edges = [
    ed.without_namespaces(ed.str2edge(record['edge']))
    for record in edge_data
]

### Meronomy and Synonyms

In [5]:
# Build the meronomy from the parser and the namespace-free edges, then
# normalize its graph.
mer = Meronomy(parser, full_edges)
mer.normalize_graph()

# Generate the synonym sets; later cells query them through `mer.syn_id`
# and `mer.synonym_label`.
# NOTE(review): presumably this must run after normalize_graph() -- confirm.
mer.generate_synonyms()

### Filter claim hyperedges
Keep only hyperedges of the form `(says x ...)`, where the claim (the third element) is itself a hyperedge.

In [6]:
def rel_contains(full_edge, term):
    """Tell whether the relation of a claim-shaped edge matches `term`.

    An edge qualifies only if it is a hyperedge with more than two elements
    whose third element is itself a hyperedge. The relation is the first
    element: when it is a hyperedge, `term` must be one of its members;
    otherwise it must be equal to `term`.

    Returns False for anything that does not have that shape.
    """
    claim_shaped = (
        sym.is_edge(full_edge)
        and len(full_edge) > 2
        and sym.is_edge(full_edge[2])
    )
    if not claim_shaped:
        return False

    relation = full_edge[0]
    return (term in relation) if sym.is_edge(relation) else (relation == term)


# Edges whose relation is, or contains, 'says'.
say_edges = [
    candidate for candidate in full_edges
    if rel_contains(candidate, 'says')
]

In [7]:
def edge2str(edge):
    """Return the string form of `edge`, or None for atoms that are filtered out.

    Hyperedges are always returned as their namespace-free string. For atoms,
    a single leading '+' is dropped and the result is returned only when:
      * it is non-empty,
      * its first character is alphanumeric, and
      * the `prob` of the word built by the parser is below MAX_PROB.
    Any other atom yields None.
    """
    s = ed.edge2str(edge, namespaces=False)
    if sym.is_edge(edge):
        return s

    # startswith() is safe on an empty string, whereas indexing s[0] before
    # the emptiness check would raise IndexError.
    if s.startswith('+'):
        s = s[1:]

    if not s or not s[0].isalnum():
        return None

    word = parser.make_word(s)
    return s if word.prob < MAX_PROB else None

def edge2syn(edge):
    """Return the synonym-set id for `edge`, or None when there is none.

    None is returned both when edge2str rejects the edge and when the
    meronomy yields a falsy id for its string form.
    """
    label = edge2str(edge)
    if not label:
        return None
    return mer.syn_id(label) or None

### Sayers, sayers + claims & sorted sayers

In [20]:
# sayers: sayer synonym id -> number of claims
# sayers_and_claims: sayer synonym id -> list of claim edges
# Sayers without a synonym id are all grouped under the key None.
sayers = {}
sayers_and_claims = {}
for edge in say_edges:
    sayer = edge2syn(edge[1])
    sayers[sayer] = sayers.get(sayer, 0) + 1
    sayers_and_claims.setdefault(sayer, []).append(edge[2])

# (sayer, claim count) pairs, most prolific sayer first.
sorted_sayers = sorted(sayers.items(), key=lambda pair: pair[1], reverse=True)

### Concepts by sayer

In [9]:
# Maps each key of sayers_and_claims to the concept-count dict computed by
# get_concepts_by_sayer; filled in at the end of this cell.
concepts_by_sayer = {}


def add_concepts(targ, src):
    """Merge the counts of `src` into `targ` in place.

    Keys missing from `targ` are copied over; keys already present have the
    `src` value added to them. Nothing is returned.
    """
    for concept, count in src.items():
        if concept not in targ:
            targ[concept] = count
        else:
            targ[concept] += count


def concepts_in_claim(claim, concept_map=None):
    """Count the synonym ids found in `claim` and in its nested elements.

    Args:
        claim: an edge or atom to inspect.
        concept_map: optional dict (synonym id -> count) to accumulate into.
            A new dict is created only when this is None, so a caller-supplied
            empty dict is filled in place rather than silently replaced.

    Returns:
        The dict that was accumulated into.

    Note that the elements of `claim` are only visited when `claim` itself
    resolves to a synonym id.
    """
    if concept_map is None:
        concept_map = {}
    syn_id = edge2syn(claim)
    if syn_id:
        concept_map[syn_id] = concept_map.get(syn_id, 0) + 1

        if sym.is_edge(claim):
            for item in claim:
                concepts_in_claim(item, concept_map)
    return concept_map


def get_concepts_by_sayer(sayer, that_include=None):
    """Aggregate concept counts over all claims attributed to `sayer`.

    Args:
        sayer: key into sayers_and_claims.
        that_include: optional synonym id. When given (and truthy), only
            claims containing that concept are counted, and the concept
            itself is left out of the totals.

    Returns:
        Dict mapping synonym id -> summed count.
    """
    totals = {}
    for claim in sayers_and_claims[sayer]:
        found = concepts_in_claim(claim)
        if that_include:
            if that_include not in found:
                continue
            # Drop the filter concept so it does not show up in the result.
            found.pop(that_include)
        add_concepts(totals, found)
    return totals


# Compute the unfiltered concept counts of every sayer.
concepts_by_sayer.update(
    (sayer_id, get_concepts_by_sayer(sayer_id))
    for sayer_id in sayers_and_claims
)

### Who talks about who graph

In [45]:
# Weighted directed graph: who_who[origin][target] -> number of mentions.
who_who = {}


def add_edge(orig, targ):
    """Increment the weight of the orig -> targ edge in who_who.

    Does nothing when either endpoint is falsy.
    """
    if not (orig and targ):
        return
    targets = who_who.setdefault(orig, {})
    targets[targ] = targets.get(targ, 0) + 1

# Link every sayer to each concept of its claims that is itself a sayer.
for sayer in sayers:
    for claim in sayers_and_claims[sayer]:
        for concept in concepts_in_claim(claim):
            if concept in sayers:
                add_edge(sayer, concept)

# Write one "origin,target,weight" row per edge. The context manager makes
# sure the file is closed even if a label lookup or a write raises.
# NOTE(review): labels are written unescaped; if synonym_label can return
# text containing commas the rows become ambiguous -- consider the csv module.
with open('../who_who.csv', 'w') as out_file:
    for orig, targets in who_who.items():
        orig_label = mer.synonym_label(orig, short=True)
        for targ, weight in targets.items():
            targ_label = mer.synonym_label(targ, short=True)
            out_file.write('%s,%s,%s\n' % (orig_label, targ_label, weight))

## Experiments

In [10]:
# Top 20 sayers: id, synonym label, number of claims, number of distinct
# concepts. Entries with a falsy id (sayers that resolved to no synonym set)
# are skipped.
for syn_id, claim_count in sorted_sayers[:20]:
    if not syn_id:
        continue
    label = mer.synonym_label(syn_id)
    concept_count = len(concepts_by_sayer[syn_id])
    print('%s %s %s %s' % (syn_id, label, claim_count, concept_count))

2 {donald, trump, (+ donald trump)} 358 1479
8 {(+ bernie sanders), sanders, bernie} 93 369
20 {hillary, (+ hillary clinton), clinton} 88 338
258749 {obama} 75 304
764 {ryan, (+ paul ryan), paul} 21 95
453 {mike, (+ mike pence), pence} 17 70
51 {(+ john kasich), john, kasich} 15 62
1 {(+ gary johnson), johnson, (+libertarian (+ gary johnson))} 14 44
265004 {(+the latest)} 14 58
259357 {(+ white house)} 13 55
151 {(+the fbi), fbi} 11 39
504 {marco, rubio, (+ marco rubio)} 11 39
723 {chris, (+ chris christie), christie} 9 28
265734 {(+ bill clinton)} 9 40
260899 {(+ trump campaign)} 9 38
842 {mitch, (+ mitch mcconnell), mcconnell} 9 39
810 {(+ rudy giuliani), giuliani, rudy} 8 33
234 {newt, gingrich, (+ newt gingrich)} 8 38
208 {warren, elizabeth, (+ elizabeth warren)} 7 36


In [11]:
# Most frequent concepts in the claims of synonym set 20 (the Hillary Clinton
# set in the sayer listing printed earlier in this notebook).
concepts = concepts_by_sayer[20]

sorted_concepts = sorted(concepts.items(), key=lambda pair: pair[1], reverse=True)
for syn_id, count in sorted_concepts[:20]:
    if syn_id:
        print('%s %s' % (mer.synonym_label(syn_id), count))

{donald, trump, (+ donald trump)} 28
{(+ bernie sanders), sanders, bernie} 25
{(+of use (+a (+private (+ email server)))), (+a (+private (+ email server))), (+for investigation (+her (+of use (+a (+private (+ email server)))))), (+private (+ email server)), (+federal (+for investigation (+her (+of use (+a (+private (+ email server))))))), (+ email server), (+under (+federal (+for investigation (+her (+of use (+a (+private (+ email server)))))))), (+over (+her (+of use (+a (+private (+ email server)))))), (+her (+of use (+a (+private (+ email server)))))} 6
{(+about (+hard (+' choices))), (+hard (+' choices)), (+' choices), (+but it (+about (+hard (+' choices))))} 4
{(+ his homework’), homework’, (+ bank regulation), (+on (+ his homework’) (+ bank regulation))} 4
{paranoia, (+and paranoia prejudice), prejudice, (+on (+and paranoia prejudice))} 4
{(+$ 200,000), 200,000, (+in (+just (+more (+than (+$ 200,000)))) income), (+about (+$ 200,000)), (+than (+$ 200,000)), (+more (+than (+$ 200,0

In [13]:
# common concepts

# Concepts mentioned both by synonym set 2 (Donald Trump) and by synonym set
# 258749 (Obama), going by the sayer listing printed earlier in this notebook.
concepts1 = set(concepts_by_sayer[2])
concepts2 = set(concepts_by_sayer[258749])

common = concepts1 & concepts2
for concept in common:
    print(mer.synonym_label(concept))

{(+ gary johnson), johnson, (+libertarian (+ gary johnson))}
{donald, trump, (+ donald trump)}
{putin}
{gop}
{(+ bernie sanders), sanders, bernie}
{nominee}
{vladimir, (+ vladimir putin)}
{presidency, (+the presidency)}
{(+in u.s)}
{(+ climate change)}
{hillary, (+ hillary clinton), clinton}
{(+the fbi), fbi}
{russia}
{u.s, (+the u.s)}
{shootings, (+ police shootings)}
{presidential}
{(+of (+daily (+ intelligence briefings))), (+ intelligence briefings), (+his (+daily (+ intelligence briefings))), briefings, (+daily (+ intelligence briefings)), (+to_skip (+daily (+ intelligence briefings)))}
{meddling, (+for calls (+select (+on panel (+ russian meddling)))), (+ russian meddling), (+select (+on panel (+ russian meddling))), (+on panel (+ russian meddling))}
{will_take}
{african, (+ african americans)}
{india}
{democrats}
{cnn}
{(+of (+the (+ world leaders) (+who had))), (+the (+ world leaders) (+who had)), (+two (+of (+the (+ world leaders) (+who had)))), (+ world leaders), (+who had)}


In [14]:
# Concepts that co-occur with synonym set 8 (Bernie Sanders) in the claims of
# synonym set 2 (Donald Trump); ids as shown in the sayer listing printed
# earlier in this notebook.
concepts = get_concepts_by_sayer(2, that_include=8)
for concept, count in concepts.items():
    print('%s %s' % (mer.synonym_label(concept), count))

{(+and (+ bernie sanders) (+a communist))} 1
{(+a communist)} 1
{(+' sanders)} 2
{(will_debate he (+ bernie sanders) (+for (+$ (+10 million))))} 1
{will_debate, (will_debate in)} 1
{(+for (+$ (+10 million)))} 1
{(+$ (+10 million)), (+than (+$ (+10 million))), (+10 million), (+more (+than (+$ (+10 million))))} 2
{(would_kill (+ bernie sanders) golf (+with high taxes))} 1
{(thought would_kill), ((thought would_kill) economists obamacare jobs), would_kill} 1
{(+with high taxes)} 1
{('ll_order he supporters (+to_disrupt (+rallies (+' sanders))))} 1
{(+to_disrupt (+rallies (+' sanders)))} 1
{to_disrupt, (+to_disrupt (+ pipeline drilling)), (+ pipeline drilling)} 1
{(+rallies (+' sanders))} 1
{rallies} 1
{(+clinton says not_qualified’ because sanders so)} 1
{hillary, (+ hillary clinton), clinton} 1
{not_qualified’} 1
{(says sanders (+no debate) (+ interested networks))} 1
{(+no debate)} 1
{(+ interested networks)} 1
{(sold (+ bernie sanders) (+ his soul) (+to (+the devil’)))} 1
{(+ his soul)