# News Analysis

In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import operator
import math
import progressbar
import json as j
from unidecode import unidecode
import networkx as nx
from gb.notebook import *
import gb.synonyms.synonyms as syn
import gb.nlp.parser as par
from gb.metrics.hyper_similarity import HyperSimilarity

### HyperGraph, etc.

In [3]:
MAX_PROB = -12
hg = init_hypergraph('../reddit-worldnews-01012013-01082017-new-inference3.hg')
hs = HyperSimilarity(hg)
parser = par.Parser()

### NLP funs

In [4]:
def is_compound_propn(edge):
    if len(edge) < 3:
        return False
    if edge[0] != '+/gb':
        return False
    for item in edge[1:]:
        if is_symbol(item):
            if symbol_namespace(item)[-5:] != 'propn':
                return False
        elif not is_compound_propn(item):
            return False
    return True

def is_adj(edge):
    if is_edge(edge):
        return False
    return symbol_namespace(edge)[-3:] == 'adj'

def is_adp(edge):
    return is_symbol(edge) and symbol_namespace(edge)[-3:] == 'adp'

def is_noun(edge):
    if is_edge(edge):
        return False
    return symbol_namespace(edge)[-5:] == 'propn' or symbol_namespace(edge)[-4:] == 'noun'

def has_noun(edge):
    if is_edge(edge):
        for item in edge:
            if has_noun(item):
                return True
    else:
        return is_noun(edge)
    return False

def is_adjective_and_noun(edge):
    if len(edge) != 3:
        return False
    if edge[0] != '+/gb':
        return False
    if not is_adj(edge[1]):
        return False
    if not is_noun(edge[2]):
        return False
    return True

def is_cconj(edge):
    if is_edge(edge):
        return False
    return symbol_namespace(edge)[-5:] == 'cconj'

### Synonym funs

In [107]:
def syn_id(root):
    symbs = hg.symbols_with_root(root)
    for symb in symbs:
        if symbol_namespace(symb)[:3] == 'syn':
            return symb
    return None
        
def syn_label(syn_id):
    symbs = syn.synonyms(hg, syn_id)
    labels = []
    for symb in symbs:
        labels.append(hg.get_label(symb))
    best = None
    for label in labels:
        if best is None or len(label) < len(best):
            best = label
    parts = best.split('|')
    if len(parts) > 1:
        max_size = 0
        for part in parts:
            if len(part) > max_size:
                best = part.strip()
                max_size = len(best)
        
    return unidecode(best)

### Topic funs

In [85]:
def is_topic(symbol):
    if is_symbol(symbol) and symbol_namespace(symbol)[:3] == 'syn':
        root = symbol_root(symbol)
        return root[0].isalnum() and root[-1].isalnum()
    return False

def subtopics(edge):
    stopics = set()
    if is_edge(edge):
        for item in edge:
            topic = syn.main_synonym(hg, item)
            if is_topic(topic):
                stopics.add(item)
    return stopics

def choose_topic1(edge):
    topic = syn.main_synonym(hg, edge)
    if is_edge(edge):
        if is_compound_propn(edge) or is_adjective_and_noun(edge):
            return topic
        stopics = subtopics(edge)
        if len(stopics) == 1:
            return choose_topic1(next(iter(stopics)))    
    return topic

def choose_topic(edge):
    if is_edge(edge) and edge[0] == '+/gb' and is_adp(edge[1]):
        if len(edge) == 3:
            return choose_topic(edge[2])
        elif len(edge) > 3:
            return choose_topic1(edge)
    return syn.main_synonym(hg, edge)

def common_word(edge):
    syns = syn.synonyms(hg, edge)
    for s in syns:
        label = symbol2str(s)
        if ' ' in label:
            return False
        word = parser.make_word(label)
        if word.prob > MAX_PROB:
            return True
    return False

def atomic_topic(edge):
    atopic = syn.main_synonym(hg, edge)
    if is_topic(atopic) and not common_word(atopic):
        return {atopic}
    return set()

def atomic_topics(edge):
    atopics = atomic_topic(edge)
    if is_edge(edge):
        if len(edge) == 3 and edge[0] == '+/gb' and is_adp(edge[1]):
            return atomic_topics(edge[2])
        elif is_compound_propn(edge) or is_adjective_and_noun(edge):
            return atopics
        else:
            subtopics = set()
            for item in edge:
                subtopics |= atomic_topics(item)
            if len(subtopics) == 1:
                atopics = subtopics
            else:
                atopics |= subtopics
    return atopics

def atomic_topics_old(edge):
    atopics = atomic_topic(edge)
    if is_edge(edge):
        if is_compound_propn(edge) or is_adjective_and_noun(edge):
            return atopics
        for item in edge:
            atopics |= atomic_topics(item)
        # if len(atopics) > 0:
        #     return atopics
        # else:
        #     return atomic_topic(edge)
    return atopics

In [86]:
e = ('+/gb', 'of/nlp.of.adp', ('pulling/nlp.pull.verb', ('+/gb', 'stunts/nlp.stunt.noun', '–/nlp.–.noun'), ('+/gb', 'after/nlp.after.adp', ('kill/nlp.kill.verb', ('+/gb', 'american/nlp.american.adj', 'forces/nlp.force.noun')), ('+/gb', 'of/nlp.of.adp', 'dozens/nlp.dozen.noun', ('+/gb', 'syrian/nlp.syrian.adj', 'soldiers/nlp.soldier.noun')))))
#e = ('+/gb', 'against/nlp.against.adp', ('+/gb', 'in/nlp.in.adp', ('+/gb', '‘/nlp.‘.det', ('+/gb', 'syria/nlp.syria.propn', 'actions/nlp.action.noun', 'style’/nlp.style’.propn')), ('+/gb', 'n./nlp.n..propn', 'korea/nlp.korea.propn')))
#e = ('+/gb', 'for/nlp.for.adp', ('gone/nlp.go.verb', ('+/gb', 'mistral/nlp.mistral.propn', 'deal/nlp.deal.propn')), 'bad/nlp.bad.adj')
#e = ('+/gb', 'against/nlp.against.adp', ('+/gb', 'on/nlp.on.adp', 'attack/nlp.attack.noun', ('+/gb', 'syrian/nlp.syrian.adj', 'forces/nlp.force.noun')))
atomic_topics(e)

{'after_american_forces_kill_dozens_of_syrian_soldiers/syn1347213',
 'american_forces/syn137972',
 'pulling_stunts_–_after_american_forces_kill_dozens_of_syrian_soldiers/syn1347211',
 'stunt/syn996',
 'syrian_soldiers/syn13872'}

In [87]:
e = ('+/gb', 'middle/nlp.middle.propn', 'east/nlp.east.propn')
e = ('annexing/nlp.annex.verb', 'crimea/nlp.crimea.propn')
e = ('+/gb', 'syrian/nlp.syrian.adj', 'forces/nlp.force.noun')
hg.get_label(syn.main_synonym(hg, e))

'syrian forces'

### Entities, Actors and Conflicts

In [88]:
entities = {}
actors = {}
conflicts = {}


def edge2actors(edge, check_actors=True):
    if is_edge(edge) and edge[0] == '+/gb':
        if is_adp(edge[1]) and len(edge) == 3:
            return edge2actors(edge[2])
        elif is_cconj(edge[1]) and len(edge) > 2:
            result = set()
            for item in edge[2:]:
                result |= edge2actors(item)
            return result
    if has_noun(edge):
        synact = syn.main_synonym(hg, edge, in_adp=True)
        if check_actors:
            if synact in actors and actors[synact] > 2:
                return {synact}
        else:
            return {synact}
    return set()


def add_to_table(table, item):
    if item not in table:
        table[item] = 0
    table[item] += 1


def add_entity(entity):
    if entity not in entities:
        entities[entity] = {'mentions': {},
                            'mentioned_by': {},
                            'conflict_towards': {},
                            'conflict_from': {},
                            'conflict_actors': []}

        
def add_mention(actor, concept):
    add_entity(actor)
    add_entity(concept)
    add_to_table(entities[actor]['mentions'], concept)
    add_to_table(entities[concept]['mentioned_by'], actor)
    

def add_conflict(orig, targ, concept):
    add_entity(orig)
    add_entity(targ)
    add_entity(concept)
    add_to_table(entities[orig]['conflict_towards'], targ)
    add_to_table(entities[targ]['conflict_from'], orig)
    entities[concept]['conflict_actors'].append((orig, targ))
   

def add_conflict_tuple(orig, targ, concept, source):
    if orig not in conflicts:
        conflicts[orig] = []
    if targ not in conflicts:
        conflicts[targ] = []
    conflict_tuple = (orig, targ, concept, source)
    conflicts[orig].append(conflict_tuple)
    conflicts[targ].append(conflict_tuple)

mention_edges = hg.pattern2edges(('mention/gb.inf', None, None))
conflict_edges = hg.pattern2edges(('conflict/gb.inf', None, None, None))
nclaims = len(mention_edges)
nconflicts = len(conflict_edges)
N = (nclaims + nconflicts) * 2

i = 0
with progressbar.ProgressBar(max_value=N) as bar:
    # find actors
    for edge in hg.pattern2edges(('mention/gb.inf', None, None)):
        origs = edge2actors(edge[1], check_actors=False)
        for orig in origs:
            if orig not in actors:
                actors[orig] = 0
            actors[orig] += 1
        i += 1
        bar.update(i)
        
    for edge in hg.pattern2edges(('conflict/gb.inf', None, None, None)):
        origs = edge2actors(edge[1], check_actors=False)
        for orig in origs:
            if orig not in actors:
                actors[orig] = 0
            actors[orig] += 1
        i += 1
        bar.update(i)


    # conflicts and mentions
    for edge in hg.pattern2edges(('mention/gb.inf', None, None)):
        origs = edge2actors(edge[1])
        topics = atomic_topics(edge[2])
        for orig in origs:
            for topic in topics:
                add_mention(orig, topic)
        i += 1
        bar.update(i)
        
    for edge in hg.pattern2edges(('conflict/gb.inf', None, None, None)):
        origs = edge2actors(edge[1])
        targs = edge2actors(edge[2])
        topics = atomic_topics(edge[3])
        detailed_topic = choose_topic(edge[3])
        source = hg.pattern2edges(('source/gb.inf', edge, None)).pop()[2]
        for orig in origs:
            for targ in targs:
                add_conflict_tuple(orig, targ, detailed_topic, source)
                for topic in topics:
                    add_conflict(orig, targ, topic)
        i += 1
        bar.update(i)

print('claims: %s' % nclaims)
print('conflicts: %s' % nconflicts)

100% (31474 of 31474) |##################| Elapsed Time: 0:01:32 Time:  0:01:32


claims: 13100
conflicts: 2637


### Metrics

In [89]:
def compute_metrics(entity):
    mentioned_by = entities[entity]['mentioned_by']
    weights = [mentioned_by[item] for item in mentioned_by]
    total = sum(weights)
    h = 0.
    if total > 0:
        h_weights = [float(i) / float(total) for i in weights]
        h_weights = [i * i for i in h_weights]
        h = 1. / sum(h_weights)
    entities[entity]['h'] = h
    entities[entity]['total'] = total
    entities[entity]['degree'] = syn.synonyms_degree(hg, entity)
    entities[entity]['c'] = len(entities[entity]['conflict_actors'])
    
def metrics(concept):
    return {'h': entities[concept]['h'],
            'total': entities[concept]['total'],
            'degree': entities[concept]['degree'],
            'c': entities[concept]['c']}

i = 0
with progressbar.ProgressBar(max_value=len(entities)) as bar:
    for entity in entities:
        compute_metrics(entity)
        i += 1
        bar.update(i)

100% (35662 of 35662) |##################| Elapsed Time: 0:00:13 Time:  0:00:13


### One Actor

In [22]:
actor = syn_id('israel')

lactor = hg.get_label(actor)
json_data = {}

for conflict_tuple in conflicts[actor]:
    other = conflict_tuple[0]
    direction = 'from'
    if other == actor:
        other = conflict_tuple[1]
        direction = 'to'
    topic = conflict_tuple[2]
    lother = hg.get_label(other)
    ltopic = syn_label(topic)
    if lother not in json_data:
        json_data[lother] = {}
    if ltopic not in json_data[lother]:
        json_data[lother][ltopic] = metrics(other)
        json_data[lother][ltopic]['from'] = 0
        json_data[lother][ltopic]['to'] = 0
    json_data[lother][ltopic][direction] += 1
    json_data[lother][ltopic]['source'] = conflict_tuple[3]

topics = {} 

for targ in json_data:
    concepts = set(json_data[targ].keys())
    for concept in concepts:
        if concept not in topics:
            topics[concept] = 0
        topics[concept] += 1
        
for targ in json_data:
    for concept in json_data[targ]:
        json_data[targ][concept]['local_degree'] = topics[concept]
        
with open('%s.json' % lactor, 'wt') as out:
    res = j.dump(json_data, out, sort_keys=False, indent=4, separators=(',', ': '))
    
json_data

{'iran': {'syria crisis': {'h': 25.491620111731795,
   'total': 117,
   'degree': 4042,
   'c': 6,
   'from': 1,
   'to': 0,
   'source': ('warns/nlp.warn.verb',
    'iran/nlp.iran.propn',
    'israel/nlp.israel.propn',
    ('+/gb',
     'over/nlp.over.adp',
     ('+/gb', 'syria/nlp.syria.propn', 'crisis/nlp.crisis.noun'))),
   'local_degree': 1},
  'hiring assassins to take out nuclear scientists': {'h': 25.491620111731795,
   'total': 117,
   'degree': 4042,
   'c': 6,
   'from': 1,
   'to': 0,
   'source': ('accuses/nlp.accuse.verb',
    'iran/nlp.iran.propn',
    'israel/nlp.israel.propn',
    ('+/gb',
     'of/nlp.of.adp',
     ('hiring/nlp.hire.verb',
      'assassins/nlp.assassin.noun',
      ('to_take_out/nlp.to.part+nlp.take.verb+nlp.out.part',
       ('+/gb',
        'nuclear/nlp.nuclear.adj',
        'scientists/nlp.scientist.noun'))))),
   'local_degree': 1},
  'damaging nuclear talks': {'h': 25.491620111731795,
   'total': 117,
   'degree': 4042,
   'c': 6,
   'from': 1,
 

## Two Actors

In [22]:
def syn_label(syn_id):
    return hg.get_label(syn_id)

def mentions_intersection(actors):
    intersection = None
    for actor in actors:
        concepts = set(entities[actor]['mentions'].keys())
        if intersection is None:
            intersection = concepts
        else:
            intersection &= concepts
    return intersection


def mentions_outside(actor, concepts):
    return set(entities[actor]['mentions'].keys()).difference(concepts)


def actors_from_concepts(concepts):
    actors = {}
    for concept in concepts:
        h = entities[concept]['h']
        for actor in entities[concept]['mentioned_by']:
            if actor in actors:
                if actors[actor] > h:
                    actors[actor] = h
            else:
                actors[actor] = h
    return actors


def show_by_h(concepts, decreasing=True):
    concepts_h = {}
    for concept in concepts:
        concepts_h[concept] = entities[concept]['h']

    sorted_h = sorted(concepts_h.items(), key=operator.itemgetter(1), reverse=decreasing)

    for concept, h in sorted_h:
        print('%s [%s]' % (syn_label(concept), h))


def metrics_with_similarities(concept, actor1, actor2):
    m = metrics(concept)
    sim1 = hs.similarity(actor1, concept)
    sim2 = hs.similarity(actor2, concept)
    m['similarity1'] = sim1
    m['similarity2'] = sim2
    if sim1 > 0 and sim2 > 0:
        m['sim_ratio'] = sim1 / sim2
        m['log_sim_ratio'] = math.log(sim1 / sim2)
    else:
        m['sim_ratio'] = 0
        m['log_sim_ratio'] = 0
    return m


actor1 = syn_id('russia')
actor2 = syn_id('china')

actor1_name = hg.get_label(actor1)
actor2_name = hg.get_label(actor2)

inter = mentions_intersection([actor1, actor2])
actor1_only = mentions_outside(actor1, inter)
actor2_only = mentions_outside(actor2, inter)
common_actors = actors_from_concepts(inter)

data = {'actors': [actor1_name, actor2_name],
        'common_concepts': {},
        'individual_concepts': {actor1_name: {}, actor2_name: {}},
        'common_actors': {}}

for concept in inter:
    data['common_concepts'][syn_label(concept)] = metrics_with_similarities(concept, actor1, actor2)

for concept in actor1_only:
    if entities[concept]['h'] > 1.:
        data['individual_concepts'][actor1_name][syn_label(concept)] = metrics_with_similarities(concept, actor1, actor2)

for concept in actor2_only:
    if entities[concept]['h'] > 1.:
        data['individual_concepts'][actor2_name][syn_label(concept)] = metrics_with_similarities(concept, actor1, actor2)
        
for actor in common_actors:
    if entities[concept]['h'] > 1.:
        data['individual_concepts'][actor2_name][syn_label(concept)] = metrics(concept)

for actor in common_actors:
    data['common_actors'][syn_label(actor)] = metrics(concept)

with open('china-russia.json', 'wt') as out:
    res = j.dump(data, out, sort_keys=False, indent=4, separators=(',', ': '))

print('INTER')
show_by_h(inter, decreasing=False)
print()

print(actor1)
show_by_h(actor1_only)
print()

print(actor2)
show_by_h(actor2_only)
print()

print('COMMON_ACTORS')
show_actors_by_h(common_actors, decreasing=False)
print()

INTER
no proof [1.7999999999999998]
vietnam [2.0]
to consider [2.0]
own interests [2.0]
missile system [2.6666666666666665]
doubts [3.0]
infected with [3.0]
natural gas [3.0]
has detained [3.2]
kerry [3.571428571428571]
proxy [4.0]
workings [4.0]
proofs [4.499999999999999]
cutest [4.5]
warships [4.571428571428571]
naval [5.333333333333333]
should be [5.444444444444445]
selves [5.999999999999999]
no plans [5.999999999999999]
submarine [6.230769230769231]
2020 [6.249999999999997]
found in [6.4]
resolutions [6.759999999999998]
3,000 [7.0]
to attack [7.0]
air force [7.0]
mh17 [7.529411764705882]
aircraft carrier [8.066666666666666]
drills [8.999999999999998]
last month [9.0]
as it [9.0]
defenses [9.090909090909088]
exercising [9.941176470588232]
violating [10.285714285714285]
has killed [10.57232704402516]
nukes [11.842105263157894]
internets [12.499999999999998]
summit [12.999999999999996]
tensions [13.235294117647058]
zika [13.333333333333329]
investigations [13.370370370370368]
united s

has agreed [3.0]
past 24 hours [3.0]
will consider [3.0]
suspends [3.0]
telephone [3.0]
to extend [3.0]
to bomb [3.0]
un resolution [3.0]
will defend [3.0]
to include [3.0]
eight others [3.0]
loaned [3.0]
owes [2.9999999999999996]
stalls [2.9999999999999996]
to turkey [2.6666666666666665]
to sell [2.6666666666666665]
syrian city [2.6666666666666665]
faiths [2.6666666666666665]
eastern syria [2.6666666666666665]
flown [2.6666666666666665]
in may [2.6666666666666665]
does not want [2.6666666666666665]
to attend [2.6666666666666665]
dagestan [2.6666666666666665]
blaze [2.2727272727272725]
wells [2.0]
scorched [2.0]
accord [2.0]
by strike [2.0]
phosphorus [2.0]
will recognise [2.0]
3 billion [2.0]
so called [2.0]
must fulfil [2.0]
truths [2.0]
invitation [2.0]
military presence [2.0]
complicit [2.0]
compatriots [2.0]
syrian conflict [2.0]
not responsible [2.0]
armenia [2.0]
wo n’t intervene [2.0]
climbs [2.0]
traces of explosives [2.0]
russian military [2.0]
may abandon [2.0]
u.s. policies

in aleppo latakia [1.0]
air units [1.0]
an air strike [1.0]
all of aleppo has fallen to syrian government control [1.0]
international community not ukraine [1.0]
a claim that a us official called preposterous [1.0]
near on syria flights [1.0]
lethal aid to ukraine [1.0]
proposed by russia and the us [1.0]
all st petersburg suspects [1.0]
arms treaty concerns [1.0]
will not drop [1.0]
turkey iran [1.0]
u.s. missiles in poland romania would violate treaty [1.0]
to bomb syrian militants [1.0]
group:''we [1.0]
its embassy in damascus act of terrorism | reuters [1.0]
syrian lull [1.0]
states on syria [1.0]
shelling of its embassy in damascus act of terrorism | reuters [1.0]
trade with isis [1.0]
to move fast to fix [1.0]
ashuluk [1.0]
tension in the worst east west standoff since the cold war [1.0]
the mh17 tragedy [1.0]
turkish leadership involved in illegal oil trade with isis [1.0]
friday ria [1.0]
tourists [1.0]
to deploy security forces to syria [1.0]
japan 's northern coast [1.0]
unit

detained is [1.0]
out of ukrainian affairs [1.0]
should find [1.0]
not responsible for strike on school in syria 's idlib [1.0]
is fully mobilized [1.0]
turkey should pay compensation for downed jet [1.0]
as close [1.0]
cessation of hostilities in syria [1.0]
toward new cold war [1.0]
ifax [1.0]
government control [1.0]
fulfil truce deal [1.0]
29 times [1.0]
strikes would harm [1.0]
could ease [1.0]
ratcheting up pressure on its neighbor as they scrap over the future of the black sea crimea region [1.0]
declared by [1.0]
which lie off japan 's northern coast and are claimed by japan [1.0]
uncharted territory [1.0]
mortar shells [1.0]
isis recruiters [1.0]
seize parliament [1.0]
of war crimes found in east of aleppo syria [1.0]
talks were taking place to include aleppo in a temporary lull in fighting declared by the syrian army in some western parts of the country a sign of intensified efforts to halt a surge of violence in its former commercial capital [1.0]
inf treaty [1.0]
them a ter

putin is fully mobilized to tackle unprecedented threat from turkey [1.0]
would violate treaty [1.0]
sanctions ' destructive will act in own interests [1.0]
a terrorist group:''we [1.0]
air link with egypt will be restored soon [1.0]
military goods [1.0]
near with u.s. on syria flights [1.0]

china/syn11901
trump [32.563380281690144]
the president [32.356521739130415]
drugged [25.328767123287665]
iraq [24.880258899676328]
amid [23.272727272727273]
percents [21.551724137931043]
extremists [18.181818181818173]
india [17.666666666666654]
refugees [17.51515151515149]
manned [16.705329153604993]
childs [16.680387409200982]
palestinian [16.133333333333333]
the world [16.0]
phoned [15.695652173913043]
clashes [15.243902439024396]
human rights [15.0]
fossil [14.695652173913047]
blasts [14.516129032258071]
waters [14.400000000000006]
canada [13.764044943820233]
tourists [13.761904761904766]
economies [13.761904761904765]
crackdown [13.5]
suicides [13.34126984126982]
allegations [12.999999999999

south china sea case [1.0]
hyping [1.0]
philippines must heal rift over south china sea as indonesia speaks out [1.0]
internet security necessary to counter hostile forces [1.0]
to meet fuel consumption requirements on passenger vehicles set for 2015 [1.0]
factory activity expands at slower pace in april | afp.com [1.0]
restricting [1.0]
turn source code [1.0]
from invading chinese airspace [1.0]
adjustment in south china sea [1.0]
u.s. solar ruling will hurt clean energy development [1.0]
is fighters back from syria caught in xinjiang [1.0]
vietnam banned imports of chinese poultry [1.0]
fighting proxy war in syria [1.0]
in its own interests on missile system [1.0]
it will cooperate with trump but warns on taiwan [1.0]
terrible injustice [1.0]
meltdown [1.0]
any damage [1.0]
taiwan president elect must respect constitution [1.0]
to make up at least 30 percent of government vehicle purchases by 2016 [1.0]
joint crackdown [1.0]
who have entered its territory to escape from myanmar fight

earth next year [1.0]
the refusal of the self ruled island 's new government to recognize the one china principle [1.0]
to attack the us homeland with conventional weapons using nuclear submarines [1.0]
taiwanese activist [1.0]
vehicle charging fund [1.0]
to the scene of an increasingly ugly spat with vietnam over an oil rig in the south china sea [1.0]
5 are arrested in beijing attack [1.0]
us behind hong kong protests [1.0]
swimmers [1.0]
in april | afp.com [1.0]
through which [1.0]
through which $ 5 trillion in ship borne trade passes every year [1.0]
must avoid [1.0]
provocation in south china sea [1.0]
on south china sea history [1.0]
reactor ready by next year [1.0]
more islands [1.0]
in the latest show of tension between the two [1.0]
australia is on fringes of civilisation after swimmer mack horton attacks sun yang [1.0]
cooperation confrontation [1.0]
pterosaur [1.0]
interpol ' red notice issued for tycoon guo wengui [1.0]
to turn over source code and build back doors [1.0]
im

naval ties with united states [1.0]
must respect constitution [1.0]
to celebrate [1.0]
its ivory trade [1.0]
nanjing more worthy of remembrance than hiroshima [1.0]
by philippines ' request for un arbitration- calls international dispute resolution illegal and unreasonable [1.0]
21 of its nationals in singapore infected with zika [1.0]
to finish some land reclamation in south china sea [1.0]
over 300 of its new large military transport aircraft [1.0]
appropriate handling of ukraine [1.0]
recently announced russian maritime doctrine [1.0]
wo n't take [1.0]
cardiac arrest [1.0]
to do list [1.0]
shendiao [1.0]
kerry 's call for internet freedom [1.0]
to korean peninsula [1.0]
speaks out [1.0]
swimmer mack horton [1.0]
clean energy development [1.0]
any way [1.0]
an oil rig in the south china sea [1.0]
an ongoing crackdown on civil society [1.0]
practical reality [1.0]
would impact [1.0]
the sector 's contribution to economic growth [1.0]
as japan and hong kong stepped up vigilance against

NameError: name 'show_actors_by_h' is not defined

### Conflict Topics

In [90]:
sorted_conflict_topics = sorted([(entity, entities[entity]['c']) for entity in entities], key=operator.itemgetter(1), reverse=True)

sorted_conflict_topics

[('syria/syn11910', 45),
 ('ukraine/syn25', 23),
 ('gaza/syn199', 17),
 ('china/syn11901', 13),
 ('bordering/syn1880', 13),
 ('seas/syn98', 13),
 ('south_china_sea/syn4186', 11),
 ('eu/syn50', 11),
 ('russia/syn11969', 10),
 ('sanctions/syn311', 8),
 ('bombings/syn330', 8),
 ('iran/syn11989', 7),
 ('war_crimes/syn12185', 7),
 ('conflicts/syn84', 7),
 ('cyber/syn30', 7),
 ('palestinians/syn103', 7),
 ('dispute/syn11', 7),
 ('human_rights/syn4210', 7),
 ('isis/syn8', 6),
 ('islamic/syn148', 6),
 ('crimea/syn476', 6),
 ('killings/syn236', 6),
 ('paris/syn52', 6),
 ('provocation/syn27243', 6),
 ('europe/syn12199', 5),
 ('worsening/syn47032', 5),
 ('refugees/syn130', 5),
 ('bbc/syn40', 5),
 ('lama/syn1813', 5),
 ('damascus/syn16877', 5),
 ('settlement/syn447', 5),
 ('middle_east/syn4229', 5),
 ('leaks/syn987', 5),
 ('deploys/syn18076', 5),
 ('israel/syn12009', 4),
 ('jets/syn821', 4),
 ('syrian/syn12358', 4),
 ('kashmir/syn13200', 4),
 ('rebels/syn13', 4),
 ('embassy/syn76', 4),
 ('abuses/s

### Conflict graph

In [112]:
topic = 'syria/syn11910'
conflict_pairs = entities[topic]['conflict_actors']
cactors = set()
for pair in conflict_pairs:
    cactors.add(syn_label(pair[0]))
    cactors.add(syn_label(pair[1]))

G = nx.DiGraph()
for actor in cactors:
    G.add_node(actor)
for edge in conflict_pairs:
    G.add_edge(syn_label(edge[0]), syn_label(edge[1]))
    
nx.write_gml(G, '%s.gml' % syn_label(topic))

### Factions

In [116]:
class Factions:
    def __init__(self, topic):
        self.topic = topic
        self.edges = {}
        self.degrees = {}
        self.main_conflict = None
        self.actors = set()
        self.factions = (set(), set(), set())
        
    def attacks_faction(self, actor, faction):
        for actor2 in self.factions[faction]:
            edge = tuple(sorted((actor, actor2)))
            if edge in self.edges:
                return True
        return False

    def assign_faction(self, actor):
        if actor in self.actors:
            return
        self.actors.add(actor)
        if self.attacks_faction(actor, 0) and not self.attacks_faction(actor, 1):
            self.factions[1].add(actor)
        elif self.attacks_faction(actor, 1) and not self.attacks_faction(actor, 0):
            self.factions[0].add(actor)
        else:
            self.factions[2].add(actor)

    def process(self):
        conflict_pairs = entities[self.topic]['conflict_actors']

        for pair in conflict_pairs:
            edge = tuple(sorted(pair))
            if edge not in self.edges:
                self.edges[edge] = 0
            actor1, actor2 = edge
            if actor1 not in self.degrees:
                self.degrees[actor1] = 0
            self.degrees[actor1] += 1
            if actor2 not in self.degrees:
                self.degrees[actor2] = 0
            self.degrees[actor2] += 1
    
        for edge in self.edges:
            self.edges[edge] = min([self.degrees[actor] for actor in edge])

        sorted_edges = sorted([(edge[0], edge[1], self.edges[edge]) for edge in self.edges], key=operator.itemgetter(2), reverse=True)

        self.main_conflict = sorted_edges[0]
        self.actors.add(self.main_conflict[0])
        self.actors.add(self.main_conflict[1])
        self.factions[0].add(self.main_conflict[0])
        self.factions[1].add(self.main_conflict[1])

        for edge in sorted_edges[1:]:
            self.assign_faction(edge[0])
            self.assign_faction(edge[1])
            
    def get_faction(self, actor):
        if actor in self.factions[0]:
            return -1
        elif actor in self.factions[1]:
            return 1
        else:
            return 0
        
f = Factions('syria/syn11910')
f.process()
print(f.main_conflict)

print('FACTION A')
for actor in f.factions[0]:
    print(syn_label(actor))
print('FACTION B')
for actor in f.factions[1]:
    print(syn_label(actor))
print('UNDECIDED')
for actor in f.factions[2]:
    print(syn_label(actor))

('russia/syn11969', 'u.s./syn26', 24)
FACTION A
russia
iran
hezbollah
moscow
saudi
china
putin
damascus
FACTION B
germany
turkish pm
obama
israel
united states
syria opposition
benjamin
france
us
turkey pm
UNDECIDED
saudi king
assad
john
ankara
un
turkey


### Generate topics data

In [113]:
json_data = {}

for topic, c in sorted_conflict_topics:
    if c > 1:
        f = Factions(topic)
        f.process()
    
        cactors = {}
        for actor in f.actors:
            cactor = {'faction': f.get_faction(actor), 'outdeg': 0, 'indeg': 0}
            lactor = syn_label(actor)
            cactors[lactor] = cactor
        
        for pair in entities[topic]['conflict_actors']:
            orig = syn_label(pair[0])
            targ = syn_label(pair[1])
            cactors[orig]['outdeg'] += 1
            cactors[targ]['indeg'] += 1
    
        main_conflict = [syn_label(f.main_conflict[0]), syn_label(f.main_conflict[1])]
    
        ltopic = syn_label(topic)
        json_data[ltopic] = {'conflictuality': c, 'actors': cactors, 'main_conflict': main_conflict}
        
with open('conflict_topics.json', 'wt') as out:
    res = j.dump(json_data, out, sort_keys=False, indent=4, separators=(',', ': '))

In [5]:
str2edge('(is/gb * * ...)')

('is/gb', '*', '*', '...')