# News Analysis

In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import operator
import math
import progressbar
import json as j
from unidecode import unidecode
import networkx as nx
from gb.notebook import *
import gb.synonyms.synonyms as syn
from gb.metrics.hyper_similarity import HyperSimilarity

### HyperGraph, etc.

In [3]:
hg = init_hypergraph('../reddit-worldnews-01012013-01082017-new-inference3.hg')
hs = HyperSimilarity(hg)

### Topics

In [9]:
def is_compound_propn(edge):
    if len(edge) < 3:
        return False
    if edge[0] != '+/gb':
        return False
    for item in edge[1:]:
        if is_symbol(item):
            if symbol_namespace(item)[-5:] != 'propn':
                return False
        elif not is_compound_propn(item):
            return False
    return True

def is_adj(edge):
    if is_edge(edge):
        return False
    return symbol_namespace(edge)[-3:] == 'adj'

def is_noun(edge):
    if is_edge(edge):
        return False
    return symbol_namespace(edge)[-5:] == 'propn' or symbol_namespace(edge)[-4:] == 'noun'

def has_noun(edge):
    if is_edge(edge):
        for item in edge:
            if has_noun(item):
                return True
    else:
        return is_noun(edge)
    return False

def is_adjective_and_noun(edge):
    if len(edge) != 3:
        return False
    if edge[0] != '+/gb':
        return False
    if not is_adj(edge[1]):
        return False
    if not is_noun(edge[2]):
        return False
    return True

def is_cconj(edge):
    if is_edge(edge):
        return False
    return symbol_namespace(edge)[-5:] == 'cconj'

def is_topic(symbol):
    if is_symbol(symbol) and symbol_namespace(symbol)[:3] == 'syn':
        root = symbol_root(symbol)
        return root[0].isalnum() and root[-1].isalnum()
    return False

def subtopics(edge):
    stopics = set()
    if is_edge(edge):
        for item in edge:
            topic = syn.main_synonym(hg, item)
            if is_topic(topic):
                stopics.add(item)
    return stopics

def choose_topic1(edge):
    topic = syn.main_synonym(hg, edge)
    if is_edge(edge):
        if is_compound_propn(edge) or is_adjective_and_noun(edge):
            return topic
        stopics = subtopics(edge)
        if len(stopics) == 1:
            return choose_topic1(next(iter(stopics)))    
    return topic

def is_adp(edge):
    return is_symbol(edge) and symbol_namespace(edge)[-3:] == 'adp'

def choose_topic(edge):
    if is_edge(edge) and edge[0] == '+/gb' and is_adp(edge[1]):
        if len(edge) == 3:
            return choose_topic(edge[2])
        elif len(edge) > 3:
            return choose_topic1(edge)
    return syn.main_synonym(hg, edge)

def atomic_topic(edge):
    atopic = syn.main_synonym(hg, edge)
    if is_topic(atopic):
        return {atopic}
    return set()

def atomic_topics(edge):
    atopics = atomic_topic(edge)
    if is_edge(edge):
        if is_compound_propn(edge) or is_adjective_and_noun(edge):
            return atopics
        for item in edge:
            atopics |= atomic_topics(item)
        # if len(atopics) > 0:
        #     return atopics
        # else:
        #     return atomic_topic(edge)
    return atopics

In [10]:
e = ('+/gb', 'of/nlp.of.adp', ('pulling/nlp.pull.verb', ('+/gb', 'stunts/nlp.stunt.noun', '–/nlp.–.noun'), ('+/gb', 'after/nlp.after.adp', ('kill/nlp.kill.verb', ('+/gb', 'american/nlp.american.adj', 'forces/nlp.force.noun')), ('+/gb', 'of/nlp.of.adp', 'dozens/nlp.dozen.noun', ('+/gb', 'syrian/nlp.syrian.adj', 'soldiers/nlp.soldier.noun')))))
#e = ('+/gb', 'against/nlp.against.adp', ('+/gb', 'in/nlp.in.adp', ('+/gb', '‘/nlp.‘.det', ('+/gb', 'syria/nlp.syria.propn', 'actions/nlp.action.noun', 'style’/nlp.style’.propn')), ('+/gb', 'n./nlp.n..propn', 'korea/nlp.korea.propn')))
#e = ('+/gb', 'for/nlp.for.adp', ('gone/nlp.go.verb', ('+/gb', 'mistral/nlp.mistral.propn', 'deal/nlp.deal.propn')), 'bad/nlp.bad.adj')
#e = ('+/gb', 'against/nlp.against.adp', ('+/gb', 'on/nlp.on.adp', 'attack/nlp.attack.noun', ('+/gb', 'syrian/nlp.syrian.adj', 'forces/nlp.force.noun')))
atomic_topics(e)

{'after_american_forces_kill_dozens_of_syrian_soldiers/syn1347213',
 'american_forces/syn137972',
 'american_forces_kill/syn1347214',
 'dozens_of_syrian_soldiers/syn1347215',
 'of_pulling_stunts_–_after_american_forces_kill_dozens_of_syrian_soldiers/syn1347210',
 'pulling_stunts_–_after_american_forces_kill_dozens_of_syrian_soldiers/syn1347211',
 'stunt/syn996',
 'syrian_soldiers/syn13872'}

In [11]:
e = ('+/gb', 'middle/nlp.middle.propn', 'east/nlp.east.propn')
e = ('annexing/nlp.annex.verb', 'crimea/nlp.crimea.propn')
e = ('+/gb', 'syrian/nlp.syrian.adj', 'forces/nlp.force.noun')
hg.get_label(syn.main_synonym(hg, e))

'syrian forces'

### Synonym funs

In [7]:
def syn_id(root):
    symbs = hg.symbols_with_root(root)
    for symb in symbs:
        if symbol_namespace(symb)[:3] == 'syn':
            return symb
    return None
        
def syn_label(syn_id):
    symbs = syn.synonyms(hg, syn_id)
    labels = []
    for symb in symbs:
        labels.append(hg.get_label(symb))
    min_size = 999999
    best = None
    for label in labels:
        if len(label) < min_size:
            min_size = len(label)
            best = label
    parts = best.split('|')
    if len(parts) > 1:
        max_size = 0
        for part in parts:
            if len(part) > max_size:
                best = part.strip()
                max_size = len(best)
        
    return unidecode(best)

### Actors

In [12]:
# actors = {}
# for edge in hg.pattern2edges(('is_actor/gb.inf', None)):
#     actor = edge[1]
#     actors[actor] = 0

### Entities

In [27]:
nclaims = 0
nconflicts = 0

actors = {}
entities = {}


def edge2actors(edge, check_actors=True):
    if is_edge(edge) and edge[0] == '+/gb':
        if is_adp(edge[1]) and len(edge) == 3:
            return edge2actors(edge[2])
        elif is_cconj(edge[1]) and len(edge) > 2:
            result = set()
            for item in edge[2:]:
                result |= edge2actors(item)
            return result
    if has_noun(edge):
        synact = syn.main_synonym(hg, edge, in_adp=True)
        if check_actors:
            if synact in actors and actors[synact] > 2:
                return {synact}
        else:
            return {synact}
    return set()


def add_to_table(table, item):
    if item not in table:
        table[item] = 0
    table[item] += 1


def add_entity(entity):
    if entity not in entities:
        entities[entity] = {'mentions': {},
                            'mentioned_by': {},
                            'conflict_towards': {},
                            'conflict_from': {},
                            'conflict_actors': []}

        
def add_mention(actor, concept):
    add_entity(actor)
    add_entity(concept)
    add_to_table(entities[actor]['mentions'], concept)
    add_to_table(entities[concept]['mentioned_by'], actor)
    

def add_conflict(orig, targ, concept):
    add_entity(orig)
    add_entity(targ)
    add_entity(concept)
    add_to_table(entities[orig]['conflict_towards'], targ)
    add_to_table(entities[targ]['conflict_from'], orig)
    entities[concept]['conflict_actors'].append((orig, targ))

    
# find actors
for edge in hg.pattern2edges(('mention/gb.inf', None, None)):
    origs = edge2actors(edge[1], check_actors=False)
    for orig in origs:
        if orig not in actors:
            actors[orig] = 0
        actors[orig] += 1
        
for edge in hg.pattern2edges(('conflict/gb.inf', None, None, None)):
    origs = edge2actors(edge[1], check_actors=False)
    for orig in origs:
        if orig not in actors:
            actors[orig] = 0
        actors[orig] += 1


# conflicts and mentions
for edge in hg.pattern2edges(('mention/gb.inf', None, None)):
    nclaims += 1
    origs = edge2actors(edge[1])
    topics = atomic_topics(edge[2])
    for orig in origs:
        for topic in topics:
            add_mention(orig, topic)
        
for edge in hg.pattern2edges(('conflict/gb.inf', None, None, None)):
    nconflicts += 1
    origs = edge2actors(edge[1])
    targs = edge2actors(edge[2])
    topics = atomic_topics(edge[3])
    for orig in origs:
        for targ in targs:
            for topic in topics:
                add_conflict(orig, targ, topic)

print('claims: %s' % nclaims)
print('conflicts: %s' % nconflicts)

claims: 13100
conflicts: 2637


## Metrics

In [28]:
def compute_metrics(entity):
    mentioned_by = entities[entity]['mentioned_by']
    weights = [mentioned_by[item] for item in mentioned_by]
    total = sum(weights)
    h = 0.
    if total > 0:
        h_weights = [float(i) / float(total) for i in weights]
        h_weights = [i * i for i in h_weights]
        h = 1. / sum(h_weights)
    entities[entity]['h'] = h
    entities[entity]['total'] = total
    entities[entity]['degree'] = syn.synonyms_degree(hg, entity)
    entities[entity]['c'] = len(entities[entity]['conflict_actors'])
    
def metrics(concept):
    return {'h': entities[concept]['h'],
            'total': entities[concept]['total'],
            'degree': entities[concept]['degree'],
            'c': entities[concept]['c']}

i = 0
with progressbar.ProgressBar(max_value=len(entities)) as bar:
    for entity in entities:
        compute_metrics(entity)
        i += 1
        bar.update(i)

100% (50615 of 50615) |##################| Elapsed Time: 0:00:27 Time:  0:00:27


### Conflicts

In [16]:
conflicts = {}

for actor in actors:
    conflicts[actor] = []

for edge in hg.pattern2edges(('conflict/gb.inf', None, None, None)):
    orig = edge[1]
    targ = edge[2]
    # topics = {choose_topic(edge[3])}
    topics = atomic_topics(edge[3])
    for topic in topics:
        actors[orig] += 1
        actors[targ] += 1
        source = hg.pattern2edges(('source/gb.inf', edge, None)).pop()[2]
        triplet = (orig, targ, topic, source)
        conflicts[orig].append(triplet)
        conflicts[targ].append(triplet)

## One Actor

In [17]:
actor = syn_id('israel')

lactor = hg.get_label(actor)
json_data = {}

for triplet in conflicts[actor]:
    other = triplet[0]
    direction = 'from'
    if other == actor:
        other = triplet[1]
        direction = 'to'
    topic = triplet[2]
    lother = hg.get_label(other)
    ltopic = syn_label(topic)
    if lother not in json_data:
        json_data[lother] = {}
    if ltopic not in json_data[lother]:
        json_data[lother][ltopic] = metrics(other)
        json_data[lother][ltopic]['from'] = 0
        json_data[lother][ltopic]['to'] = 0
    json_data[lother][ltopic][direction] += 1
    json_data[lother][ltopic]['source'] = triplet[3]

topics = {}

for targ in json_data:
    concepts = set(json_data[targ].keys())
    for concept in concepts:
        if concept not in topics:
            topics[concept] = 0
        topics[concept] += 1
        
for targ in json_data:
    for concept in json_data[targ]:
        json_data[targ][concept]['local_degree'] = topics[concept]
        
with open('%s.json' % lactor, 'wt') as out:
    res = j.dump(json_data, out, sort_keys=False, indent=4, separators=(',', ': '))
    
json_data

{'palestinian authority': {'for killing of palestinian attackers': {'h': 2.0,
   'total': 2,
   'degree': 101,
   'c': 1,
   'from': 1,
   'to': 0,
   'source': ('condemns/nlp.condemn.verb',
    ('+/gb',
     'palestinian/nlp.palestinian.propn',
     'authority/nlp.authority.propn'),
    'israel/nlp.israel.propn',
    ('+/gb',
     'for/nlp.for.adp',
     ('+/gb',
      'of/nlp.of.adp',
      'killing/nlp.killing.noun',
      ('+/gb',
       'palestinian/nlp.palestinian.adj',
       'attackers/nlp.attacker.noun')))),
   'local_degree': 1},
  'palestinian attacker': {'h': 2.0,
   'total': 2,
   'degree': 101,
   'c': 1,
   'from': 1,
   'to': 0,
   'source': ('condemns/nlp.condemn.verb',
    ('+/gb',
     'palestinian/nlp.palestinian.propn',
     'authority/nlp.authority.propn'),
    'israel/nlp.israel.propn',
    ('+/gb',
     'for/nlp.for.adp',
     ('+/gb',
      'of/nlp.of.adp',
      'killing/nlp.killing.noun',
      ('+/gb',
       'palestinian/nlp.palestinian.adj',
       'attack

## Two Actors

In [18]:
def syn_label(syn_id):
    return hg.get_label(syn_id)

def mentions_intersection(actors):
    intersection = None
    for actor in actors:
        concepts = set(entities[actor]['mentions'].keys())
        if intersection is None:
            intersection = concepts
        else:
            intersection &= concepts
    return intersection


def mentions_outside(actor, concepts):
    return set(entities[actor]['mentions'].keys()).difference(concepts)


def actors_from_concepts(concepts):
    actors = {}
    for concept in concepts:
        h = entities[concept]['h']
        for actor in entities[concept]['mentioned_by']:
            if actor in actors:
                if actors[actor] > h:
                    actors[actor] = h
            else:
                actors[actor] = h
    return actors


def show_by_h(concepts, decreasing=True):
    concepts_h = {}
    for concept in concepts:
        concepts_h[concept] = entities[concept]['h']

    sorted_h = sorted(concepts_h.items(), key=operator.itemgetter(1), reverse=decreasing)

    for concept, h in sorted_h:
        print('%s [%s]' % (syn_label(concept), h))


def metrics_with_similarities(concept, actor1, actor2):
    m = metrics(concept)
    sim1 = hs.similarity(actor1, concept)
    sim2 = hs.similarity(actor2, concept)
    m['similarity1'] = sim1
    m['similarity2'] = sim2
    if sim1 > 0 and sim2 > 0:
        m['sim_ratio'] = sim1 / sim2
        m['log_sim_ratio'] = math.log(sim1 / sim2)
    else:
        m['sim_ratio'] = 0
        m['log_sim_ratio'] = 0
    return m


actor1 = syn_id('russia')
actor2 = syn_id('china')

actor1_name = hg.get_label(actor1)
actor2_name = hg.get_label(actor2)

inter = mentions_intersection([actor1, actor2])
actor1_only = mentions_outside(actor1, inter)
actor2_only = mentions_outside(actor2, inter)
common_actors = actors_from_concepts(inter)

data = {'actors': [actor1_name, actor2_name],
        'common_concepts': {},
        'individual_concepts': {actor1_name: {}, actor2_name: {}},
        'common_actors': {}}

for concept in inter:
    data['common_concepts'][syn_label(concept)] = metrics_with_similarities(concept, actor1, actor2)

for concept in actor1_only:
    if entities[concept]['h'] > 1.:
        data['individual_concepts'][actor1_name][syn_label(concept)] = metrics_with_similarities(concept, actor1, actor2)

for concept in actor2_only:
    if entities[concept]['h'] > 1.:
        data['individual_concepts'][actor2_name][syn_label(concept)] = metrics_with_similarities(concept, actor1, actor2)
        
for actor in common_actors:
    if entities[concept]['h'] > 1.:
        data['individual_concepts'][actor2_name][syn_label(concept)] = metrics(concept)

for actor in common_actors:
    data['common_actors'][syn_label(actor)] = metrics(concept)

with open('china-russia.json', 'wt') as out:
    res = j.dump(data, out, sort_keys=False, indent=4, separators=(',', ': '))

print('INTER')
show_by_h(inter, decreasing=False)
print()

print(actor1)
show_by_h(actor1_only)
print()

print(actor2)
show_by_h(actor2_only)
print()

print('COMMON_ACTORS')
show_actors_by_h(common_actors, decreasing=False)
print()

INTER
no proof [1.7999999999999998]
vietnam [2.0]
own interests [2.0]
to consider [2.0]
infected with [3.0]
natural gas [3.0]
doubts [3.0]
has detained [3.2]
missile system [3.571428571428571]
workings [4.0]
proxy [4.0]
proofs [4.499999999999999]
kerry [4.5]
warships [4.571428571428571]
naval [5.333333333333333]
no plans [5.999999999999999]
submarine [6.230769230769231]
should be [6.4]
found in [6.4]
cutest [6.4]
to attack [7.0]
air force [7.0]
3,000 [7.0]
resolutions [7.999999999999998]
selves [8.0]
aircraft carrier [8.066666666666666]
mh17 [8.257142857142858]
2020 [8.894736842105262]
drills [8.999999999999998]
last month [8.999999999999998]
defenses [9.090909090909088]
exercising [9.941176470588234]
as it [9.999999999999996]
nukes [11.842105263157894]
violating [12.249999999999998]
has killed [12.959999999999992]
tensions [13.235294117647058]
zika [13.333333333333329]
summit [14.0]
investigations [14.28571428571428]
united states [14.2972972972973]
activists [14.44]
internets [15.114

illegal drugs [1.0]
opposition representatives [1.0]
serbs [1.0]
the syrian government has agreed in principle to attend an international peace conference proposed by russia and the us and criticised what it called attempts to undermine peace efforts [1.0]
the black sea peninsula 's annexation [1.0]
300 militants [1.0]
image editor [1.0]
arms treaty concerns [1.0]
of more than 3,000 us soldiers in poland [1.0]
nato troop rotations [1.0]
it loses billions to oil slump sanctions [1.0]
a321 [1.0]
may be cut [1.0]
separatist elections [1.0]
monkey with grenade [1.0]
damascus act of terrorism | reuters [1.0]
eu criticism over ukraine shift inappropriate [1.0]
convenient target [1.0]
response to us strike [1.0]
islamist insurgency chief [1.0]
army plan [1.0]
international peace conference [1.0]
kiev published [1.0]
when russian border guards opened fire on a fishing trawler [1.0]
greenpeace activists ' ship [1.0]
yemen ambassador was not shot [1.0]
all st petersburg metro bombing suspects [1

friday ria [1.0]
new uk aircraft carrier a convenient target [1.0]
party of war in kiev [1.0]
pilot of kazan crash [1.0]
nuclear inf treaty [1.0]
aggression by us [1.0]
at kiev 's request [1.0]
ancient palmyra [1.0]
foreign minister sergey lavrov [1.0]
controversial separatist elections [1.0]
relations in uncharted territory [1.0]
will resume selling gas to ukraine following an official request from kiev [1.0]
gas incident [1.0]
yacht detained by north korea [1.0]
sanctions over the black sea peninsula 's annexation [1.0]
threat to its own security [1.0]
a fishing trawler [1.0]
its strikes [1.0]
wills [1.0]
ukraine crisis trio talks [1.0]
unperturbed by china ’s missile deployment [1.0]
ukrainian affairs [1.0]
ashamed over sanctions [1.0]
would not hand back [1.0]
reduce oil smuggling [1.0]
planned in [1.0]
missile systems for syria [1.0]
a temporary lull in fighting declared by the syrian army in some western parts of the country [1.0]
undercover [1.0]
arms treaty [1.0]
international 

former commercial capital [1.0]
undercover intelligence mission [1.0]
act of aggression without u.n. vote [1.0]
should control the internet [1.0]
in the historic syrian city [1.0]
can examine [1.0]
no proof assad was behind chemical attack [1.0]
syria conflict with turkey iran [1.0]
has endangered [1.0]
more than 3,000 us soldiers in poland [1.0]
unperturbed [1.0]
sanctions ' destructive [1.0]
missiles from jordan [1.0]
france must fulfil mistral contract or return money agency [1.0]
fabricated by [1.0]
plane over egypt [1.0]
for washington strike government forces in syria [1.0]
ukraine shift [1.0]
if assad stays or steps down [1.0]
of isis recruiters [1.0]
fleet of warships is off australia for climate research [1.0]
ukraine ’s natural gas debt [1.0]
assad ready to support ceasefire [1.0]
controversial contract [1.0]
no decision on repayment [1.0]
blaming syrian rebels for a chemical weapons attack [1.0]
syrian government officials will attend geneva peace talks [1.0]
editors [1.0]
h

slowest pace [1.0]
underlining 's [1.0]
from invading chinese airspace [1.0]
minor incident [1.0]
foreign tech firms should not worry about draft law that requires them to turn over source code and build back doors [1.0]
the west should not implicate russia for mh17 attack [1.0]
hopes ties [1.0]
submarine power system [1.0]
in beijing attack [1.0]
mechanism with taiwan [1.0]
tension between the two [1.0]
using nuclear submarines [1.0]
it will not abandon north korea [1.0]
japan 's hype on air defence zone spreads tension [1.0]
willing to help iraq in any way it can [1.0]
will spend $ 275 billion [1.0]
over kashmir [1.0]
new homegrown aircraft carrier [1.0]
back new sanctions [1.0]
if they fail to meet fuel consumption requirements on passenger vehicles set for 2015 [1.0]
to nearly 100 deaths [1.0]
internet freedom [1.0]
it will never send military forces to the scene of an increasingly ugly spat with vietnam over an oil rig in the south china sea [1.0]
n. korean border – top us diploma

funeral strippers [1.0]
philippine fishermen [1.0]
5 trillion [1.0]
fuel consumption requirements on passenger vehicles set for 2015 [1.0]
most of the south china sea through which $ 5 trillion in ship borne trade passes every year [1.0]
killed 21 [1.0]
through which $ 5 trillion in ship borne trade passes every year [1.0]
the recently announced russian maritime doctrine [1.0]
kerry 's call for internet freedom naive [1.0]
found in last week 's xinjiang violence that killed 25 [1.0]
it easier for environmental groups to sue polluters [1.0]
horton [1.0]
any damage [1.0]
its shendiao drone [1.0]
hopes u.s. vietnam ties benefit regional peace [1.0]
consumption growth [1.0]
it will promote trade deals regardless of tpp rcep direction [1.0]
in submarine power systems [1.0]
to escape from [1.0]
it will stop taking organs from executed inmates npr [1.0]
us summit [1.0]
safety of its ships [1.0]
is fighters back from syria caught in xinjiang [1.0]
swimmer mack horton [1.0]
caihong-4 [1.0]
russ

NameError: name 'show_actors_by_h' is not defined

## Conflict Topics

In [29]:
sorted_conflict_topics = sorted([(entity, entities[entity]['c']) for entity in entities], key=operator.itemgetter(1), reverse=True)

sorted_conflict_topics

[('syria/syn11910', 45),
 ('ukraine/syn25', 23),
 ('warring/syn39477', 22),
 ('airing/syn17937', 17),
 ('gaza/syn199', 17),
 ('china/syn11901', 13),
 ('bordering/syn1880', 13),
 ('seas/syn98', 13),
 ('tensions/syn90', 12),
 ('eu/syn50', 11),
 ('south_china_sea/syn4186', 11),
 ('weaponed/syn7185033', 10),
 ('russia/syn11969', 10),
 ('aiding/syn71225', 9),
 ('bombings/syn330', 8),
 ('defenses/syn32607', 8),
 ('sanctions/syn311', 8),
 ('righted/syn558914', 8),
 ('palestinians/syn103', 7),
 ('iran/syn11989', 7),
 ('conflicts/syn84', 7),
 ('crises/syn19292', 7),
 ('war_crimes/syn12185', 7),
 ('human_rights/syn4210', 7),
 ('violating/syn13843', 7),
 ('cyber/syn30', 7),
 ('dispute/syn11', 7),
 ('crimea/syn476', 6),
 ('childs/syn1484485', 6),
 ('isis/syn8', 6),
 ('oils/syn764525', 6),
 ('paris/syn52', 6),
 ('islamic/syn148', 6),
 ('killings/syn236', 6),
 ('arming/syn30107', 6),
 ('droning/syn7176563', 6),
 ('planing/syn7181148', 6),
 ('visits/syn20453', 6),
 ('provocation/syn27243', 6),
 ('str

In [20]:
conflict_pairs = entities['syria/syn11910']['conflict_actors']
cactors = set()
for pair in conflict_pairs:
    cactors.add(syn_label(pair[0]))
    cactors.add(syn_label(pair[1]))

G = nx.DiGraph()
for actor in cactors:
    G.add_node(actor)
for edge in conflict_pairs:
    G.add_edge(syn_label(edge[0]), syn_label(edge[1]))
    
nx.write_gml(G, 'syria.gml')

## Factions

In [30]:
conflict_pairs = entities['syria/syn11910']['conflict_actors']

edges = {}
degrees = {}

for pair in conflict_pairs:
    edge = tuple(sorted(pair))
    if edge not in edges:
        edges[edge] = 0
    actor1, actor2 = edge
    if actor1 not in degrees:
        degrees[actor1] = 0
    degrees[actor1] += 1
    if actor2 not in degrees:
        degrees[actor2] = 0
    degrees[actor2] += 1
    
for edge in edges:
    edges[edge] = min([degrees[actor] for actor in edge])

sorted_edges = sorted([(edge[0], edge[1], edges[edge]) for edge in edges], key=operator.itemgetter(2), reverse=True)

factions = []
assigned = set()

def belongs_to_faction(actor, faction):
    for actor2 in faction:
        edge = tuple(sorted((actor, actor2)))
        if edge in edges:
            return False
    return True

def assign_faction(actor):
    if actor in assigned:
        return
    for faction in factions:
        if belongs_to_faction(actor, faction):
            faction.add(actor)
            assigned.add(actor)
            return
    factions.append({actor})
    assigned.add(actor)

for edge in sorted_edges:
    if len(factions) == 0:
        factions.append({edge[0]})
        factions.append({edge[1]})
        assigned.add(edge[0])
        assigned.add(edge[1])
    else:
        assign_faction(edge[0])
        assign_faction(edge[1])
        
factions

[{'ankara/syn20285',
  'china/syn11901',
  'damascus/syn16877',
  'hezbollah/syn13331',
  'iran/syn11989',
  'kerry/syn157',
  'moscow/syn12798',
  'putin/syn339',
  'russia/syn11969',
  'saudi/syn75',
  'saudi_king/syn54253',
  'u.n./syn370'},
 {'france/syn12543',
  'germany/syn12662',
  'israel/syn12009',
  'netanyahu/syn210',
  'obama/syn14636',
  'syria_opposition/syn387334',
  'turkey_pm/syn15206',
  'turkish_pm/syn40467',
  'u.s./syn26',
  'united_states/syn4267'},
 {'assad/syn15', 'turkeys/syn146090'}]

In [21]:
json_data = {}

for entity in entities:
    c = entities[entity]['c']
    if c > 1:
        cactors = {}
        for pair in entities[entity]['conflict_actors']:
            orig = syn_label(pair[0])
            targ = syn_label(pair[1])
            if orig not in cactors:
                cactors[orig] = 0
            if targ not in cactors:
                cactors[targ] = 0
            cactors[orig] -= 1
            cactors[targ] += 1
        topic = syn_label(entity)
        json_data[topic] = {'conflictuality': c, 'actors': cactors}
        
json_data

{'israel': {'conflictuality': 4,
  'actors': {'iran': -2,
   'russia': 2,
   'hamas': -1,
   'palestinian authority': 1,
   'u.s.': -1,
   'icc': 1}},
 'syria': {'conflictuality': 50,
  'actors': {'russia': -2,
   'u.s.': 10,
   'putin': -1,
   'iran': -5,
   'syria opposition': -1,
   'bomber': -1,
   'monitoring': 1,
   'hezbollah': -3,
   'israel': 4,
   'kerry': -1,
   'assad': 4,
   'turkeys': 0,
   'us and russia': 1,
   'damascus': -1,
   'west': 2,
   'turkey pm': -1,
   'turkish pm': -1,
   'saudi king': -1,
   'ankara': -1,
   'moscow': 0,
   'germany': -1,
   'obama': -1,
   'france': -1,
   'netanyahu': 1,
   'islamists': 1,
   'united states': 1,
   'president assad': -1,
   'u.n.': -1,
   'china': -1,
   'saudi': -1,
   'ukraine': 1}},
 'peopled': {'conflictuality': 3,
  'actors': {'pope': -1,
   'europe': 1,
   'amnesty': -1,
   'australia': 1,
   'u.n.': -1,
   'germany': 1}},
 'collapses': {'conflictuality': 2,
  'actors': {'eu': -1, 'china': 1, 'kerry': -1, 'israel': 

In [32]:
syn.synonyms(hg, 'us_and_russia/syn145926')
syn.synonyms(hg, 'turkeys/syn146090')
syn_label('turkeys/syn146090')

'turkey'