# Actor Networks

In [1]:
import spacy
from collections import Counter
import networkx as nx
nlp = spacy.load('en')

In [2]:
def read_paragraphs(fname):
    with open(fname, 'r') as f:
        text = f.read()
    paragraphs = [p for p in text.split('\n\n') if len(p) > 0]
    return paragraphs

trump_par_texts = read_paragraphs('nss/trump_nss.txt')
obama_par_texts = read_paragraphs('nss/obama_nss.txt')
#par_texts = trump_par_texts + obama_par_texts
#k = len(trump_par_texts)
#len(par_texts), len(trump_par_texts), len(obama_par_texts)
len(trump_par_texts), len(obama_par_texts)

(400, 150)

## Functions to Extract Subject-Verb-Object Triplets

In [3]:
def noun_verb_pairs(doc):
    nounverbs = list()
    for tok in doc:
        if tok.dep_ == 'ROOT':
            rel = (child_dep(tok,'nsubj'), tok, child_dep(tok,'dobj'))
            nounverbs.append(rel)
    return nounverbs

def child_dep(tok, dep): # gets first child where child.dep_==dep.
    for c in tok.children:
        if c.dep_ == dep:
            return c
    return None

## Function to Create Actor Network

In [4]:
def add_node_rel(G, utok, vtok):
    if utok is None or vtok is None:
        return
    
    # add nodes with zero count
    if utok.text not in G.nodes():
        G.add_node(utok.text, typ=utok.dep_, ent=utok.ent_type_, ct=0)
    if vtok.text not in G.nodes():
        G.add_node(vtok.text, typ=vtok.dep_, ent=utok.ent_type_, ct=0)
    
    # add edge with zero weight if it doesn't exist
    u,v = utok.text, vtok.text
    if (u,v) not in G.edges():
        G.add_edge(u, v, weight=0)
    
    # increment node counts and edge count
    G.nodes[u]['ct'] += 1
    G.nodes[v]['ct'] += 1
    G[u][v]['weight'] += 1

def actor_network(par_texts, nlp, min_node_ct=5):
    G = nx.DiGraph()
    for doc in nlp.pipe(par_texts):
        # merge multi-word entities
        for ent in doc.ents:
            ent.merge(tag=ent.root.tag_, ent_type=ent.root.ent_type_)
        
        for subj, verb, obj in noun_verb_pairs(doc):
            add_node_rel(G, subj,verb)
            add_node_rel(G, verb,obj)
            
    # remove nodes that don't meet minimum count threshold
    rm_nodes = list()
    for n in G.nodes():
        if G.nodes[n]['ct'] < min_node_ct:
            rm_nodes.append(n)
    G.remove_nodes_from(rm_nodes)
            
    return G

In [12]:
Gtrump = actor_network(trump_par_texts, nlp, min_node_ct=1)
Gobama = actor_network(trump_par_texts, nlp, min_node_ct=1)
len(Gtrump.nodes()), len(Gobama.edges())

(975, 1262)

In [13]:
nx.adjacency_matrix(Gtrump).todense().shape

(975, 975)

In [11]:
list(Gtrump.nodes(data=True))[0]

('America', {'typ': 'nsubj', 'ent': 'GPE', 'ct': 10})

In [7]:
nx.write_gexf(Gtrump, 'trump.gexf')
nx.write_gexf(Gobama, 'obama.gexf')