# One Hundred Years of Solitude - Python code
[https://medium.com/@finalfire](https://medium.com/@finalfire)

In [2]:
# from tinydb import TinyDB, Query

# db = TinyDB('characters.json')

# read all of the characters from the list
# the list has been created by POS-tagging all of the file 'text'
# using nltk
# with open('characters_list.txt') as in_file:
#     chars = [row.strip() for row in in_file]
# chars

In [2]:
%matplotlib inline
import re
import nltk


def build_regexp(c):
    """
    Build the regular expression that matches one entry of the character list.

    Three entry shapes are recognised:

    * ``N (M)``     -- ``N`` optionally followed by a space and ``M``
      (``Úrsula (Iguarán)`` matches "Úrsula" and "Úrsula Iguarán");
    * ``N,M[,...]`` -- any one of the comma-separated alternatives;
    * anything else -- the entry itself.

    Every name fragment goes through ``re.escape`` so that characters such as
    the dot in "Dr." or "Mr." are matched literally instead of acting as
    regex metacharacters.

    Parameters
    ----------
    c : str
        One entry of the character list.

    Returns
    -------
    str
        A pattern string suitable for ``re.compile``.
    """
    if '(' in c:
        # Split at the parenthesis rather than at the first space, so that
        # multi-word names such as "José Arcadio (Buendía)" are accepted.
        head, _, tail = c.partition('(')
        head = head.rstrip()
        tail = tail.strip()
        if tail.endswith(')'):
            tail = tail[:-1]
        return "{}( {})?".format(re.escape(head), re.escape(tail))
    if ',' in c:
        # Any number of alternatives is allowed; surrounding blanks and empty
        # fragments are dropped so a stray comma cannot create an alternative
        # that matches the empty string.
        alternatives = [part.strip() for part in c.split(',')]
        return "({})".format(
            '|'.join(re.escape(alt) for alt in alternatives if alt)
        )
    return re.escape(c)


## save everything
# Both files are decoded explicitly instead of relying on the platform's
# default encoding (the character names contain accented letters).
# NOTE(review): assumes the two files are stored as UTF-8 -- confirm.
with open('characters_list.txt', encoding='utf-8') as names_file:
    # Blank lines are skipped: an empty entry would compile to an empty
    # pattern, which matches at every position of the text.
    characters = [
        name for name in (line.strip() for line in names_file) if name
    ]

with open('text', encoding='utf-8') as text_file:
    text = [line.strip() for line in text_file]
# The whole text is handled as one long, space-joined string.
raw = ' '.join(text)

## map the characters
# chars maps each numeric id (position in the list) to its list entry.
chars = {}

for it, c in enumerate(characters):
    chars[it] = c
    # Every mention of the character is replaced in the text by its id
    # written as a decimal string.
    template = '{}'.format(it)
    regexp = re.compile(build_regexp(c))
    raw = regexp.sub(template, raw)

In [3]:
import networkx as nx

print('Total characters:', len(characters))

# build the graph: one node per entry of the character list
g = nx.Graph()
g.add_nodes_from(characters)

# tokenize the text, dropping comma and full-stop tokens
words = [n for n in nltk.word_tokenize(raw) if n != ',' and n != '.']

# utils list: the decimal-string ids that stand for characters in `words`
characters_rep = [str(i) for i in range(len(characters))]
# set copy of the ids for constant-time membership tests
known_ids = set(characters_rep)

# forward threshold: how many tokens after a mention are inspected
fwd_t = 30

# Index the positions of every character id with a single pass over the
# tokens, instead of rescanning the whole token list once per character.
positions = {}
for i, word in enumerate(words):
    if word in known_ids:
        positions.setdefault(word, []).append(i)

# check for each character (same character order as the list, so edges are
# inserted in the same order as a per-character scan would insert them)
for it, c in enumerate(characters):
    word = str(it)
    src_node = chars[it]
    for i in positions.get(word, ()):
        # the window covers the fwd_t tokens following the mention;
        # slicing clips it at the end of the text
        for other in words[i + 1:i + fwd_t + 1]:
            if other in known_ids and other != word:
                target_node = chars[int(other)]
                if g.has_edge(src_node, target_node):
                    g[src_node][target_node]['weight'] += 1
                else:
                    g.add_edge(src_node, target_node, weight=1)

# remove nodes w/o edges
removed = set()
# Iterate over a snapshot: removing nodes while looping over the live
# g.nodes() view raises "dictionary changed size during iteration" on
# networkx >= 2.
for node in list(g.nodes()):
    if not g[node]:
        print('Node w/o edges:', node)
        g.remove_node(node)
        removed.add(node)

print('Total characters minus solitude nodes:', len(g.nodes()))

nx.write_graphml(g, 'output.graphml')

Total characters: 71
Node w/o edges: Arnaldo
Node w/o edges: Colonel Gerineldo Márquez
Node w/o edges: Petronila Iguarán
Node w/o edges: Amparo Mos
Node w/o edges: Colonel Gregorio Stevenson
Node w/o edges: Doña Fernanda del Carpio de Buendía
Node w/o edges: Don Melchor Escalona
Node w/o edges: Dr. Alirio Noguera
Total characters minus solitude nodes: 62


In [23]:
# Report the graph metrics one after the other; each metric is computed
# right before its line is printed, in the order listed here.
graph_metrics = (
    ('Diameter:', nx.diameter),
    ('Degree centrality:', nx.degree_centrality),
    ('Betweennes centrality:', nx.betweenness_centrality),
    # ('Closeness centrality:', nx.closeness_centrality),
    # ('Pagerank:', nx.pagerank),
)
for label, metric in graph_metrics:
    print(label, metric(g))

Diameter: 6
Degree centrality: {'Dagoberto Fonseca': 0.01639344262295082, 'Nicanor Ulloa': 0.03278688524590164, 'Captain Aquiles Ricardo': 0.03278688524590164, 'Captain Roque Carnicero': 0.03278688524590164, 'Mauricio Babilonia': 0.06557377049180328, 'Fernanda del Carpio': 0.04918032786885246, 'Francisco the Man': 0.11475409836065574, 'Pilar Ternera': 0.18032786885245902, 'Pietro Crespi': 0.1639344262295082, 'Don Apolinar Moscote': 0.14754098360655737, 'Mr. Herbert': 0.04918032786885246, 'Gaston': 0.04918032786885246, 'Úrsula (Iguarán)': 0.5409836065573771, 'Monk Hermann': 0.0819672131147541, 'Gabriel': 0.09836065573770492, 'Amaranta': 0.4426229508196722, 'Aureliano Triste': 0.11475409836065574, 'Petronio': 0.01639344262295082, 'Patricia Brown': 0.06557377049180328, 'José Raquel Moncada': 0.04918032786885246, 'Aureliano Serrador': 0.04918032786885246, 'Rebeca': 0.3114754098360656, 'Remedios,Meme': 0.42622950819672134, 'Carmelita Montiel': 0.03278688524590164, 'Nigromanta': 0.0327868852



In [32]:
# save to neo4j
import os

from py2neo import Graph, Node, Relationship

# Credentials are read from the environment so that they do not have to be
# written into the notebook; the defaults reproduce the previous hard-coded
# values (user "neo4j", empty password).
g4 = Graph(
    user=os.environ.get("NEO4J_USER", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", ""),
)
tx = g4.begin()

# V maps each node of the networkx graph to its py2neo Node
V = {}

# create nodes
for c in g.nodes():
    V[c] = Node("Character", name=c)
    tx.create(V[c])

# save edges
# Walk the existing edges directly instead of testing every pair of nodes.
# Each relationship is oriented from the node that comes first in
# g.nodes() to the one that comes later, and self-loops are skipped.
node_rank = {name: rank for rank, name in enumerate(g.nodes())}
for a, b in g.edges():
    if a == b:
        continue
    if node_rank[a] > node_rank[b]:
        a, b = b, a
    #w = g[a][b]['weight']
    tx.create(Relationship(V[a], 'INTERACTS', V[b]))

tx.commit()