## Preamble

In [None]:
# Download the helper module `module_update.py` into the working directory so
# that it can be imported by the following cell.
# NOTE(review): the file is fetched from the `master` branch without a pinned
# revision, so its contents may differ between runs.
!curl -s "https://raw.githubusercontent.com/Yoonsen/Modules/master/module_update.py" > "module_update.py"

In [None]:
from module_update import update, css, code_toggle, printmd

In [None]:
# Call the `css` helper imported from module_update
# (presumably applies notebook styling — confirm in module_update.py).
css()

In [None]:
import warnings
# Silence every warning for the rest of the session. Comment this line out
# when debugging, because deprecation and runtime warnings are hidden as well.
warnings.filterwarnings('ignore')

In [None]:
update('graph_networkx_louvain')
import nbtext as nb
from nbtext import get_urn, make_graph, get_urn, relaterte_ord, navn, totals
import graph_networkx_louvain as gnl
%matplotlib inline

### Utility functions

In [None]:
# tm_red is a reduced token map
def coord_name(names, tm_red):
    """Restrict the name groups in `names` to tokens present in a reduced token map.

    Parameters
    ----------
    names : sequence
        ``names[0]`` is an iterable of single-word names (strings). Every
        following element is an iterable of hashable multi-word names
        (presumably tuples of strings — confirm against `nb.names`).
    tm_red : iterable
        Entries of the reduced token map. Only the first item of each entry
        (the token key, e.g. ``('Anna',)`` or ``('Anna', 'Smith')``) is read.

    Returns
    -------
    list of list
        One list per element of `names`. The first keeps the order of
        ``names[0]``; the others come from a set intersection, so their
        order is unspecified.

    Raises
    ------
    IndexError
        If `names` is empty.
    """
    # Materialise the keys once: this avoids rebuilding the list for every
    # candidate name and also makes one-shot iterables safe to pass in.
    known_keys = set(entry[0] for entry in tm_red)

    # Single-word names are plain strings, whereas token-map keys are tuples.
    singles = [name for name in names[0] if (name,) in known_keys]
    multi = [list(set(group) & known_keys) for group in names[1:]]
    return [singles] + multi

def token_map_names(tmap):
    """Group the keys of a token map by their number of tokens.

    Parameters
    ----------
    tmap
        A token map in whatever form `nb.token_map_to_tuples` accepts
        (e.g. the list of ``'token ==> target'`` strings used in this notebook).

    Returns
    -------
    list of list
        Four lists, for keys of length 1, 2, 3 and 4 in that order. Keys of
        length 1 are unpacked to their single element; longer keys are
        returned unchanged. Keys of any other length are ignored.
    """
    # Convert the token map a single time and reuse the keys for every length.
    keys = [entry[0] for entry in nb.token_map_to_tuples(tmap)]

    singles = [key[0] for key in keys if len(key) == 1]
    longer = [[key for key in keys if len(key) == size] for size in (2, 3, 4)]
    return [singles] + longer

# Character network restricted to the tokens of a token map.
# Thin wrapper around nb.make_network_name_graph (see nbtext).
def character_network(urn, tm):
    """Build a name graph for `urn` using only the tokens found in `tm`.

    `tm` is passed both to `token_map_names` (to obtain the name lists) and to
    `nb.token_map_to_tuples` (to obtain the `tokenmap` keyword argument); the
    result of `nb.make_network_name_graph` is returned unchanged.
    """
    names_by_length = token_map_names(tm)
    tuple_map = nb.token_map_to_tuples(tm)
    return nb.make_network_name_graph(urn, names_by_length, tokenmap=tuple_map)


# Edges whose weight does not exceed a threshold.
def edge_counts(G, count=1):
    """Return the edges of `G` whose 'weight' attribute is at most `count`.

    Each returned item is the edge exactly as yielded by ``G.edges(data=True)``.
    An edge without a 'weight' attribute raises KeyError.
    """
    light_edges = []
    for edge in G.edges(data=True):
        attributes = edge[2]
        if attributes['weight'] <= count:
            light_edges.append(edge)
    return light_edges

# Edges that remain once the light edges are dropped.
def remove_edges(G, count=1):
    """Return ``(u, v)`` pairs for the edges of `G` whose weight exceeds `count`.

    Despite the name, nothing is removed from `G`: the result lists the edges
    to keep and is suitable as input to ``G.edge_subgraph``. An edge without a
    'weight' attribute raises KeyError.
    """
    kept_pairs = []
    for edge in G.edges(data=True):
        if edge[2]['weight'] > count:
            kept_pairs.append((edge[0], edge[1]))
    return kept_pairs

## Name graphs

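Here we search the catalogue for the book to analyse. Note that the query below matches authors on `masterson%lou%`, not Tolstoj's War and Peace as an earlier version of this text stated.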

In [None]:
# Fetch URNs

# Query the catalogue through `get_urn`; the result is stored in `urns` and
# echoed as the cell output.
# NOTE(review): the meaning of each key is defined by nbtext's `get_urn`, not
# here — presumably 'year' and 'neste' ("next" in Norwegian) bound a period
# starting in 1900, 'limit' caps the number of hits and '%' is a wildcard; confirm.
urns = get_urn({
    'author':'masterson%lou%', 
    'year':1900, 
    'neste':160,
    'limit':100,
    'title':'%%'
    
})
urns

The largest seems to be 2011061508082, so we use that one. Note, however, that the next cell sets `BOOK` to 2008082104049; check which URN is intended.

In [None]:
# URN of the book that the remaining cells analyse.
BOOK = 2008082104049

## Find names, everything with capital letters...


The command for finding names is `nb.names()`. It returns a tuple of name sequences; in this part we only use single-word names, so we select the first element.

In [None]:
# Extract candidate names from the book; `cutoff=3` is passed straight to
# `nb.names` (presumably a minimum frequency — confirm in nbtext).
wp = nb.names(BOOK, cutoff=3)
# Print the raw result for inspection.
print(wp)

## Check some names with concordance

## Only single word names can be checked (multi word in the making....)

In [None]:
# Look up the concordance for the word 'Theresa' in the selected book.
nb.urn_concordance(urns=BOOK, word='Theresa')

## check tokens with command `nb.token_map(character_names, strings = Bool)` the value of parameter `strings` can be `True` or `False`, default is `False` :

In [None]:
# Token map for the extracted names, using the default `strings` setting.
nb.token_map(wp)

In [None]:
# Token map for the extracted names, requested in string form (strings=True).
nb.token_map(wp, strings=True)

## There are three commands for creating a graph

### Make a network using a list of words applied to a book
### `nb.make_network(bookref, list_of_words)`

### Network with a set of tokens (from `nb.names`), and an optional token map, cutoff is also optional
###  `nb.make_network_name_graph(bookref, tokens, token_map, cutoff)`

### Network using values from token_map. Only tokens in the token map are analyzed
###  `character_network(bookref, token_map)`

In [None]:
# Build a network from the first element of `wp` only: its keys are turned
# into a plain list of tokens and passed to `nb.make_network`.
graf = nb.make_network(BOOK, list(wp[0].keys()))

# Draw graph with `gnl.show_graph(Graph, spread=0.2)` 

In [None]:
# Draw the word network with a larger `spread` than the 0.2 shown in the heading above.
gnl.show_graph(graf, spread=1.8)

In [None]:
# Name graph built from all name groups in `wp`, with cutoff = 5
# (the cutoff semantics are defined by nb.make_network_name_graph — confirm).
g2 = nb.make_network_name_graph(BOOK, wp, cutoff = 5)

In [None]:
# Draw the name graph with the default display settings.
gnl.show_graph(g2)

# Draw with an edited token map

In [None]:
# Token map in string form; the output of this cell is the starting point for
# the manual edits stored in `name_edits`.
tokenmap = nb.token_map(wp, strings=True)
tokenmap

## Copy the output from above and prepare it for edits:

In [None]:
# Hand-edited token map: each entry has the form 'token ==> target'.
# Multi-word tokens and targets appear to be written with underscores.
# Entries that are commented out are excluded from the map (they look like
# ordinary capitalised words rather than names — NOTE(review): confirm).
name_edits = ['Balsa ==> Balsa_Hermanos-brødrene',
 'City ==> Mexico_City',
 'Cordoba ==> Cordoba',
 #'Dem ==> Dem',
 #'Deres ==> Deres',
 'Diablito ==> El_Diablito',
 #'Din ==> Din',
 'El ==> El_Hule',
 #'Er ==> Er',
 #'Godt ==> Godt',
 'Hermanos ==> Balsa_Hermanos',
 'Hermanos-brødrene ==> Balsa_Hermanos-brødrene',
 'Hermosillo ==> Hermosillo',
 'Herrera ==> Herrera',
 #'Hjelp ==> Hjelp',
 #'Hm ==> Hm',
 #'Hold ==> Hold',
 'Hondura ==> Hondura',
 'Hule ==> El_Hule',
 #'Ja ==> Ja',
 'Jefe ==> El_Jefe',
 'Joaquin ==> Joaquin',
 #'Kan ==> Kan',
 #'Kom ==> Kom',
 'Lara ==> Lara',
 'Lesaca ==> Luis_Lesaca',
 'Luis ==> Luis_Lesaca',
 'Låra ==> Låra',
 'Madonna ==> Madonna',
 'Maria ==> Theresa_Maria_Silveti',
 'Mendez ==> Mendez',
 'Mexico ==> Mexico_City',
 'Michelangelo ==> Michelangelo',
 'Mirador ==> El_Mirador',
 'Nanche ==> Nanche',
 #'Nr ==> Nr',
 'Nådens ==> Nådens',
 'Pedrito ==> Pedrito',
 'Peter ==> Peter',
 'Pla ==> Senor_Pla',
 'Royal ==> Valle_Royal',
 'Sanchez ==> Maria_Sanchez',
 'Senor ==> Senor_Sanchez',
 #'Si ==> Si',
 'Silveti ==> Theresa_Maria_Silveti',
 #'Ta ==> Ta',
 #'Takk ==> Takk',
 'Theresa ==> Theresa_Maria_Silveti',
 'Tresgallo ==> Tresgallo',
 'Tuztepec ==> Tuztepec',
 #'Unnskyld ==> Unnskyld',
 'Valle ==> Valle_Royal',
 'Veracruz ==> Veracruz',
 'Veronica ==> Veronica',
 'Valle_Royal ==> Valle_Royal',
 'El_Hule ==> El_Hule',
 'Balsa_Hermanos-brødrene ==> Balsa_Hermanos-brødrene',
 'Balsa_Hermanos ==> Balsa_Hermanos',
 'Mexico_City ==> Mexico_City',
 'Maria_Sanchez ==> Maria_Sanchez',
 'Theresa_Maria ==> Theresa_Maria_Silveti',
 'Senor_Sanchez ==> Senor_Sanchez',
 'El_Mirador ==> El_Mirador',
 'Senor_Pla ==> Senor_Pla',
 'Luis_Lesaca ==> Luis_Lesaca',
 'El_Diablito ==> El_Diablito',
 'Maria_Theresa ==> Theresa_Maria_Silveti',
 'Maria_Silveti ==> Theresa_Maria_Silveti',
 'El_Jefe ==> El_Jefe',
 'Theresa_Maria_Silveti ==> Theresa_Maria_Silveti']

In [None]:
# Preview the name lists (grouped by token count, 1 to 4) derived from the edited map.
token_map_names(name_edits)

In [None]:
# Build the character network using only the tokens in the edited token map.
g_edits = character_network(BOOK,  name_edits)

In [None]:
# Draw the network built from the edited names.
gnl.show_graph(g_edits)

Growth diagrams plot the sequential development of the characters. Experiment with different values of parameters.

In [None]:
# List every edge of g2 together with its attribute dict; the 'weight'
# attribute is what `edge_counts` and `remove_edges` filter on.
g2.edges(data=True)

## Check if there are edges below a certain count

In [None]:
# Edges of g2 whose weight is at most 6.
edge_counts(g2, 6)

## Draw graph without those edges

Experiment with count parameter - green number

In [None]:
# `remove_edges` returns the (u, v) pairs whose weight exceeds 15, so the
# subgraph drawn here keeps only those heavier edges.
gnl.show_graph(g2.edge_subgraph(remove_edges(g2, 15)))

# Remove edges at or below a certain count: `remove_edges` lists the edges that remain

In [None]:
# Lists the (u, v) pairs with weight above 6, i.e. the edges that would be
# kept; g2 itself is not modified.
remove_edges(g2,6)

In [None]:
# Display the communities of g2 (presumably Louvain communities, judging by
# the module name — confirm in graph_networkx_louvain).
gnl.show_communities(g2)

In [None]:
# Inspect the community structure that is passed to nb.plot_book_wordbags below.
gnl.community_dict(g2)

In [None]:
# Use the communities of g2 as word bags for the book, with window=20000
# (the window unit is defined by nb.plot_book_wordbags — confirm).
wp_diagram = nb.plot_book_wordbags(BOOK,gnl.community_dict(g2) ,window=20000)

In [None]:
# Smooth each series with a 10-step rolling mean before plotting
# (assumes `wp_diagram` is a pandas object — confirm).
wp_diagram.rolling(window=10).mean().plot(figsize=(15,6), lw=3, alpha=0.7)

In [None]:
# IPython `??` introspection: shows the source of nb.plot_sammen_vekst.
# Development aid only; it produces no analysis result.
nb.plot_sammen_vekst??