In [42]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

In [43]:
import networkx as nx
import seaborn as sns
from pyvis.network import Network
import random

In [44]:
## Input data
##########################################################
## Name of the source text this notebook works on. To analyse another text,
## change this single assignment (e.g. to "Saxony_Eastern_Expansion_EP_96.txt").
## Previously both names were assigned one after the other, so the first
## value was silently overwritten.
input_file_name = "Harry_Potter_1_chapter_1.txt"
##########################################################
## NOTE: despite their names, `data_dir` and `inputdirectory` both resolve to
## HotG_Data/<input_file_name>, i.e. to the input file path itself.
data_dir = "HotG_Data/" + input_file_name
inputdirectory = Path(f"./{data_dir}")

## Directory holding the csv files for this input; the cells below build
## their file paths from it.
outputdirectory = Path("./data_output")

In [45]:
## Every csv name is derived from the input file's stem (its name without the
## extension). Path.stem replaces the former `[:-4]` slice, which was only
## correct for extensions of exactly three characters.
input_stem = Path(input_file_name).stem

## Graph edge list
output_graph_file_name = f"graph_{input_stem}.csv"
output_graph_file_with_path = outputdirectory / output_graph_file_name

## Text chunks
output_chunks_file_name = f"chunks_{input_stem}.csv"
output_chunks_file_with_path = outputdirectory / output_chunks_file_name

## Contextual-proximity edge list ("contex" spelling is part of the file name
## on disk, so it is kept verbatim)
output_context_prox_file_name = f"graph_contex_prox_{input_stem}.csv"
output_context_prox_file_with_path = outputdirectory / output_context_prox_file_name

print(output_graph_file_with_path)
print(output_chunks_file_with_path)
print(output_context_prox_file_with_path)

data_output/graph_Harry_Potter_1_chapter_1.csv
data_output/chunks_Harry_Potter_1_chapter_1.csv
data_output/graph_contex_prox_Harry_Potter_1_chapter_1.csv


In [46]:
## Edge list from the graph csv (semicolon-separated)
dfg1 = pd.read_csv(filepath_or_buffer=output_graph_file_with_path, sep=";")

In [47]:
## Edge list from the contextual-proximity csv (semicolon-separated)
dfg2 = pd.read_csv(filepath_or_buffer=output_context_prox_file_with_path, sep=";")

In [48]:
## Stack both edge lists, then collapse duplicate (node_1, node_2) pairs:
## chunk ids and edge texts are comma-joined, counts are summed.
dfg = (
    pd.concat([dfg1, dfg2], axis=0)
    .groupby(["node_1", "node_2"])
    .agg(
        chunk_id=("chunk_id", ",".join),
        edge=("edge", ",".join),
        count=("count", "sum"),
    )
    .reset_index()
)
dfg

Unnamed: 0,node_1,node_2,chunk_id,edge,count
0,'potter',mr. dursley,"5e6a39886ae44e48ade45448fc9051ca,5e6a39886ae44...",contextual proximity,2.0
1,'potter',mrs. dursley,"5e6a39886ae44e48ade45448fc9051ca,5e6a39886ae44...",contextual proximity,2.0
2,'potter',petunia,"5e6a39886ae44e48ade45448fc9051ca,5e6a39886ae44...",contextual proximity,2.0
3,'potter',sister of petunia,"5e6a39886ae44e48ade45448fc9051ca,5e6a39886ae44...",The name 'Potter' is related to Petunia's sist...,3.0
4,a piercing stare,are dead,"26c5b0d9c07d41e7baccd8e596c93446,26c5b0d9c07d4...",contextual proximity,2.0
...,...,...,...,...,...
789,you-know-who,dumbledore,"17c96105c1404e8fa8440d826854b0dd,17c96105c1404...",contextual proximity,2.0
790,you-know-who,powers,"17c96105c1404e8fa8440d826854b0dd,17c96105c1404...",contextual proximity,2.0
791,you-know-who,professor mcgonagall,"17c96105c1404e8fa8440d826854b0dd,17c96105c1404...",contextual proximity,2.0
792,you-know-who,voldemort,"17c96105c1404e8fa8440d826854b0dd,17c96105c1404...",You-Know-Who is a nickname or alias for the ac...,2.0


In [49]:
## Distinct node names across both endpoint columns (node_1 values first)
endpoint_columns = [dfg[column] for column in ("node_1", "node_2")]
nodes = pd.concat(endpoint_columns, axis=0).unique()
nodes.shape

(192,)

In [50]:
G = nx.Graph()

## Register every node name, coerced to str
G.add_nodes_from(str(name) for name in nodes)

## One edge per dataframe row: the edge text is stored as the `title`
## attribute and a quarter of the count as the `weight` attribute
for source, target, label, count in zip(
    dfg["node_1"], dfg["node_2"], dfg["edge"], dfg["count"]
):
    G.add_edge(str(source), str(target), title=label, weight=count / 4)

In [51]:
## Advance the Girvan-Newman generator twice; the second result is the
## partition used from here on.
communities_generator = nx.community.girvan_newman(G)
top_level_communities, next_level_communities = (
    next(communities_generator) for _ in range(2)
)
## Sort members within each community, then the communities themselves
communities = sorted(sorted(members) for members in next_level_communities)
print("Number of Communities = ", len(communities))
print(communities)

Number of Communities =  12
[["'potter'", 'bakery', 'car', 'cat', 'collecting tin', "daughter's problems", 'driving', 'dudley', "dudley's age", 'fear', 'five different people', 'frozen in armchair', 'grunnings', 'grunnings parking lot', 'half past eight', 'happy', 'harry', 'he', "heard the name 'potter'", 'howard/harry', 'learning new word', 'lily potter (implied)', 'looking at the sign', 'lunchtime', 'map', 'morning', 'mr. dursley', "mr. dursley's garden wall", 'mrs. dursley', "mrs. dursley's sister's husband", 'mrs. next door', "mrs. potter's family", 'muggle', 'nasty, common name', "nation's owls", 'night hunting', 'normal behavior', 'normal day', 'not young', 'noticing strange people', 'owl-free', 'owls', 'people down in the street', 'people in cloaks', 'petunia', 'potters', 'secret', 'several important telephone calls', 'sister of petunia', 'son, harry', 'strangely dressed people', 'stranger', 'tabby cat', 'the dursleys', 'the potters', "the potters' son", "the potters, that's rig

In [52]:
palette = "hls"

## Now add these colors to communities and make another dataframe
def colors2Community(communities, palette_name=None, seed=None) -> pd.DataFrame:
    """Assign one colour and one 1-based group id to every community.

    Parameters
    ----------
    communities : sized sequence of iterables
        Each inner iterable holds the node names of one community.
    palette_name : str, optional
        Seaborn palette name. Defaults to the module-level ``palette``.
    seed : int, optional
        When given, the colours are shuffled with a private
        ``random.Random(seed)`` so the colouring is reproducible between
        runs. When omitted, the global ``random`` state is used, as before.

    Returns
    -------
    pd.DataFrame
        One row per node with columns ``node``, ``color`` (hex string) and
        ``group``. Empty input yields an empty frame with these columns.
    """
    columns = ["node", "color", "group"]
    if len(communities) == 0:
        # Nothing to colour; still hand back the expected schema.
        return pd.DataFrame(columns=columns)

    ## Define a color palette with one entry per community
    name = palette if palette_name is None else palette_name
    p = sns.color_palette(name, len(communities)).as_hex()
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle
    shuffle(p)

    # Colours are handed out from the end of the shuffled palette, so the
    # first community receives the last entry.
    rows = [
        {"node": node, "color": color, "group": group}
        for group, (community, color) in enumerate(
            zip(communities, reversed(p)), start=1
        )
        for node in community
    ]
    return pd.DataFrame(rows, columns=columns)


## Node -> colour / group lookup table for the detected communities
colors = colors2Community(communities=communities)
colors

Unnamed: 0,node,color,group
0,'potter',#db57d3,1
1,bakery,#db57d3,1
2,car,#db57d3,1
3,cat,#db57d3,1
4,collecting tin,#db57d3,1
...,...,...,...
187,weatherman,#91db57,10
188,scars,#57db5f,11
189,usefulness,#57db5f,11
190,strange and mysterious things happening all ov...,#a157db,12


In [53]:
## Copy group and colour onto each graph node; node size is its degree
for node, color, group in zip(colors["node"], colors["color"], colors["group"]):
    G.nodes[node].update(group=group, color=color, size=G.degree[node])

In [54]:
## Alternative output location kept for reference:
## graph_output_directory = "./docs/index.html"
## Optional dark theme (currently disabled):
## bgcolor="#1a1a1a", font_color="#cccccc"
net = Network(
    height="900px",
    width="100%",
    notebook=True,
    cdn_resources="remote",
    select_menu=True,
    filter_menu=False,
)

In [55]:
## Load the networkx graph G (with the node attributes set above) into the
## pyvis network.
net.from_nx(G)
## Layout: force_atlas_2based is the active choice; the commented-out
## repulsion / barnes_hut calls are presumably alternatives that were tried.
# net.repulsion(node_distance=150, spring_length=400)
net.force_atlas_2based(central_gravity=0.015, gravity=-31)
# net.barnes_hut(gravity=-18100, central_gravity=5.05, spring_length=380)
## Restrict the buttons shown to the "physics" group.
net.show_buttons(filter_=["physics"])

In [56]:
## Render the network to graph.html. The commented-out call is the variant
## that targets graph_output_directory instead (that variable is itself
## commented out in this notebook).
# net.show(graph_output_directory)
net.show("graph.html")

graph.html
