In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

In [14]:
import networkx as nx
import seaborn as sns
from pyvis.network import Network
import random

In [3]:
## Source text file to process (looked up under HotG_Data/)
##########################################################
input_file_name = "Saxony_Eastern_Expansion_EP_95-96.txt"
##########################################################
data_dir = f"HotG_Data/{input_file_name}"
inputdirectory = Path(data_dir)

## Folder that holds the generated csv files
outputdirectory = Path("data_output")

In [4]:
## Build the csv file names from the input file's stem.
## Path(...).stem strips whatever extension the input has, rather than
## assuming the extension is exactly four characters long (".txt").
input_stem = Path(input_file_name).stem

## Edge list of the graph
output_graph_file_name = f"graph_{input_stem}.csv"
output_graph_file_with_path = outputdirectory / output_graph_file_name

## Text chunks
output_chunks_file_name = f"chunks_{input_stem}.csv"
output_chunks_file_with_path = outputdirectory / output_chunks_file_name

## Contextual-proximity edge list
output_context_prox_file_name = f"graph_contex_prox_{input_stem}.csv"
output_context_prox_file_with_path = outputdirectory / output_context_prox_file_name

print(output_graph_file_with_path)
print(output_chunks_file_with_path)
print(output_context_prox_file_with_path)

data_output/graph_Saxony_Eastern_Expansion_EP_95-96.csv
data_output/chunks_Saxony_Eastern_Expansion_EP_95-96.csv
data_output/graph_contex_prox_Saxony_Eastern_Expansion_EP_95-96.csv


In [5]:
## Read the semicolon-separated graph csv into a dataframe
dfg1 = pd.read_csv(
    filepath_or_buffer=output_graph_file_with_path,
    sep=";",
)

In [8]:
## Read the semicolon-separated contextual-proximity csv into a dataframe
dfg2 = pd.read_csv(
    filepath_or_buffer=output_context_prox_file_with_path,
    sep=";",
)

In [9]:
## Stack both edge lists, then collapse duplicate (node_1, node_2) pairs:
## chunk ids and edge labels are comma-joined, counts are summed.
dfg = (
    pd.concat([dfg1, dfg2], axis=0)
    .groupby(["node_1", "node_2"])
    .agg({"chunk_id": ",".join, "edge": ",".join, "count": "sum"})
    .reset_index()
)
dfg

Unnamed: 0,node_1,node_2,chunk_id,edge,count
0,890,bavarian church,"a96d5cd24cfb457ba2719b2cd491cebd,a96d5cd24cfb4...",contextual proximity,3.0
1,890,cyrill and st. method,"a96d5cd24cfb457ba2719b2cd491cebd,a96d5cd24cfb4...",contextual proximity,3.0
2,890,moravian church,"a96d5cd24cfb457ba2719b2cd491cebd,a96d5cd24cfb4...",contextual proximity,2.0
3,890,moravian slavic church,a96d5cd24cfb457ba2719b2cd491cebd,"By 890, this period of Moravian Slavic church ...",0.0
4,890,pope,"a96d5cd24cfb457ba2719b2cd491cebd,a96d5cd24cfb4...",contextual proximity,2.0
...,...,...,...,...,...
4850,zietz and merseburg,margraviate of meissen,"d31f0ca979794b338040f3acce7c96ed,d31f0ca979794...",Zietz and Merseburg were quickly subsumed into...,2.0
4851,zietz and merseburg,subsumed into the margraviate of meissen,d1d7da0a4f3642a4aaf7ce79eef9e2a7,"The former two, Zietz and Merseburg were relat...",0.0
4852,zone of influence,centre of power,"bb02574fdae44dc2b524737744bde0c0,bb02574fdae44...",contextual proximity,2.0
4853,zone of influence,geographical area,bb02574fdae44dc2b524737744bde0c0,The first Polish ruler's zone of influence ran...,0.0


In [10]:
## Distinct node labels, in order of first appearance (node_1 column first, then node_2)
nodes = pd.unique(pd.concat([dfg["node_1"], dfg["node_2"]], axis=0))
nodes.shape

(791,)

In [11]:
G = nx.Graph()

## Register every node label (as a string) before wiring the edges
G.add_nodes_from(str(label) for label in nodes)

## One edge per dataframe row: the edge text becomes the title,
## and a quarter of the count becomes the weight
edge_columns = zip(dfg["node_1"], dfg["node_2"], dfg["edge"], dfg["count"])
for source, target, edge_text, edge_count in edge_columns:
    G.add_edge(
        str(source),
        str(target),
        title=edge_text,
        weight=edge_count / 4,
    )

In [12]:
## Girvan-Newman yields successive partitions of the graph;
## take the first two and keep working with the second one.
communities_generator = nx.community.girvan_newman(G)
top_level_communities = next(communities_generator)
next_level_communities = next(communities_generator)

## Sort the members of each community, then sort the communities themselves
communities = sorted(sorted(members) for members in next_level_communities)
print("Number of Communities = ", len(communities))
print(communities)

Number of Communities =  21
[['890', 'an unassuming corner of the underground church', 'apostles to the slavs', 'around the year 100 ad', 'basilica de san clemente', 'bavarian church', 'bavarian missionaries', 'bohemian warlords', 'brothers', 'brougth san clemente to rome', 'carolingians', 'church liturgy', 'church slavonic', 'civil war', 'constantine', 'constantine and methodius', "constantine's original resting place", 'constantinople', 'covered in dedications from all slavic nations as well as others', 'cyrill', 'cyrill and st. method', 'dedicated basilica in rome', 'died in rome', 'east francians', 'emperor in constantinople', "found san clemente's body on the beach in crimea", 'franks', 'glagolica', 'great moravia', 'greek orthodox tradition', 'house of premyslid', 'invented glagolica', 'irritated the emperor trajan', 'konstantine and method', 'kyrillic alphabet', 'method', 'moravian church', 'moravian church services', 'moravian slavic church', 'moravians', 'one of my all-time fa

In [15]:
palette = "hls"
COLOR_SEED = 42  ## fixed seed so the community colors are the same on every run


## Now add these colors to communities and make another dataframe
def colors2Community(communities, seed=None, palette_name=None) -> pd.DataFrame:
    """Assign one color and one group number to every node of every community.

    Args:
        communities: Sized iterable of communities, each an iterable of node
            labels.
        seed: Optional seed for shuffling the colors. With ``None`` the
            module-level ``random`` state is used, so the assignment differs
            between runs; with a value the assignment is repeatable.
        palette_name: Optional seaborn palette name. With ``None`` the
            module-level ``palette`` setting is used.

    Returns:
        A dataframe with the columns ``node``, ``color`` (hex string) and
        ``group`` (1-based community number), one row per node. The columns
        are present even when ``communities`` is empty.
    """
    if palette_name is None:
        palette_name = palette

    ## One hex color per community
    p = sns.color_palette(palette_name, len(communities)).as_hex()

    ## Shuffle so that the community order does not dictate the hue order
    if seed is None:
        random.shuffle(p)
    else:
        random.Random(seed).shuffle(p)

    rows = []
    for group, community in enumerate(communities, start=1):
        color = p.pop()
        rows.extend(
            {"node": node, "color": color, "group": group} for node in community
        )

    ## Explicit columns keep the schema stable for empty input
    return pd.DataFrame(rows, columns=["node", "color", "group"])


colors = colors2Community(communities, seed=COLOR_SEED)
colors

Unnamed: 0,node,color,group
0,890,#dbd057,1
1,an unassuming corner of the underground church,#dbd057,1
2,apostles to the slavs,#dbd057,1
3,around the year 100 ad,#dbd057,1
4,basilica de san clemente,#dbd057,1
...,...,...,...
786,secular state infrastructure,#57db85,19
787,regain their freedom,#579bdb,20
788,weaking of the empire,#579bdb,20
789,surgeons,#db5775,21


In [16]:
## Copy group and color onto each graph node; node size is the node's degree
for record in colors.itertuples(index=False):
    node_attrs = G.nodes[record.node]
    node_attrs["group"] = record.group
    node_attrs["color"] = record.color
    node_attrs["size"] = G.degree[record.node]

In [17]:
#graph_output_directory = "./docs/index.html"
## pyvis network object used to render the graph.
## Optional dark theme (currently off): bgcolor="#1a1a1a", font_color="#cccccc"
net = Network(
    height="900px",
    width="100%",
    notebook=True,
    cdn_resources="remote",
    select_menu=True,
    filter_menu=False,
)

In [18]:
## Load the networkx graph into the pyvis network
net.from_nx(G)

## Layout solver in use: force-atlas-2. Alternatives that were tried:
##   net.repulsion(node_distance=150, spring_length=400)
##   net.barnes_hut(gravity=-18100, central_gravity=5.05, spring_length=380)
net.force_atlas_2based(gravity=-31, central_gravity=0.015)

## Only show the "physics" group of configuration buttons
net.show_buttons(filter_=["physics"])

In [19]:
# Render the network under the file name "graph.html".
# Alternative target (disabled): net.show(graph_output_directory)
net.show("graph.html")