In [1]:
import pandas as pd
import networkx as nx
import igviz as ig

In [2]:
# Load the pre-processed character mentions for Dorian Gray.
# Each row is one mention: its token span (`start_token`/`end_token`), the
# surface `text`, the coreference id (`COREF`) and the resolved `character_name`.
# NOTE(review): the `Unnamed: 0` / `Unnamed: 0.1` columns look like index columns
# persisted by earlier `to_csv` calls — confirm, and consider dropping them upstream.
characters_df = pd.read_csv('../data/DorianGray/processed_characters.csv')
characters_df

Unnamed: 0.1,Unnamed: 0,COREF,start_token,end_token,prop,cat,text,character_name
0,4,112,77,79,PROP,PER,Lord Henry Wotton,Lord Henry Wotton
1,6,112,156,156,PRON,PER,him,Lord Henry Wotton
2,14,113,289,290,PROP,PER,Basil Hallward,Basil Hallward
3,27,113,396,396,PROP,PER,Basil,Basil Hallward
4,29,112,408,409,PROP,PER,Lord Henry,Lord Henry Wotton
...,...,...,...,...,...,...,...,...
7148,14196,307,95668,95668,PRON,PER,he,Alan Campbell
7149,14197,307,95674,95674,PRON,PER,He,Alan Campbell
7150,14215,115,95814,95816,PROP,PER,Mr. Dorian Gray,Mr. Dorian Gray
7151,14232,268,95885,95885,PROP,PER,Francis,Francis Osborne


In [3]:
# Token-level table for the novel (tab-separated): one row per token, carrying
# the `paragraph_ID` that the mention table is later joined against.
tokens_df = pd.read_csv(
    '../data/DorianGray/character-tagging/DorianGray.tokens',
    sep='\t',
)
tokens_df

Unnamed: 0,paragraph_ID,sentence_ID,token_ID_within_sentence,token_ID_within_document,word,lemma,byte_onset,byte_offset,POS_tag,fine_POS_tag,dependency_relation,syntactic_head_ID,event
0,0,0,0,0,The,the,0,3,DET,DT,det,1,O
1,0,0,1,1,studio,studio,4,10,NOUN,NN,nsubjpass,3,O
2,0,0,2,2,was,be,11,14,AUX,VBD,auxpass,3,O
3,0,0,3,3,filled,fill,15,21,VERB,VBN,ROOT,3,O
4,0,0,4,4,with,with,22,26,ADP,IN,prep,3,O
...,...,...,...,...,...,...,...,...,...,...,...,...,...
96039,1490,6159,11,96039,recognized,recognize,426538,426548,VERB,VBD,relcl,96036,O
96040,1490,6159,12,96040,who,who,426549,426552,PRON,WP,attr,96042,O
96041,1490,6159,13,96041,it,it,426553,426555,PRON,PRP,nsubj,96042,O
96042,1490,6159,14,96042,was,be,426556,426559,AUX,VBD,ccomp,96039,O


In [4]:
# Paragraph based network
#
# Tag every character mention with the paragraph it starts in.
# `start_token` is used as a *positional* row number into `tokens_df`, exactly
# as the original per-row `tokens_df.iloc[x]` lookup did, but the lookup is now
# done once on the `paragraph_ID` column instead of building a full row Series
# for every mention.
# NOTE(review): this relies on row position in `tokens_df` matching
# `token_ID_within_document` — true for the rows displayed above; confirm for the whole file.
characters_paragraph_mention_df = characters_df.copy()
characters_paragraph_mention_df['paragraph'] = (
    tokens_df.paragraph_ID
    .iloc[characters_paragraph_mention_df.start_token.to_numpy()]
    .to_numpy()  # drop the token index so values align by position, not by label
)
characters_paragraph_mention_df

Unnamed: 0.1,Unnamed: 0,COREF,start_token,end_token,prop,cat,text,character_name,paragraph
0,4,112,77,79,PROP,PER,Lord Henry Wotton,Lord Henry Wotton,1
1,6,112,156,156,PRON,PER,him,Lord Henry Wotton,1
2,14,113,289,290,PROP,PER,Basil Hallward,Basil Hallward,2
3,27,113,396,396,PROP,PER,Basil,Basil Hallward,4
4,29,112,408,409,PROP,PER,Lord Henry,Lord Henry Wotton,4
...,...,...,...,...,...,...,...,...,...
7148,14196,307,95668,95668,PRON,PER,he,Alan Campbell,1483
7149,14197,307,95674,95674,PRON,PER,He,Alan Campbell,1483
7150,14215,115,95814,95816,PROP,PER,Mr. Dorian Gray,Mr. Dorian Gray,1486
7151,14232,268,95885,95885,PROP,PER,Francis,Francis Osborne,1488


In [5]:
# Split the paragraph range into `paragraph_windows` consecutive windows.
# `sizes[k]` is the number of paragraphs assigned to window k; the remainder of
# the integer division is handed out one paragraph at a time to the first windows,
# so the sizes add up to the largest paragraph id that has a mention.
paragraph_windows = 10
paragraphs_per_window, leftover = divmod(
    characters_paragraph_mention_df.paragraph.max(), paragraph_windows
)

sizes = [paragraphs_per_window] * paragraph_windows
for window_index in range(leftover):
    sizes[window_index] += 1

In [17]:
# Build one character co-occurrence graph per paragraph window.
#
# Within a window, two characters are linked when they are both mentioned in the
# same paragraph:
#   * edge attribute `weight`      - number of paragraphs the pair shares;
#   * node attribute `connections` - summed over paragraphs, the number of
#     *other* distinct characters mentioned alongside this one.
# Paragraphs that mention only a single character contribute nothing.
#
# Result: `graphs[k]` is the graph for window k, with len(graphs) == len(sizes).
graphs = []

G = nx.Graph()

curr_graph = 0

for paragraph, sub_df in characters_paragraph_mention_df.groupby(characters_paragraph_mention_df.paragraph):

    # Close every window that ends before this paragraph. This must be a loop:
    # if a whole window has no mentions, a single `if` would advance only one
    # step and all later paragraphs would land in the wrong window.
    # The `len(sizes) - 1` bound keeps any overflow in the last window.
    while curr_graph < len(sizes) - 1 and paragraph > sum(sizes[:curr_graph + 1]):
        graphs.append(G)
        curr_graph += 1
        G = nx.Graph()

    # Distinct characters in this paragraph, in order of first mention.
    names = sub_df.character_name.unique()
    if names.size == 1:
        continue

    co_mentioned = names.size - 1

    for i, name in enumerate(names):

        if name not in G.nodes:
            G.add_node(name, connections=co_mentioned)
        else:
            G.nodes[name]['connections'] += co_mentioned

        # Only look at names after `name` so each unordered pair is counted once.
        for o_name in names[i + 1:]:

            if o_name not in G.nodes:
                # Placeholder; its own count is added when it becomes `name`.
                G.add_node(o_name, connections=0)
            if (name, o_name) not in G.edges:
                G.add_edge(name, o_name, weight=1)
            else:
                G.edges[name, o_name]['weight'] += 1

# The loop above only stores a graph when the *next* window starts, so the graph
# still being built has to be stored explicitly (it was previously dropped).
graphs.append(G)

# Windows that were never reached get an empty graph so that indices line up
# with `sizes`.
while len(graphs) < len(sizes):
    graphs.append(nx.Graph())

In [39]:
# Interactive plot of one window's co-occurrence graph.
window = 4

G = graphs[window]

# One value per node, in `G.nodes()` order: the node's accumulated
# `connections` count drives both its marker size and its colour.
sizing_list = [G.nodes[node]['connections'] for node in G.nodes()]
color_list = list(sizing_list)

ig.plot(
    G,
    title=f"Window {window} Social Network",
    layout="spring",
    size_method=sizing_list,  # explicit per-node sizes
    color_method=color_list,  # explicit per-node colour values
    edge_label="weight",  # show the edge `weight` attribute as the edge label
    edge_label_position="bottom center",  # place the label below the edge
    node_text=['connections'],
)


Argument `titlefont_size` is deprecated and will be removed in 0.6.0.



FigureWidget({
    'data': [{'hoverinfo': 'text',
              'line': {'color': '#888', 'width': 1},
              'mode': 'lines+text',
              'text': [],
              'type': 'scatter',
              'uid': 'bd02d3e9-d8e1-487c-ba13-596378793dc0',
              'x': [0.004206855807078213, 0.2226619468873262, None,
                    0.004206855807078213, 0.027779128928134958, None,
                    0.004206855807078213, 0.10676095538771309, None,
                    0.004206855807078213, -0.22675343634127856, None,
                    0.004206855807078213, -0.01583291865957878, None,
                    0.2226619468873262, 0.027779128928134958, None,
                    0.2226619468873262, -0.22675343634127856, None,
                    0.2226619468873262, 0.10676095538771309, None,
                    0.2226619468873262, -1.0, None, 0.2226619468873262,
                    0.8811774679906058, None, 0.027779128928134958,
                    -0.22675343634127856, None, 0.0