In [24]:
'''
Social network visualization
The network() and graph()  function code are adapted from https://earlyprint.org/jupyterbook/metadata.html
'''
import re
import pandas as pd
from pyvis.network import Network

def authors(csv):
    '''
    Extract text IDs and standardize authors' names.
    '''
    df = pd.read_csv(csv)
    authors = df['author']
    ids = df['id']
    numFiles = len(ids)
    count = 0
    dict = {}
    while count < numFiles:
        words = set(authors[count].split(';'))
        # removing unnecessary keywords
        words.discard('')
        # Removing unnecessary dates  
        newWords = []
        for w in words: 
            w = w.replace('.','')
            # w = re.sub(r'\([^)]*\)','',w)
            if re.search('printer',w):
                continue
            w = re.sub(r' ca|-|[0-9]{4}|, Sir,|, Sir|fl|, d| d \?|,  A profession of faith|, Mr','',w)
            w.replace(r"\?",'')
            # w = re.sub(r', Earl of,  or 8|, ca|,  aut|, \?|,  Of schisme|, 16th/17th cent|, attributed name','',w)
            # w = re.sub(r',   aut|, bachelor of civil law| Tragedie of Darius| Monarchick tragedies|Doomesday| Paraenesis to the Prince|,  A profession of faith','',w)
            # w = w.strip('|,|, ')
            newWords.append(w)
        dict[ids[count]] = list(set(newWords))
        count += 1
    return dict 

def network(dict):
    '''
    Catalogs edges between nodes and outputs the visualization as a HTML file. 
    '''
    edgelist = []    
    for id in dict.keys():
        for name in dict[id]:
            edgelist.append((id, name))
    return edgelist

def graph(edgelist,title):
    # Create pyvis graph
    g = Network(width=1200,height=800,notebook=True,heading='')
    for id,auth in edgelist:
        if id == 'Anonymous': continue
        g.add_node(id, id, title=id, color='purple')
        g.add_node(auth, auth, title=auth, color='pink')    
        g.add_edge(id,auth)
    g.show(title+".html")

In [25]:
files = ['/srv/data/metadata/tuning/EPtuning.csv',
        '/srv/data/metadata/tuning/spreadsheet.csv',
        '/srv/data/metadata/tuning/keywords.csv',
        '/srv/data/metadata/tuning/people.csv']
auths = {}
for csvFile in files:
    auths.update(authors(csvFile))
print(auths)

{'A01932': ['Gordon, Robert d ?'], 'A02495': ['Anonymous'], 'A03149': ['Heylyn, Peter, '], 'A04813': ['Kemys, Lawrence ', ' Kemys, Lawrence '], 'A07886': ['Mun, Thomas, '], 'A10821': ['Roberts, Lewes, ', ' Dalen, Cornelius van, engraver', ' Roberts, Lewes, '], 'A12458': ['Smith, John, ', ' Pass, Simon van de, ?, engraver', ' Smith, John, '], 'A12460': ['Smith, John, ', ' Pass, Simon van de, ?, engraver', ' Smith, John, '], 'A12466': [' Smith, John, ', 'Smith, John, ', ' Hole, William , engraver', ' Symonds, William, ?', ' Abbay, Thomas'], 'A13290': ['Symonds, William, ?'], 'A21108': ['Eburne, Richard'], 'A31106': ['Bartoli, Daniello, ', ' Salusbury, Thomas'], 'A37936': ['England and Wales'], 'A41427': ['Gage, Thomas, ?', ' Gage, Thomas, ?'], 'A51598': ['Mun, Thomas, ', ' Mun, Thomas, ', ' Mun, John'], 'A57391': ['Roberts, Lewes, ', ' Roberts, Lewes, '], 'A66847': ['Woolley, Hannah,  ', ' Woolley, Hannah,   Second part of the queenlike closet'], 'A68617': ['Purchas, Samuel, ?', ' Purcha

In [3]:
decade = '/srv/data/metadata/tuning/decade.csv'
auth = authors(decade)
edges = network(auth)
graph(edges,'1600-1610 authors')

In [None]:
sermons = '/srv/data/metadata/tuning/sermonsOurTime.csv'
auth = authors(sermons)
edges = network(auth)
graph(edges,'1580-1641 sermons authors')

In [29]:
group1 = ['A27982', 'A73532', 'A16495', 'B00963', 'A16507', 'A14514', 'A14518', 'A14512', 'A14516', 'A14511', 'A14517', 'A14519', 'A14520', 'A14521', 'A10439', 'A10417', 'A10440', 'A10438', 'A10436', 'A68635', 'A68633', 'A34660', 'A34659', 'A02626', 'A04813', 'A34936', 'A02059', 'A19312', 'A67893', 'A19313']
group0 = ['A11464', 'A11467', 'A34663', 'A80611', 'A80608', 'A80630', 'A80637', 'A34689', 'A34679', 'A34666', 'A13288', 'A13287', 'A13290', 'A19588', 'A19589', 'A19583', 'A19590', 'A17499', 'A69161', 'A19581', 'A02058', 'A15050', 'A01006', 'A01009', 'B13579', 'A01004', 'A01012', 'A01007', 'A06632', 'A01005', 'A01011']
group2 = ['A11472', 'A11474', 'A27888', 'A62166', 'A16552', 'A16457', 'A16527_01', 'A16527_02', 'A16527_03', 'A16527_04', 'A16469', 'A73472', 'A16564', 'A02495']
group3 = ['A80614', 'A69149', 'A34693', 'A80640', 'A80633', 'A34599', 'A80625', 'A80622', 'B20736', 'A34673', 'A34678', 'B20727', 'A71036', 'A84399', 'B20729', 'A80626', 'A80639', 'A34672', 'A80635', 'A34675', 'A80636', 'A34674', 'A34669', 'B07540', 'B07539', 'A01008', 'A01010']

def virginiaGraph(edgelist,title):
    # Create pyvis graph
    g = Network(width=1200,height=800,notebook=True,heading='')
    for id,auth in edgelist:
        if id in group0:
            g.add_node(id, id, title=id, color='#E5CCFF') #purple
            g.add_node(auth, auth, title=auth, color='#E5FFCC') 
            g.add_edge(id,auth)
        elif id in group1: 
            g.add_node(id, id, title=id, color='pink')
            g.add_node(auth, auth, title=auth, color='#E5FFCC') #green 
            g.add_edge(id,auth)
        elif id in group2: 
            g.add_node(id, id, title=id, color='#CCFFFF') #blue 
            g.add_node(auth, auth, title=auth, color='#E5FFCC') 
            g.add_edge(id,auth)
        elif id in group3: 
            g.add_node(id, id, title=id, color='#FFE5CC') #orange
            g.add_node(auth, auth, title=auth, color='#E5FFCC') 
            g.add_edge(id,auth)
    g.show(title+".html")

In [30]:
csv = '/srv/data/metadata/tuning/virginia.csv'
auth = authors(csv)
edges = network(auth)
virginiaGraph(edges,'virginiaAuthorsGrouped')