In [6]:
#the following %pip install commands may need to be run the first time, depending on which packages are already installed:
#%pip install pandas
#%pip install pyvis
#(pyvis.network is a submodule of pyvis, so it does not need a separate install)

import pandas as pd
from pyvis.network import Network

#create DataFrame (DF) from raw csv:
#the path lives in one named constant so it is easy to repoint on another machine
CSV_PATH='/Users/arnoferguson/Downloads/2022_SFI_fac_fields - no ext.fellows.csv'
originalDF=pd.read_csv(CSV_PATH)

#create names index ("first last" for every row of the csv):
fullnameS=originalDF['first']+' '+originalDF['last']

#create main DF (role and fields of each person, indexed by full name):
mainDF=originalDF.filter(['role','fields'],axis=1)
mainDF.index=fullnameS

#reformat field lists:
#split the ', '-separated 'fields' string into one column per field; shorter rows are padded with missing values
separatefieldsDF=originalDF['fields'].str.split(', ',expand=True)
separatefieldsDF.index=fullnameS
#one sorted list of fields per row. Rows are walked positionally instead of being looked up by name, so the
#lists stay aligned with mainDF even when two people share a name, and pd.notna drops every kind of
#missing marker (None, NaN, pd.NA) rather than only None
fieldlistS=[sorted(f for f in row if pd.notna(f)) for row in separatefieldsDF.values.tolist()]
mainDF['fields']=fieldlistS

#create DF to look up field count:
#keep only the rows that actually carry a value in the 'count' column
fieldcountsDF=originalDF.filter(['count'],axis=1).dropna(axis=0,how='all')
#index the counts by field name; [1:-1] strips the first and last character of every name
#NOTE(review): presumably the names are wrapped in quotes/brackets in the csv - confirm against the file
fieldcountsDF.index=[f[1:-1] for f in originalDF['field'].dropna()]


In [7]:
#create matrix DF w/ field similarity weights between people:
#matrixDF.loc[p2,p1] = (number of p1's fields that p2 also has) / (number of p1's fields)
#every column p1 is normalised by p1's own field count, so the matrix is generally not symmetric

#look up every person's field list once instead of once per pair:
fieldsByPerson={p:mainDF.loc[p,'fields'] for p in fullnameS}

#collect all columns first and build the DF in one step
#(concatenating inside the loop copies the growing matrix again on every iteration):
similarityColumns={}
for p1 in fullnameS:
    p1Fields=fieldsByPerson[p1]
    p1FieldTotal=len(p1Fields)

    #weights of column p1, one entry per person p2 in fullnameS order:
    tmpL=[]
    for p2 in fullnameS:
        p2Fields=fieldsByPerson[p2]
        #tally of p1's fields that also appear in p2's fields:
        tmpOverlapFields=sum(1 for p1f in p1Fields if p1f in p2Fields)
        #a person without any fields overlaps with nobody: weight 0 instead of a ZeroDivisionError
        tmpL.append(tmpOverlapFields/p1FieldTotal if p1FieldTotal>0 else 0.0)

    similarityColumns[p1]=tmpL

matrixDF=pd.DataFrame(similarityColumns,index=pd.Index(fullnameS))

In [8]:
#hidden edges between people w/ similarity weight >0.5:
#every ordered pair of two different people is checked; the weight is read from matrixDF at
#row = first person, column = second person

weightedPairs=[
    ((rowName,colName),matrixDF.loc[rowName,colName])
    for rowName in fullnameS
    for colName in fullnameS
    if colName != rowName and matrixDF.loc[rowName,colName]>0.5
]

#split into the two parallel lists (edge tuple / edge weight):
pEdges=[pair for pair,_ in weightedPairs]
pEdgeWeights=[weight for _,weight in weightedPairs]

In [9]:
#fields as nodes with hidden and visible edges to people:

#a field whose count is at least this gets a visible edge to each of its people; smaller fields get a hidden one:
VISIBLE_EDGE_MIN_COUNT=25

#custom person-node colors based on role:
ROLE_COLORS={
    'External Faculty':'#EEA2AD',
    'Resident Faculty':'#8B3A62',
    'External Faculty Fellow':'#FFB6C1',
    'Postdoctoral Fellow':'#CD6090',
}
#used for any role not listed above (including a missing role). Without a fallback no color would be
#appended for that person and allNodeColors would no longer line up with allNodes
DEFAULT_ROLE_COLOR='#B0B0B0'

#lists for creating edges:
visibleEdges=[]
visibleEdgeWeights=[]
hiddenEdges=[]
hiddenEdgeWeights=[]

#lists for creating nodes (parallel lists: entry i of each list describes node i):
allNodes=[]
allNodeWeights=[]
allNodeColors=[]
allNodeTitles=[]
allNodeFonts=[]

#mirror of allNodes as a set, so the "already added?" check does not scan the whole list every time:
knownNodes=set()

for p in fullnameS:

    #person's role and fields:
    tmpRole=mainDF.loc[p,'role']
    tmpFields=mainDF.loc[p,'fields']

    for f in tmpFields:

        #weight (field count) for the node:
        tmpWeight=fieldcountsDF.loc[f,'count']

        #add field node (only the first time the field is seen):
        if f not in knownNodes:
            knownNodes.add(f)
            allNodes.append(f)
            allNodeWeights.append(tmpWeight)
            allNodeTitles.append(f)
            allNodeColors.append('#FCFCFC')

        #edge weight shrinks with the field's count and with the number of fields the person has:
        tmpEdgeWeight=1/(tmpWeight*len(tmpFields))

        #add field/person hidden edge (small field counts):
        if tmpWeight<VISIBLE_EDGE_MIN_COUNT:
            hiddenEdges.append((p,f))
            hiddenEdgeWeights.append(tmpEdgeWeight)

        #add field/person visible edge (for big field counts):
        elif tmpWeight>=VISIBLE_EDGE_MIN_COUNT:
            visibleEdges.append((p,f))
            visibleEdgeWeights.append(tmpEdgeWeight)

    #add person node:
    knownNodes.add(p)
    allNodes.append(p)
    #the title is the person's field list as one readable string (pyvis documents title as a string, not a list):
    allNodeTitles.append(', '.join(str(f) for f in tmpFields))
    allNodeWeights.append(1)
    #exactly one color per person, so the node lists always stay the same length:
    allNodeColors.append(ROLE_COLORS.get(tmpRole,DEFAULT_ROLE_COLOR))

In [10]:
#create pyvis network:
fieldNET=Network(height = 950, width = 1700, notebook = False)
fieldNET.set_options("""var options = {"physics": {"maxVelocity": 20}}""")
#fieldNET.show_buttons(filter_=["physics"])

#add nodes (the lists are parallel: entry i of each keyword list belongs to node i):
fieldNET.add_nodes(allNodes, value=allNodeWeights, color=allNodeColors, title=allNodeTitles)

#add edges, in this order:
#  1. hidden edges between fields and people
#  2. visible edges between fields and people
#  3. hidden edges between people
#zip walks each edge list together with its weight list from the first to the last entry
#(the previous range(0,len(...)-1,1) loops stopped one short and dropped the last edge of every list)
for edgeList,weightList,isHidden in (
    (hiddenEdges,hiddenEdgeWeights,True),
    (visibleEdges,visibleEdgeWeights,False),
    (pEdges,pEdgeWeights,True),
):
    for (source,target),edgeWeight in zip(edgeList,weightList):
        fieldNET.add_edge(source, target, weight=edgeWeight, physics=True, hidden=isHidden)

In [11]:
#render the network into the html file 'fieldNET.html'
#(per the original note: if notebook = True is used when creating the Network above, the result embeds here instead)
#NOTE(review): some pyvis releases give show() its own notebook= argument - confirm against the installed version
#that this call still matches the notebook = False setting used when the Network was created
fieldNET.show('fieldNET.html')