In [76]:
import pandas as pd
import networkx as nx
import numpy as np
import nltk.metrics as nm
import editdistance


In [8]:
# Load the graph from its GraphML file (path is relative to the notebook's
# working directory). Later cells treat the node ids as author names and read
# a 'jelcode' attribute from every edge.
path = '../save/nber.graphml'
G = nx.read_graphml(path)

In [30]:
# Build the subgraph made of the edges tagged with a given JEL code
def getJELSubgraph(G, jelcode):
    """Return a new undirected graph holding the edges of G tagged with jelcode.

    An edge is kept when ``jelcode`` occurs anywhere inside the edge's
    'jelcode' attribute string (a substring test, so 'J1' also selects an
    edge tagged 'J11'). Edge attribute dicts are carried over; nodes that
    have no selected edge are not included. Raises KeyError if an edge has
    no 'jelcode' attribute.
    """
    selected = []
    for source, target, attrs in G.edges(data=True):
        if jelcode in attrs['jelcode']:
            selected.append((source, target, attrs))

    subgraph = nx.Graph()
    subgraph.add_edges_from(selected)
    return subgraph

In [31]:
# Returns the mean all-pairs node connectivity of a JEL code's subgraph
def getConnEst(G, jelcode):
    """Mean pairwise node connectivity of the subgraph for one JEL code.

    The subgraph is built with ``getJELSubgraph`` and the connectivity
    reported by ``nx.all_pairs_node_connectivity`` is averaged over every
    (source, target) pair.

    Parameters
    ----------
    G : networkx graph whose edges carry a 'jelcode' attribute.
    jelcode : str
        Code passed through to ``getJELSubgraph``.

    Returns
    -------
    float
        Mean connectivity over all node pairs, or NaN when the subgraph
        contains no node pairs (e.g. no edge carries the code).
    """
    pairwise = nx.all_pairs_node_connectivity(getJELSubgraph(G, jelcode))

    # The result is a dict of dicts: {source: {target: connectivity}}.
    # Flatten every row so the mean really covers all pairs; indexing
    # ``.values()[0]`` only looked at one arbitrary source node and fails on
    # Python 3, where dict views cannot be subscripted.
    connectivities = [k for row in pairwise.values() for k in row.values()]

    if not connectivities:
        # Nothing to average; avoid numpy's empty-mean warning.
        return float('nan')
    return np.mean(connectivities)

In [93]:
# Distinct JEL codes: every edge stores its codes as one comma-separated
# string, so join all of them and split once.
e = G.edges(data=True)
edge_codes = [attrs['jelcode'] for (_src, _dst, attrs) in e]
jels = set(','.join(edge_codes).split(','))

# One-column frame of the graph's nodes; later cells fuzzy-match these
# against the REPEC author names.
n = G.nodes()
ndf = pd.DataFrame(n, columns=['author'])

In [127]:
## Merge in Departments
# Read the pipe-delimited REPEC paper table. Later cells use its `author`
# and `department` columns.
# NOTE(review): the path climbs out of this project into ../../../work_erica/;
# confirm the file is reachable before running on another machine.
department = pd.read_csv('../../../work_erica/EconIdeaNetwork/save/REPEC_Paper_Info.csv', delimiter='|')

In [132]:
def _cleanAuthor(name):
    """Build the match key for one REPEC author string.

    Commas are removed, the name is split on single spaces, and the second
    token is concatenated in front of the first (presumably turning
    'Last, First' into 'FirstLast' -- confirm against the CSV). Any further
    tokens are ignored.

    Missing values yield '' and names with a single token are returned as
    that token, instead of raising AttributeError / IndexError.
    """
    if pd.isnull(name):
        return ''
    tokens = name.replace(',', '').split(' ')
    if len(tokens) < 2:
        # Nothing to swap with; keep the lone token as the key.
        return tokens[0]
    return tokens[1] + tokens[0]


# Key used to fuzzy-match REPEC authors against the graph's node names.
department['authorClean'] = department.author.apply(_cleanAuthor)

In [109]:
# Manually check for matches using Levenshtein distance.
# A node matches when its edit distance to the REPEC name is strictly below
# the tolerance, so the default tolerance of 2 accepts a distance of 0 or 1.

# Not used by the cells visible here; kept in case a later cell relies on it.
authorDF = []

def getAuthorMatches(author, tolerance=2):
    """Find graph nodes whose name is within a small edit distance of author.

    Parameters
    ----------
    author : str
        Cleaned REPEC author name to look up.
    tolerance : int, optional
        Exclusive upper bound on the Levenshtein distance; a node is kept
        when ``editdistance.eval(node, author) < tolerance``. Defaults to 2.

    Returns
    -------
    pandas.DataFrame
        Column 'author' holds the matching node names from ``ndf`` (original
        index preserved) and column 'repecAuthor' repeats ``author`` on
        every row. Empty when nothing matches.
    """
    # Distance from every node name to the requested author.
    distances = ndf.author.apply(lambda x: editdistance.eval(x, author))

    matches = pd.DataFrame(ndf.author[distances < tolerance])
    matches['repecAuthor'] = author

    return matches

In [110]:
# Run the fuzzy matcher once per row of `department`. Each call compares the
# name against every node in `ndf`, so this is the expensive step. The result
# is a Series whose elements are the DataFrames returned by getAuthorMatches.
deptMatches = department.authorClean.apply(getAuthorMatches)

In [112]:
# Stack the per-author match frames into a single DataFrame.
# The cell overwrites its own input, so it is guarded to stay re-runnable:
# once deptMatches is already the stacked frame, list(deptMatches) would yield
# column labels and pd.concat would raise.
if not isinstance(deptMatches, pd.DataFrame):
    matchFrames = list(deptMatches)
    if matchFrames:
        deptMatches = pd.concat(matchFrames)
    else:
        # pd.concat raises ValueError on an empty list; fall back to an empty
        # frame with the two columns getAuthorMatches produces.
        deptMatches = pd.DataFrame(columns=['author', 'repecAuthor'])

In [134]:
# Attach the REPEC rows to each fuzzy match. Both frames have an `author`
# column, so the merge suffixes them: `author_x` is the graph node name and
# `author_y` is the raw REPEC author string.
authDept = deptMatches.merge(
    department,
    how='left',
    left_on='repecAuthor',
    right_on='authorClean',
)

In [138]:
# Node names of the matched authors whose department is exactly this
# Princeton string. Names may repeat if an author has several merged rows.
isPrinceton = authDept['department'] == 'Princeton University, Princeton NJ'
princeton = list(authDept.loc[isPrinceton, 'author_x'])

In [141]:
# Subgraph of G restricted to the matched Princeton author nodes.
Gpton = G.subgraph(princeton)

In [145]:
# Save the Princeton subgraph as GraphML. The path is relative, so the file
# lands in the notebook's current working directory.
nx.write_graphml(Gpton, 'pton.graphml')