In [1]:
import networkx as nx
from helpers import read_jsonl, meta_path, texts_path, PROCESSED

In [2]:
def create_edges(citations: dict) -> list:
    """Collect the directed edges between papers that are present in ``citations``.

    ``citations`` maps a paper id to a record holding the lists
    ``"inbound_citations"`` and ``"outbound_citations"``. For every paper the
    function emits ``(other, paper)`` for each inbound id and ``(paper, other)``
    for each outbound id, skipping ids that are not themselves keys of
    ``citations`` (presumably: edges point from the citing paper to the cited
    one -- confirm against the data set's definition of inbound/outbound).

    The same pair can appear more than once, e.g. when one paper lists the
    link as outbound and the other lists it as inbound; no de-duplication is
    done here.
    """
    edges = []
    for paper_id, record in citations.items():
        # Links arriving at this paper.
        edges.extend(
            (source, paper_id)
            for source in record["inbound_citations"]
            if source in citations
        )
        # Links leaving this paper.
        edges.extend(
            (paper_id, target)
            for target in record["outbound_citations"]
            if target in citations
        )
    return edges

In [3]:
# Ids of all papers that have a record in the texts file; used below to
# restrict the metadata to those papers.
relevant_papers = set(record["paper_id"] for record in read_jsonl(texts_path))

In [4]:
# Keep the citation lists of every paper that is in `relevant_papers` and has
# a known publication year of 1991 or later.
metadata = {}
for paper in read_jsonl(meta_path):
    _id = paper["paper_id"]
    date_published = paper["year"]
    if _id not in relevant_papers:
        continue
    if date_published is None or not date_published >= 1991:
        continue
    metadata[_id] = {
        "inbound_citations": paper["inbound_citations"],
        "outbound_citations": paper["outbound_citations"],
    }

len(metadata)

9145

In [5]:
# Build the directed citation graph from the filtered records. A DiGraph
# stores each (source, target) pair once, so repeated pairs in `edges`
# collapse into a single edge.
edges = create_edges(metadata)
G = nx.DiGraph(edges)

In [6]:
# Export the graph as a GEXF file under PROCESSED (presumably a pathlib.Path,
# since it is joined with `/`); the file name suggests it is meant for Gephi.
nx.write_gexf(G, PROCESSED / "210808-gephi.gexf")

## Measuring centrality

We assume that the more central a publication is within our citation network, the more important it is.

In [7]:
def calculate_centrality(G, method, limit=None):
    """Rank the nodes of ``G`` by a centrality measure, highest score first.

    Parameters
    ----------
    G
        Graph handed unchanged to the selected networkx centrality function.
    method : str
        ``"degree_centrality"``, ``"closeness_centrality"`` or
        ``"betweenness_centrality"``.
    limit : int or None
        When given, only the first ``limit`` entries of the ranking are
        returned (applied as a slice); ``None`` returns every node.

    Returns
    -------
    dict
        Node -> score, in descending score order.

    Raises
    ------
    KeyError
        If ``method`` is not one of the supported names.
    """
    lookup = {
        "degree_centrality": nx.degree_centrality,
        "closeness_centrality": nx.closeness_centrality,
        "betweenness_centrality": nx.betweenness_centrality,
    }
    scores = lookup[method](G)
    # sorted() is stable, so nodes with equal scores keep the order in which
    # the centrality function reported them.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    if limit is not None:
        # Slice the ranked pairs directly instead of re-filtering the whole
        # dict against a list of ids, which cost one list scan per node.
        ranked = ranked[:limit]
    return dict(ranked)


def add_info(comparison_set):
    """Return id, title and year of the selected papers, ordered by year.

    Streams the records of ``read_jsonl(meta_path)`` and keeps those whose
    ``"paper_id"`` occurs in ``comparison_set``.

    Parameters
    ----------
    comparison_set : iterable of hashable paper ids
        Any iterable works (list, set, dict keys, generator).

    Returns
    -------
    list of dict
        One ``{"id", "title", "year"}`` dict per matching record, sorted by
        ascending year; records whose year is ``None`` come last. Records with
        equal years keep their order from the metadata file.
    """
    # Callers pass plain lists; hash the ids once so each metadata record
    # costs a single lookup instead of a scan over the whole list.
    wanted = frozenset(comparison_set)
    result = [
        {
            "id": paper["paper_id"],
            "title": paper["title"],
            "year": paper["year"],
        }
        for paper in read_jsonl(meta_path)
        if paper["paper_id"] in wanted
    ]
    # The metadata can carry year=None; comparing None with an int raises
    # TypeError, so sort on (is-missing, year) to push missing years to the end.
    return sorted(result, key=lambda d: (d["year"] is None, d["year"] or 0))

In [8]:
# Rank all nodes of G by betweenness centrality and keep the ten
# highest-scoring paper ids (id -> score, best first).
centrality = calculate_centrality(G, method="betweenness_centrality", limit=10)
centrality

{'3354592': 7.4156470152020756e-06,
 '159279438': 7.4156470152020756e-06,
 '53065201': 4.943764676801384e-06,
 '195490641': 4.943764676801384e-06,
 '55914559': 4.943764676801384e-06,
 '39342543': 4.943764676801384e-06,
 '55694829': 4.943764676801384e-06,
 '53135628': 3.7078235076010378e-06,
 '55024365': 2.471882338400692e-06,
 '29322734': 2.471882338400692e-06}

In [9]:
# most central paper: the id with the highest score (on a tie, the first
# such id in `centrality`), shown with its stored citation lists
most_central_id = max(centrality, key=centrality.get)
metadata[most_central_id]

{'inbound_citations': ['3235947', '195490641', '4564427'],
 'outbound_citations': ['48451139',
  '155492710',
  '23331735',
  '22266624',
  '53546359',
  '20144768',
  '59146960',
  '154457832',
  '42295037',
  '155004773',
  '1675591',
  '145052632',
  '153346601',
  '155014357',
  '153563125',
  '32911950',
  '153507874',
  '5008492',
  '6107989',
  '43858142',
  '24536392',
  '35947628',
  '158837917',
  '154800815',
  '19338547',
  '157282379',
  '159279438',
  '154874851',
  '152762905',
  '22128517',
  '936875',
  '154325094',
  '154001332',
  '10760950',
  '20083357',
  '151102473',
  '4107075',
  '168069164',
  '154267120',
  '153422929',
  '6317075',
  '40227257',
  '12431554',
  '15211782',
  '154779638',
  '155179130',
  '126749605',
  '198320739',
  '55301358',
  '1906485',
  '11309762',
  '154230677',
  '157270565']}

In [10]:
# `centrality` was computed with limit=10, so it never holds 15 entries and
# the former `[:15]` slice was a no-op; take the ranked ids as they are.
top_central_papers = list(centrality)
# Note: add_info returns the papers ordered by publication year, not by
# centrality score.
result = add_info(top_central_papers)

In [11]:
# Print one "- <year>, <title>" bullet per paper, in the order held by `result`.
for entry in result:
    print(f'- {entry["year"]}, {entry["title"]}')

- 2006, A Conceptual Framework for Interpreting Recorded Human History
- 2008, Economic Consequences of Terrorism in Developed and Developing Countries: An Overview
- 2011, The Political Costs of Crisis Bargaining: Presidential Rhetoric and the Role of Party
- 2014, The Evolving Debate on the Effect of Foreign Aid on Corruption and Institutions in Africa
- 2015, Political Corporate Social Responsibility: Reviewing Theories and Setting New Agendas
- 2016, Foreign aid and governance in Africa
- 2017, Structural adjustment programmes adversely affect vulnerable populations: a systematic-narrative review of their effect on child and maternal health
- 2017, Allies in Transparency? Parliamentary, Judicial and Administrative Interplays in the EU’s International Negotiations
- 2018, International financial institutions as agents of neoliberalism
- 2019, The Impact of Foreign Aid on Maternal Mortality


## Community detection



In [12]:
from networkx.algorithms import community

In [13]:
# Girvan–Newman method
# girvan_newman yields successive partitions of the graph lazily; only the
# first one is pulled here, so a single split is computed.
communities_generator = community.girvan_newman(G)
top_level_communities = next(communities_generator)

In [14]:
# Freeze the partition as a tuple of sorted id lists so every community has a
# deterministic member order.
communities = tuple(map(sorted, top_level_communities))

In [15]:
# Pick the community with the most members and look up its papers. On a tie
# the last of the largest communities (in `communities` order) wins, because
# sorted() is stable and the final element is taken.
# NOTE(review): despite the name, this is a detected community, not a clique
# in the graph-theoretic sense; consider renaming once no other cell uses it.
largest_clique = sorted(communities, key=len)[-1]
result = add_info(largest_clique)

In [16]:
# Print one "- <year>, <title>" bullet per paper of the selected community.
for entry in result:
    print(f'- {entry["year"]}, {entry["title"]}')

- 1998, Corruption Around the World: Causes, Consequences, Scope, and Cures
- 2001, Global Political Economy: Understanding the International Economic Order
- 2005, Rules for the world: international organizations in global politics
- 2006, The Governance of the IMF: The Need for Comprehensive Reform
- 2006, Towards a Dynamic Model of the Interplay Between International Institutions
- 2007, To What Extent are International Organizations (IOs) Autonomous Actors in World Politics
- 2008, Trust in International Organizations: An Empirical Investigation Focusing on the United Nations
- 2008, International Organizations as Corporate Actors: Agency and Emergence in Theories of International Relations
- 2009, Power, Governance, and Ideas in Chile’s Free Trade Agreement Policy
- 2010, 1 A Perspective on Peace and Economic Cooperation
- 2011, Demistifying Globalization and the State: Preliminary Comments on Re-Commodification, Institutions and Innovation
- 2012, Agenda-setting in the Common Sec