In [182]:
%%bash

pip install pandas numpy gql



You should consider upgrading via the 'pip install --upgrade pip' command.


In [183]:
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
import pandas as pd

In [210]:
# Show column contents in full when rendering DataFrames
pd.options.display.max_colwidth = 0

In [184]:
# Public endpoint.
# !! Please keep the load light: it runs on a small postgres server that the
# !! author pays for personally, so do not point automated scripts at it.
# !! Running queries by hand from a notebook is fine.
GRAPHQL_ENDPOINT = "http://34.107.246.233/v1/graphql"

# To query a local server instead, use this assignment:
# GRAPHQL_ENDPOINT = "http://localhost:8080/v1/graphql"

In [185]:
# Build the GraphQL client: an HTTP transport bound to GRAPHQL_ENDPOINT,
# wrapped in a Client that fetches the schema through that transport.
#
# NOTE(review): verify=False presumably disables TLS certificate checks in the
# underlying HTTP library; harmless for a plain-http URL, but confirm it is
# intended before pointing this at an https endpoint.
transport = RequestsHTTPTransport(
    url=GRAPHQL_ENDPOINT,
    headers={"Content-type": "application/json"},
    use_json=True,
    verify=False,
)

# NOTE(review): `retries` is passed to Client here; newer gql releases appear to
# configure retries on the transport instead — confirm against the installed version.
client = Client(
    transport=transport,
    fetch_schema_from_transport=True,
    retries=3,
)

# Query the database

Let's get some data. The following code queries the GraphQL endpoint and gets the 2-level citation graph for a specific paper. It retrieves all papers at most 2 hops away. Assuming each paper cites 30 other papers on average, the result would be about 1 + 30 + 30 * 30 ≈ 930 records. Note that the database currently only contains papers labeled as "Computer Science", so cited papers from other fields will not show up.

In [221]:
# The title is matched with SQL LIKE, so you can use `%` wildcards for text matching
PAPER_TITLE = "%Mastering Atari, Go, Chess and Shogi by Planning%"

In [223]:
# GraphQL document for the citation graph around a single paper:
#   - `paper_fields` is a fragment listing the fields requested for every paper.
#   - the `papers` query takes a required `$title` string, matches it against
#     `title` with `_like`, and asks for a single result (limit: 1, offset: 0).
#   - `cites` is nested twice, each time with `limit_: 100` (presumably a cap on
#     the cited papers returned per paper — confirm against the schema), so the
#     response holds the root paper, its `cites`, and the `cites` of those.
query = gql("""
fragment paper_fields on papers {
  id
  title
  year
  doi_url
  s2_url
  num_citations
}

query papers($title: String!) {
  papers(limit: 1, where: {title: {_like: $title}}, offset: 0) {
    ...paper_fields
    cites(args: {limit_: 100}) {
      ...paper_fields
      cites(args: {limit_: 100}) {
        ...paper_fields
      }
    }
  }
}
""")

In [224]:
# Run the query; the response is indexed by the top-level query field "papers".
papers = client.execute(query, variable_values={"title": PAPER_TITLE})

# The query requests at most one paper, so an empty list means the title
# pattern matched nothing. Fail with a readable message instead of a bare
# "list index out of range" (the exception type stays IndexError).
if not papers["papers"]:
    raise IndexError(f"No paper found with a title matching {PAPER_TITLE!r}")
root = papers["papers"][0]

In [225]:
def process_recursively(paper, fn):
    """Apply ``fn`` to ``paper`` and then to every paper nested under ``cites``.

    The walk is depth-first and pre-order: a paper is handled before the
    papers it cites, and cited papers are handled in list order. A paper that
    occurs several times in the nested structure is visited once per occurrence.

    Args:
        paper: Mapping describing one paper; it may carry a ``"cites"`` entry
            holding an iterable of nested paper mappings.
        fn: Callable invoked with each paper mapping; its return value is ignored.
    """
    fn(paper)
    # A missing "cites" key and an explicit null (None once decoded) both mean
    # there is nothing further to walk.
    for cited_paper in paper.get("cites") or ():
        process_recursively(cited_paper, fn)

# Create DataFrame

Next, let's create a dataframe of all papers in the graph. Here, duplicates are eliminated.

In [226]:
# Collect every paper of the subgraph once, keyed by id, then turn the
# collection into a DataFrame indexed by paper id.
paper_map = {}


def add_paper(paper):
    """Store the paper's fields (everything except the nested "cites") under its id."""
    paper_map[paper["id"]] = {
        key: value for key, value in paper.items() if key != "cites"
    }


process_recursively(root, add_paper)
papers_df = pd.DataFrame.from_records(list(paper_map.values()), index="id")

print(f"{len(papers_df)} unique papers")
papers_df.head()

744 unique papers


Unnamed: 0_level_0,title,year,doi_url,s2_url,num_citations
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
c39fb7a46335c23f7529dd6f9f980462fd38653a,"Mastering Atari, Go, Chess and Shogi by Planning with a Learned Model",2019,,https://semanticscholar.org/paper/c39fb7a46335c23f7529dd6f9f980462fd38653a,25
0e7638dc16a5e5e9e46c91272bfb9c3dd242ef6d,Between MDPs and Semi-MDPs: A Framework for Temporal Abstraction in Reinforcement Learning,1999,https://doi.org/10.1016/S0004-3702%2899%2900052-1,https://semanticscholar.org/paper/0e7638dc16a5e5e9e46c91272bfb9c3dd242ef6d,1622
036373f17e5e47bcadc289e6c57d61cf5e08fe3d,Hierarchical Solution of Markov Decision Processes using Macro-actions,1998,,https://semanticscholar.org/paper/036373f17e5e47bcadc289e6c57d61cf5e08fe3d,228
07b6e294c47ef0d72b3229ca6b891dd772adb47d,Theoretical Results on Reinforcement Learning with Temporally Abstract Options,1998,https://doi.org/10.1007/BFb0026709,https://semanticscholar.org/paper/07b6e294c47ef0d72b3229ca6b891dd772adb47d,74
0a5bac1a42c05d4711bcd23c8caae60eb886fbb3,Planning under Time Constraints in Stochastic Domains,1995,https://doi.org/10.1016/0004-3702%2894%2900086-G,https://semanticscholar.org/paper/0a5bac1a42c05d4711bcd23c8caae60eb886fbb3,214


## Most Popular Papers

The following shows the papers in this subgraph with the most citations overall. These are popular papers that are somewhat relevant to the root paper.

In [227]:
# Top 25 papers of the subgraph, ordered by overall citation count (highest first)
(
    papers_df
    .sort_values(by="num_citations", ascending=False)
    .iloc[:25]
)

Unnamed: 0_level_0,title,year,doi_url,s2_url,num_citations
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abd1c342495432171beb7ca8fd9551ef13cbd0ff,ImageNet Classification with Deep Convolutional Neural Networks,2012,https://doi.org/10.1061/%28ASCE%29GT.1943-5606.0001284,https://semanticscholar.org/paper/abd1c342495432171beb7ca8fd9551ef13cbd0ff,40246
2c03df8b48bf3fa39054345bafabfeff15bfd11d,Deep Residual Learning for Image Recognition,2016,https://doi.org/10.1109/cvpr.2016.90,https://semanticscholar.org/paper/2c03df8b48bf3fa39054345bafabfeff15bfd11d,32655
13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986,Random Forests,2004,https://doi.org/10.1023/A%3A1010933404324,https://semanticscholar.org/paper/13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986,24563
44d2abe2175df8153f465f6c39b68b76a0d40ab9,Long Short-Term Memory,1997,https://doi.org/10.1162/neco.1997.9.8.1735,https://semanticscholar.org/paper/44d2abe2175df8153f465f6c39b68b76a0d40ab9,20985
97efafdb4a3942ab3efba53ded7413199f79c054,Reinforcement Learning: An Introduction,2005,https://doi.org/10.1109/TNN.1998.712192,https://semanticscholar.org/paper/97efafdb4a3942ab3efba53ded7413199f79c054,18170
4f607f03272e4d62708f5b2441355f9e005cb452,Convex Optimization,2006,https://doi.org/10.1017/CBO9780511804441,https://semanticscholar.org/paper/4f607f03272e4d62708f5b2441355f9e005cb452,16074
2e62d1345b340d5fda3b092c460264b9543bc4b5,Genetic Algorithms in Search Optimization and Machine Learning,1989,https://doi.org/10.5860/choice.27-0936,https://semanticscholar.org/paper/2e62d1345b340d5fda3b092c460264b9543bc4b5,16067
4b4279db68b16e20fbc56f9d41980a950191d30a,Adaptation in natural and artificial systems,1975,,https://semanticscholar.org/paper/4b4279db68b16e20fbc56f9d41980a950191d30a,15985
162d958ff885f1462aeda91cd72582323fd6a1f4,Gradient-based learning applied to document recognition,1998,https://doi.org/10.1109/5.726791,https://semanticscholar.org/paper/162d958ff885f1462aeda91cd72582323fd6a1f4,15423
e15cf50aa89fee8535703b9f9512fca5bfc43327,Going deeper with convolutions,2015,https://doi.org/10.1109/CVPR.2015.7298594,https://semanticscholar.org/paper/e15cf50aa89fee8535703b9f9512fca5bfc43327,13348


## Most cited papers within the subgraph

The following finds papers that are cited most often **within this subgraph**. These papers tend to be more relevant to the root paper since they are often cited by related papers.

In [228]:
from collections import Counter

# Walk the graph again, this time counting how often each paper id occurs
c = Counter()
process_recursively(root, lambda paper: c.update((paper["id"],)))

# One row per paper id with its occurrence count, indexed by id
paper_counts_df = pd.DataFrame.from_records(
    c.most_common(),
    columns=["id", "subgraph_citation_count"],
    index="id",
)
# Place the counts next to the paper metadata, aligned on the id index
paper_counts_df = pd.concat([papers_df, paper_counts_df], axis=1)
paper_counts_df.sort_values(by="subgraph_citation_count", ascending=False).head(n=20)

Unnamed: 0,title,year,doi_url,s2_url,num_citations,subgraph_citation_count
e0e9a94c4a6ba219e768b4e59f72c18f0a22e23d,Human-level control through deep reinforcement learning,2015,https://doi.org/10.1038/nature14236,https://semanticscholar.org/paper/e0e9a94c4a6ba219e768b4e59f72c18f0a22e23d,6647,15
97efafdb4a3942ab3efba53ded7413199f79c054,Reinforcement Learning: An Introduction,2005,https://doi.org/10.1109/TNN.1998.712192,https://semanticscholar.org/paper/97efafdb4a3942ab3efba53ded7413199f79c054,18170,13
f82e4ff4f003581330338aaae71f60316e58dd26,The Arcade Learning Environment: An Evaluation Platform for General Agents (Extended Abstract),2013,https://doi.org/10.1613/jair.3912,https://semanticscholar.org/paper/f82e4ff4f003581330338aaae71f60316e58dd26,1003,12
69e76e16740ed69f4dc55361a3d319ac2f1293dd,Asynchronous Methods for Deep Reinforcement Learning,2016,,https://semanticscholar.org/paper/69e76e16740ed69f4dc55361a3d319ac2f1293dd,2365,11
e4257bc131c36504a04382290cbc27ca8bb27813,Action-Conditional Video Prediction using Deep Networks in Atari Games,2015,,https://semanticscholar.org/paper/e4257bc131c36504a04382290cbc27ca8bb27813,444,7
c6170fa90d3b2efede5a2e1660cb23e1c824f2ca,Prioritized Experience Replay,2015,,https://semanticscholar.org/paper/c6170fa90d3b2efede5a2e1660cb23e1c824f2ca,864,7
60b7d47758a71978e74edff6dd8dea4d9c791d7a,PILCO: A Model-Based and Data-Efficient Approach to Policy Search,2011,,https://semanticscholar.org/paper/60b7d47758a71978e74edff6dd8dea4d9c791d7a,642,7
3b9732bb07dc99bde5e1f9f75251c6ea5039373e,Deep Reinforcement Learning with Double Q-Learning,2016,,https://semanticscholar.org/paper/3b9732bb07dc99bde5e1f9f75251c6ea5039373e,1388,7
54c4cf3a8168c1b70f91cf78a3dc98b671935492,Reinforcement learning for robots using neural networks,1992,,https://semanticscholar.org/paper/54c4cf3a8168c1b70f91cf78a3dc98b671935492,529,6
846aedd869a00c09b40f1f1f35673cb22bc87490,Mastering the game of Go with deep neural networks and tree search,2016,https://doi.org/10.1038/nature16961,https://semanticscholar.org/paper/846aedd869a00c09b40f1f1f35673cb22bc87490,4912,6
