In [90]:
import json
import requests
from tqdm.notebook import tqdm
from pprint import pprint
from bs4 import BeautifulSoup

# Proceedings years to scan: 2017 through 2020 (the range end is exclusive).
years = range(2017, 2021)

# Phrases looked up, as plain substrings, in each lower-cased abstract.
keywords = [
    'model human',
    'human navigation',
    'theory of mind',
    'inverse planning',
    'inverse reinforcement learning',
    'learn reward function',
    'goal inference',
    'infer the goal',
    'infer the plan',
    'plan recognition',
]

# Accumulates one dict per paper whose abstract matched at least one keyword.
papers = list()

def get_abstract_link(URL):
    """Fetch one paper page and pull out its abstract and download links.

    The page is parsed with BeautifulSoup. Inside the first
    ``<div class="col">`` the text of the last ``<p>`` is taken as the
    abstract, and every ``<a class="btn">`` is inspected: an href containing
    ``'Paper'`` becomes the paper link, one containing ``'Supplemental'``
    becomes the supplemental link. Hrefs are prefixed with the
    proceedings.neurips.cc host, so they are assumed to be site-relative.

    Args:
        URL: Address of the page to fetch.

    Returns:
        A ``(abstract, link, supplemental)`` tuple of strings. ``abstract`` is
        ``""`` when no paragraph is found; ``link`` and ``supplemental`` are
        the string ``'None'`` when the corresponding button is missing. The
        same placeholders are used when the page has no ``div.col`` at all.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not answer within the timeout.
    """
    base_url = 'https://proceedings.neurips.cc'
    abstract = ""
    link = 'None'
    supplemental = 'None'

    # A timeout keeps one stalled connection from hanging the whole scrape.
    page = requests.get(URL, timeout=30)
    soup = BeautifulSoup(page.content, 'html.parser')
    body = soup.find('div', class_='col')
    if body is None:
        return abstract, link, supplemental

    paragraphs = body.find_all('p')
    if paragraphs:
        # The abstract is read from the last paragraph of the column.
        abstract = paragraphs[-1].text

    for button in body.find_all('a', class_='btn'):
        href = button.get('href')
        if not href:
            # A button without a target cannot be a download link.
            continue
        if 'Paper' in href:
            link = base_url + href
        elif 'Supplemental' in href:
            supplemental = base_url + href
    return abstract, link, supplemental

# Walk the proceedings index of every configured year, newest first, fetch each
# listed paper's page and keep the papers whose abstract mentions a keyword.
for year in reversed(years):
    index_url = f'https://proceedings.neurips.cc/paper/{year}'
    # A timeout keeps one stalled connection from hanging the whole scrape.
    index_page = requests.get(index_url, timeout=30)
    index_soup = BeautifulSoup(index_page.content, 'html.parser')
    listing = index_soup.find('div', class_='col')
    if listing is None:
        # Fail loudly: without the listing nothing can be collected for this year.
        raise RuntimeError(f'no paper listing found at {index_url}')

    for item in listing.find_all('li'):
        anchor = item.find('a')
        if anchor is None or not anchor.get('href'):
            # Not a paper entry (no link to follow) - skip it.
            continue
        author_tag = item.find('i')
        author = author_tag.text if author_tag is not None else ''
        paper = anchor.text
        paper_url = 'https://proceedings.neurips.cc' + anchor['href']
        # Distinct names: the loop variable and the index URL are no longer
        # overwritten by the values unpacked here.
        abstract, paper_link, supplemental = get_abstract_link(paper_url)

        # Case-insensitive substring match; lower-case the abstract only once.
        abstract_lower = abstract.lower()
        found = [keyword for keyword in keywords if keyword in abstract_lower]
        if found:
            entry = {
                'paper': paper,
                'author': author,
                'abstract': abstract,
                'link': paper_link,
                'supplemental': supplemental,
                'year': year,
                'keywords': found
            }
            pprint(entry)
            papers.append(entry)

    ## checkpoint after every year, because the scrape takes a long while.
    ## `papers` is not reset between years, so each paper-{year}.json holds
    ## every match collected so far, not only the matches of that year.
    print(len(papers))
    with open(f"paper-{year}.json", "w") as outfile:
        json.dump(papers, outfile, indent=4)

{'abstract': 'Inverse Reinforcement Learning addresses the problem of '
             "inferring an expert's reward function from demonstrations. "
             'However, in many applications, we not only have access to the '
             "expert's near-optimal behaviour, but we also observe part of her "
             'learning process.\n'
             'In this paper, we propose a new algorithm for this setting, in '
             'which the goal is to recover the reward function being optimized '
             'by an agent, given a sequence of policies produced during '
             'learning. Our approach is based on the assumption that the '
             'observed agent is updating her policy parameters along the '
             'gradient direction. Then we extend our method to deal with the '
             'more realistic scenario where we only have access to a dataset '
             'of learning trajectories. For both settings, we provide '
             "theoretical insights into our a

{'abstract': 'Real-world networks, especially the ones that emerge due to '
             'actions of animate agents (e.g. humans, animals), are the result '
             'of underlying strategic mechanisms aimed at maximizing '
             'individual or collective benefits. Learning approaches built to '
             'capture these strategic insights would gain interpretability and '
             'flexibility benefits that are required to generalize beyond '
             'observations.\n'
             'To this end, we consider a game-theoretic formalism of network '
             'emergence that accounts for the underlying strategic mechanisms '
             'and take it to the observed data. \n'
             'We propose MINE (Multi-agent Inverse models of Network Emergence '
             'mechanism), a new learning framework that solves Markov-Perfect '
             'network emergence games using multi-agent inverse reinforcement '
             "learning. MINE jointly discovers agent

{'abstract': 'People routinely infer the goals of others by observing their '
             'actions over time. Remarkably, we can do so even when those '
             'actions lead to failure, enabling us to assist others when we '
             'detect that they might not achieve their goals. How might we '
             'endow machines with similar capabilities? Here we present an '
             'architecture capable of inferring an agent’s goals online from '
             'both optimal and non-optimal sequences of actions. Our '
             'architecture models agents as boundedly-rational planners that '
             'interleave search with execution by replanning, thereby '
             'accounting for sub-optimal behavior. These models are specified '
             'as probabilistic programs, allowing us to represent and perform '
             "efficient Bayesian inference over an agent's goals and internal "
             'planning processes. To perform such inference, we develop '

{'abstract': 'Making decisions in the presence of a strategic opponent '
             'requires one to take into account the opponent’s ability to '
             'actively mask its intended objective. To describe such strategic '
             'situations, we introduce the non-cooperative inverse '
             'reinforcement learning (N-CIRL) formalism. The N-CIRL formalism '
             'consists of two agents with completely misaligned objectives, '
             'where only one of the agents knows the true objective function. '
             'Formally, we model the N-CIRL formalism as a zero-sum Markov '
             'game with one-sided incomplete information. Through interacting '
             'with the more informed player, the less informed player attempts '
             'to both infer and optimize the true objective function. As a '
             'result of the one-sided incomplete information, the multi-stage '
             'game can be decomposed into a sequence of single- stag

{'abstract': 'Methods for learning from demonstration (LfD) have shown success '
             'in acquiring behavior policies by imitating a user. However, '
             'even for a single task, LfD may require numerous demonstrations. '
             'For versatile agents that must learn many tasks via '
             'demonstration, this process would substantially burden the user '
             'if each task were learned in isolation. To address this '
             'challenge, we introduce the novel problem of lifelong learning '
             'from demonstration, which allows the agent to continually build '
             'upon knowledge learned from previously demonstrated tasks to '
             'accelerate the learning of new tasks, reducing the amount of '
             'demonstrations required. As one solution to this problem, we '
             'propose the first lifelong learning approach to inverse '
             'reinforcement learning, which learns consecutive tasks via '
    

{'abstract': 'Inverse reinforcement learning (IRL) attempts to infer human '
             'rewards or preferences from observed behavior. Since human '
             'planning systematically deviates from rationality, several '
             'approaches have been tried to account for specific human '
             'shortcomings. \n'
             'However, the general problem of inferring the reward function of '
             'an agent of unknown rationality has received little attention.\n'
             'Unlike the well-known ambiguity problems in IRL, this one is '
             'practically relevant but cannot be resolved by observing the '
             "agent's policy in enough environments.\n"
             'This paper shows (1) that a No Free Lunch result implies it is '
             'impossible to uniquely decompose a policy into a planning '
             'algorithm and reward function, and (2) that even with a '
             "reasonable simplicity prior/Occam's razor on the set of "


In [88]:
# Write every collected entry to papers.json as 4-space indented JSON.
with open("papers.json", "w") as json_file:
    json_file.write(json.dumps(papers, indent=4))

In [83]:
## for printing a summary by keyword

# For every keyword, collect the positions (within `papers`) of the entries
# whose abstract matched it.
papers_by_keywords = dict((keyword, []) for keyword in keywords)
for position, entry in enumerate(papers):
    for matched in entry['keywords']:
        papers_by_keywords[matched].append(position)

# Show how many papers matched each keyword.
match_counts = {}
for keyword, positions in papers_by_keywords.items():
    match_counts[keyword] = len(positions)
pprint(match_counts)

{'goal inference': 1,
 'human navigation': 0,
 'infer the goal': 1,
 'infer the plan': 0,
 'inverse planning': 1,
 'inverse reinforcement learning': 24,
 'learn reward function': 2,
 'model human': 1,
 'plan recognition': 0,
 'theory of mind': 2}


In [85]:
## for downloading papers

import urllib3
import shutil

# Module-level urllib3 pool manager; download_paper issues its GET requests through it.
c = urllib3.PoolManager()

def download_paper(url, paper_name):
    """Stream the file at ``url`` into ``papers/<paper_name>``.

    Args:
        url: Address to download. An empty string or the placeholder string
            ``'None'`` means "no such file" and is skipped without a request.
        paper_name: File name to use inside the ``papers`` directory. Path
            separators in it are replaced by ``_`` so that a title containing
            ``/`` cannot point into a non-existent sub-directory.

    Returns:
        None. A response whose status is not 200 is reported with ``print``
        and nothing is written, so error pages are never stored as papers.
    """
    import os  # local import: this notebook has no top-level `import os`

    if not url or url == 'None':
        return

    # The target directory may not exist yet on a fresh checkout.
    os.makedirs('papers', exist_ok=True)
    safe_name = paper_name.replace('/', '_').replace(os.sep, '_')

    # preload_content=False streams the body instead of loading it into memory.
    with c.request('GET', url, preload_content=False) as resp:
        if resp.status != 200:
            print(f'Skipping {url}: HTTP {resp.status}')
            return
        with open('papers/' + safe_name, 'wb') as out_file:
            shutil.copyfileobj(resp, out_file)
        # Body fully consumed: hand the connection back to the pool.
        resp.release_conn()
    
## for downloading the papers
# '' and 'None' are the placeholder strings stored in an entry when the paper
# page offered no such link; there is nothing to download for them.
for index, paper in enumerate(papers):
    title = paper['paper']
    # Print only the title: the full entry includes the whole abstract.
    print(index, title)

    paper_url = paper['link']
    if paper_url not in ('', 'None'):
        download_paper(paper_url, title + '.pdf')

    supplemental_url = paper['supplemental']
    if supplemental_url not in ('', 'None'):
        # Use a separate file name so the supplemental no longer overwrites the
        # paper, and keep the extension from the URL rather than assuming PDF
        # (falls back to 'pdf' when the URL has none).
        file_name = supplemental_url.rsplit('/', 1)[-1]
        extension = file_name.rsplit('.', 1)[-1] if '.' in file_name else 'pdf'
        download_paper(supplemental_url, f'{title}-supplemental.{extension}')

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
