In [3]:
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
import requests

import json
import joblib
import pickle

In [2]:
base_url = 'https://papers.nips.cc'

base_request = requests.get(base_url)

In [3]:
base_request.status_code

200

In [4]:
soup = BeautifulSoup(base_request.text, parser='html.parser')
print(soup)

<!DOCTYPE html>
<html lang="en">
<head>
<!-- Required meta tags -->
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport"/>
<title>List of Proceedings</title>
<link href="/static/papers/css/papers.css" rel="stylesheet"/>
<!-- Bootstrap CSS -->
<!-- https://codepen.io/surjithctly/pen/PJqKzQ -->
<link crossorigin="anonymous" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" rel="stylesheet"/>
<link href="/static/menus/css/menus.css" id="bootstrap-css" rel="stylesheet"/>
<link crossorigin="anonymous" href="https://use.fontawesome.com/releases/v5.8.1/css/all.css" integrity="sha384-50oBUHEmvpQ+1lW4y57PTFmhCaXp0ML5d60M1M7uH2+nqUivzIebhndOJK28anvf" rel="stylesheet"/>
<script async="" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-MML-AM_CHTML" type="text/javascript">
</script>
<script type="tex

In [5]:
year_specific_links = []

for year_link in soup.find_all('a'):
    if "paper_files" in year_link.attrs['href']:
        year_specific_links.append(f"{base_url}{year_link.attrs['href']}")

In [6]:
year_specific_links

['https://papers.nips.cc/paper_files/paper/2023',
 'https://papers.nips.cc/paper_files/paper/2022',
 'https://papers.nips.cc/paper_files/paper/2021',
 'https://papers.nips.cc/paper_files/paper/2020',
 'https://papers.nips.cc/paper_files/paper/2019',
 'https://papers.nips.cc/paper_files/paper/2018',
 'https://papers.nips.cc/paper_files/paper/2017',
 'https://papers.nips.cc/paper_files/paper/2016',
 'https://papers.nips.cc/paper_files/paper/2015',
 'https://papers.nips.cc/paper_files/paper/2014',
 'https://papers.nips.cc/paper_files/paper/2013',
 'https://papers.nips.cc/paper_files/paper/2012',
 'https://papers.nips.cc/paper_files/paper/2011',
 'https://papers.nips.cc/paper_files/paper/2010',
 'https://papers.nips.cc/paper_files/paper/2009',
 'https://papers.nips.cc/paper_files/paper/2008',
 'https://papers.nips.cc/paper_files/paper/2007',
 'https://papers.nips.cc/paper_files/paper/2006',
 'https://papers.nips.cc/paper_files/paper/2005',
 'https://papers.nips.cc/paper_files/paper/2004',


### navigate to each year's page and get the links for all the papers of that year

In [7]:
all_years_soup = {}

for year_url in year_specific_links:
    year_request = requests.get(year_url)
    print(f"for year {year_url[-4:]}, status code {year_request.status_code}")
    year_soup = BeautifulSoup(year_request.text, parser='html.parser')
    all_years_soup[f"{year_url[-4:]}"] = year_soup

for year 2023, status code 200
for year 2022, status code 200
for year 2021, status code 200
for year 2020, status code 200
for year 2019, status code 200
for year 2018, status code 200
for year 2017, status code 200
for year 2016, status code 200
for year 2015, status code 200
for year 2014, status code 200
for year 2013, status code 200
for year 2012, status code 200
for year 2011, status code 200
for year 2010, status code 200
for year 2009, status code 200
for year 2008, status code 200
for year 2007, status code 200
for year 2006, status code 200
for year 2005, status code 200
for year 2004, status code 200
for year 2003, status code 200
for year 2002, status code 200
for year 2001, status code 200
for year 2000, status code 200
for year 1999, status code 200
for year 1998, status code 200
for year 1997, status code 200
for year 1996, status code 200
for year 1995, status code 200
for year 1994, status code 200
for year 1993, status code 200
for year 1992, status code 200
for year

In [8]:
all_years_soup.keys()

dict_keys(['2023', '2022', '2021', '2020', '2019', '2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010', '2009', '2008', '2007', '2006', '2005', '2004', '2003', '2002', '2001', '2000', '1999', '1998', '1997', '1996', '1995', '1994', '1993', '1992', '1991', '1990', '1989', '1988', '1987'])

In [9]:
paper_details = []
# for each year
for year in all_years_soup.keys():
    print(f"fetching for {year}")
    # for each paper in that year
    for li in all_years_soup[f"{year}"].find('ul', class_="paper-list").find_all('li'):
        paper = {}
        paper['paper_year'] = f"{year}"
        paper['paper_url'] = f"{base_url}{li.a['href']}"
        paper['paper_title'] = li.a.text
        paper['paper_author'] = li.i.text
        paper_details.append(paper)
    
df = pd.DataFrame(paper_details)

fetching for 2023
fetching for 2022
fetching for 2021
fetching for 2020
fetching for 2019
fetching for 2018
fetching for 2017
fetching for 2016
fetching for 2015
fetching for 2014
fetching for 2013
fetching for 2012
fetching for 2011
fetching for 2010
fetching for 2009
fetching for 2008
fetching for 2007
fetching for 2006
fetching for 2005
fetching for 2004
fetching for 2003
fetching for 2002
fetching for 2001
fetching for 2000
fetching for 1999
fetching for 1998
fetching for 1997
fetching for 1996
fetching for 1995
fetching for 1994
fetching for 1993
fetching for 1992
fetching for 1991
fetching for 1990
fetching for 1989
fetching for 1988
fetching for 1987


In [10]:
df

Unnamed: 0,paper_year,paper_url,paper_title,paper_author
0,2023,https://papers.nips.cc/paper_files/paper/2023/...,Modelling Cellular Perturbations with the Spar...,"Michael Bereket, Theofanis Karaletsos"
1,2023,https://papers.nips.cc/paper_files/paper/2023/...,Cross-Episodic Curriculum for Transformer Agents,"Lucy Xiaoyang Shi, Yunfan Jiang, Jake Grigsby,..."
2,2023,https://papers.nips.cc/paper_files/paper/2023/...,PaintSeg: Painting Pixels for Training-free Se...,"Xiang Li, Chung-Ching Lin, Yinpeng Chen, Ziche..."
3,2023,https://papers.nips.cc/paper_files/paper/2023/...,Bootstrapping Vision-Language Learning with De...,"Yiren Jian, Chongyang Gao, Soroush Vosoughi"
4,2023,https://papers.nips.cc/paper_files/paper/2023/...,Path following algorithms for $\ell_2$-regular...,"Yunzhang Zhu, Renxiong Liu"
...,...,...,...,...
20281,1987,https://papers.nips.cc/paper_files/paper/1987/...,Connecting to the Past,Bruce MacDonald
20282,1987,https://papers.nips.cc/paper_files/paper/1987/...,PARTITIONING OF SENSORY DATA BY A CORTICAL NET...,"Richard Granger, Jose Ambros-Ingerson, Howard ..."
20283,1987,https://papers.nips.cc/paper_files/paper/1987/...,A Dynamical Approach to Temporal Pattern Proce...,"W. Stornetta, Tad Hogg, Bernardo Huberman"
20284,1987,https://papers.nips.cc/paper_files/paper/1987/...,Minkowski-r Back-Propagation: Learning in Conn...,"Stephen Hanson, David Burr"


In [11]:
df['paper_year'].value_counts(dropna=False)

paper_year
2023    3540
2022    2834
2021    2334
2020    1898
2019    1428
2018    1009
2017     679
2016     569
2014     411
2015     403
2012     370
2013     360
2011     306
2010     292
2009     262
2008     250
2007     217
2004     207
2002     207
2005     207
2006     204
2003     198
2001     197
1993     158
1996     152
1995     152
2000     152
1998     151
1997     150
1999     150
1991     144
1990     143
1994     140
1992     127
1989     101
1988      94
1987      90
Name: count, dtype: int64

In [12]:
df['paper_author'].nunique()

19440

In [13]:
df.to_csv('paper_details_without_abstract.csv', index=False)

### get the abstracts for each paper

In [4]:
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
import requests

import json
import joblib
import pickle

import aiohttp
import asyncio
from tqdm.asyncio import tqdm
from aiohttp import ClientSession

In [3]:
df = pd.read_csv('paper_details_without_abstract.csv')
print(df.shape)
df.head(2)

(20286, 4)


Unnamed: 0,paper_year,paper_url,paper_title,paper_author
0,2023,https://papers.nips.cc/paper_files/paper/2023/...,Modelling Cellular Perturbations with the Spar...,"Michael Bereket, Theofanis Karaletsos"
1,2023,https://papers.nips.cc/paper_files/paper/2023/...,Cross-Episodic Curriculum for Transformer Agents,"Lucy Xiaoyang Shi, Yunfan Jiang, Jake Grigsby,..."


In [6]:
import nest_asyncio
nest_asyncio.apply()

In [7]:
async def fetch_html(url: str, session: ClientSession) -> dict:
    try:
        async with session.get(url) as response:
            html = await response.text()
            return {"paper_url": url, "paper_html": html}
    except Exception as e:
        return {"paper_url": url, "paper_html": None, "error": str(e)}

async def fetch_all_html(urls: list) -> list:
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in urls:
            tasks.append(fetch_html(url, session))
        results = []
        for task in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
            results.append(await task)
        return results

def scrape_urls(df: pd.DataFrame) -> pd.DataFrame:
    urls = df['paper_url'].tolist()
    loop = asyncio.get_event_loop()
    results = loop.run_until_complete(fetch_all_html(urls))
    return pd.DataFrame(results)

result_df = scrape_urls(df)

100%|████████████████████████████████████████████████████████████████████████████| 20286/20286 [02:09<00:00, 156.37it/s]


In [8]:
result_df.head()

Unnamed: 0,paper_url,paper_html,error
0,https://papers.nips.cc/paper_files/paper/2014/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",
1,https://papers.nips.cc/paper_files/paper/2014/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",
2,https://papers.nips.cc/paper_files/paper/2022/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",
3,https://papers.nips.cc/paper_files/paper/2022/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",
4,https://papers.nips.cc/paper_files/paper/2014/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",


In [9]:
result_df.shape

(20286, 3)

In [10]:
result_df['error'].value_counts(dropna=False)

error
NaN                    19522
Server disconnected      764
Name: count, dtype: int64

In [12]:
error_df = result_df.loc[result_df['error'].notna()].reset_index(drop=True)
error_df

Unnamed: 0,paper_url,paper_html,error
0,https://papers.nips.cc/paper_files/paper/2019/...,,Server disconnected
1,https://papers.nips.cc/paper_files/paper/2020/...,,Server disconnected
2,https://papers.nips.cc/paper_files/paper/2022/...,,Server disconnected
3,https://papers.nips.cc/paper_files/paper/2023/...,,Server disconnected
4,https://papers.nips.cc/paper_files/paper/2023/...,,Server disconnected
...,...,...,...
759,https://papers.nips.cc/paper_files/paper/2013/...,,Server disconnected
760,https://papers.nips.cc/paper_files/paper/2018/...,,Server disconnected
761,https://papers.nips.cc/paper_files/paper/2018/...,,Server disconnected
762,https://papers.nips.cc/paper_files/paper/2018/...,,Server disconnected


In [13]:
result_df_error = scrape_urls(error_df)

100%|████████████████████████████████████████████████████████████████████████████████| 764/764 [00:06<00:00, 116.86it/s]


In [15]:
result_df_error['error'].value_counts(dropna=False)

error
NaN                    724
Server disconnected     40
Name: count, dtype: int64

In [16]:
error_df2 = result_df_error.loc[result_df_error['error'].notna()].reset_index(drop=True)
error_df2

Unnamed: 0,paper_url,paper_html,error
0,https://papers.nips.cc/paper_files/paper/2021/...,,Server disconnected
1,https://papers.nips.cc/paper_files/paper/2019/...,,Server disconnected
2,https://papers.nips.cc/paper_files/paper/2019/...,,Server disconnected
3,https://papers.nips.cc/paper_files/paper/2019/...,,Server disconnected
4,https://papers.nips.cc/paper_files/paper/1992/...,,Server disconnected
5,https://papers.nips.cc/paper_files/paper/2020/...,,Server disconnected
6,https://papers.nips.cc/paper_files/paper/2022/...,,Server disconnected
7,https://papers.nips.cc/paper_files/paper/2022/...,,Server disconnected
8,https://papers.nips.cc/paper_files/paper/2016/...,,Server disconnected
9,https://papers.nips.cc/paper_files/paper/2022/...,,Server disconnected


In [17]:
result_df_error2 = scrape_urls(error_df2)

100%|███████████████████████████████████████████████████████████████████████████████████| 40/40 [00:01<00:00, 24.74it/s]


In [19]:
result_df_error2

Unnamed: 0,paper_url,paper_html
0,https://papers.nips.cc/paper_files/paper/2007/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
1,https://papers.nips.cc/paper_files/paper/1999/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
2,https://papers.nips.cc/paper_files/paper/2007/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
3,https://papers.nips.cc/paper_files/paper/2023/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
4,https://papers.nips.cc/paper_files/paper/1992/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
5,https://papers.nips.cc/paper_files/paper/2023/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
6,https://papers.nips.cc/paper_files/paper/2021/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
7,https://papers.nips.cc/paper_files/paper/2019/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
8,https://papers.nips.cc/paper_files/paper/2023/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
9,https://papers.nips.cc/paper_files/paper/2022/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."


In [22]:
final_html_df = pd.concat([result_df_error2, result_df_error, result_df], ignore_index=True).drop_duplicates(subset=['paper_url'], keep='first')
print(final_html_df.shape)

(20286, 3)


In [23]:
df.shape

(20286, 4)

In [24]:
final_html_df.head(3)

Unnamed: 0,paper_url,paper_html,error
0,https://papers.nips.cc/paper_files/paper/2007/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",
1,https://papers.nips.cc/paper_files/paper/1999/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",
2,https://papers.nips.cc/paper_files/paper/2007/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",


In [25]:
final_html_df['error'].value_counts(dropna=False)

error
NaN    20286
Name: count, dtype: int64

In [26]:
html_df = final_html_df.drop('error', axis=1)
html_df.head(3)

Unnamed: 0,paper_url,paper_html
0,https://papers.nips.cc/paper_files/paper/2007/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
1,https://papers.nips.cc/paper_files/paper/1999/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
2,https://papers.nips.cc/paper_files/paper/2007/...,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."


In [27]:
html_df.shape

(20286, 2)

In [28]:
final_df = df.merge(html_df, on='paper_url', how='inner')
print(final_df.shape)

(20286, 5)


In [29]:
final_df.head(3)

Unnamed: 0,paper_year,paper_url,paper_title,paper_author,paper_html
0,2023,https://papers.nips.cc/paper_files/paper/2023/...,Modelling Cellular Perturbations with the Spar...,"Michael Bereket, Theofanis Karaletsos","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
1,2023,https://papers.nips.cc/paper_files/paper/2023/...,Cross-Episodic Curriculum for Transformer Agents,"Lucy Xiaoyang Shi, Yunfan Jiang, Jake Grigsby,...","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."
2,2023,https://papers.nips.cc/paper_files/paper/2023/...,PaintSeg: Painting Pixels for Training-free Se...,"Xiang Li, Chung-Ching Lin, Yinpeng Chen, Ziche...","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."


In [30]:
final_df.to_csv('paper_details_with_html.csv', index=False)

In [31]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20286 entries, 0 to 20285
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   paper_year    20286 non-null  int64 
 1   paper_url     20286 non-null  object
 2   paper_title   20286 non-null  object
 3   paper_author  20282 non-null  object
 4   paper_html    20286 non-null  object
dtypes: int64(1), object(4)
memory usage: 792.5+ KB


### fetch cleaned abstract

In [4]:
final_df = pd.read_csv('paper_details_with_html.csv')
final_df.shape

(20286, 5)

In [5]:
final_df.head(1)

Unnamed: 0,paper_year,paper_url,paper_title,paper_author,paper_html
0,2023,https://papers.nips.cc/paper_files/paper/2023/...,Modelling Cellular Perturbations with the Spar...,"Michael Bereket, Theofanis Karaletsos","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he..."


In [6]:
#t = BeautifulSoup(final_df.loc[0, 'paper_html'], parser='html.parser')

In [7]:
#t.find('h4', string='Abstract')

In [8]:
#t.find_all('script')[1]

In [9]:
#t

In [62]:
def get_abstract(text):
    try:
        paper_soup = BeautifulSoup(text, parser='html.parser')
        start_tag = paper_soup.find('h4', string='Abstract')
        end_tag = paper_soup.find_all('script')[1]
        concatenated_text = ""
        # Find all the tags between the start and end tags
        for tag in start_tag.find_next_siblings():
            if tag == end_tag:
                break
            if tag.name == 'p' or tag.name=='pre':  # Only concatenate the text of <p> tags
                concatenated_text += tag.get_text()
    except:
        concatenated_text = np.nan
    return concatenated_text

In [63]:
from tqdm import tqdm
tqdm.pandas()

In [64]:
x = final_df['paper_html'].progress_apply(get_abstract)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20286/20286 [00:48<00:00, 418.50it/s]


In [65]:
final_df['paper_abstract'] = x.copy()

In [66]:
final_df['paper_abstract'].value_counts(dropna=False)

paper_abstract
Abstract Unavailable                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     

In [67]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20286 entries, 0 to 20285
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   paper_year      20286 non-null  int64 
 1   paper_url       20286 non-null  object
 2   paper_title     20286 non-null  object
 3   paper_author    20282 non-null  object
 4   paper_html      20286 non-null  object
 5   paper_abstract  20286 non-null  object
dtypes: int64(1), object(5)
memory usage: 951.0+ KB


In [68]:
final_df.head(2)

Unnamed: 0,paper_year,paper_url,paper_title,paper_author,paper_html,paper_abstract
0,2023,https://papers.nips.cc/paper_files/paper/2023/...,Modelling Cellular Perturbations with the Spar...,"Michael Bereket, Theofanis Karaletsos","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Generative models of observations under interv...
1,2023,https://papers.nips.cc/paper_files/paper/2023/...,Cross-Episodic Curriculum for Transformer Agents,"Lucy Xiaoyang Shi, Yunfan Jiang, Jake Grigsby,...","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...","We present a new algorithm, Cross-Episodic Cur..."


In [None]:
# final_df.to_csv('papers_with_abstract.csv', index=False)
final_df[['paper_year', 'paper_url', 'paper_title', 'paper_author', 'paper_abstract']].to_parquet('ml_research_assistant/data/papers_with_abstract.parquet', index=False)

In [77]:
final_df.sample(1)

Unnamed: 0,paper_year,paper_url,paper_title,paper_author,paper_html,paper_abstract
9028,2020,https://papers.nips.cc/paper_files/paper/2020/...,On the Tightness of Semidefinite Relaxations f...,Richard Zhang,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",The robustness of a neural network to adversar...


In [71]:
final_df.loc[6276, 'paper_url']

'https://papers.nips.cc/paper_files/paper/2022/hash/f649556471416b35e60ae0de7c1e3619-Abstract-Conference.html'

In [72]:
final_df.loc[6276, 'paper_abstract']

'Recent progress in reinforcement learning (RL) has started producing generally capable agents that can solve a distribution of complex environments. These agents are typically tested on fixed, human-authored environments. On the other hand, quality diversity (QD) optimization has been proven to be an effective component of environment generation algorithms, which can generate collections of high-quality environments that are diverse in the resulting agent behaviors. However, these algorithms require potentially expensive simulations of agents on newly generated environments. We propose Deep Surrogate Assisted Generation of Environments (DSAGE), a sample-efficient QD environment generation algorithm that maintains a deep surrogate model for predicting agent behaviors in new environments. Results in two benchmark domains show that DSAGE significantly outperforms existing QD environment generation algorithms in discovering collections of environments that elicit diverse behaviors of a st

In [73]:
final_df.loc[final_df['paper_abstract']==""]

Unnamed: 0,paper_year,paper_url,paper_title,paper_author,paper_html,paper_abstract


In [74]:
for x in final_df.loc[final_df['paper_abstract']=="", 'paper_url']:
    print(x)

In [75]:
final_df.loc[final_df['paper_abstract']=="Abstract Unavailable"]

Unnamed: 0,paper_year,paper_url,paper_title,paper_author,paper_html,paper_abstract
15495,2012,https://papers.nips.cc/paper_files/paper/2012/...,A dynamic excitatory-inhibitory network in a V...,Juan Huo,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
15700,2012,https://papers.nips.cc/paper_files/paper/2012/...,An Integer Optimization Approach to Associativ...,"Dimitris Bertsimas, Allison Chang, Cynthia Rudin","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
15800,2012,https://papers.nips.cc/paper_files/paper/2012/...,Online Sum-Product Computation Over Trees,"Mark Herbster, Stephen Pasteris, Fabio Vitale","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
17268,2006,https://papers.nips.cc/paper_files/paper/2006/...,Learning to be Bayesian without Supervision,"Martin Raphan, Eero Simoncelli","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
17357,2006,https://papers.nips.cc/paper_files/paper/2006/...,TrueSkill™: A Bayesian Skill Rating System,"Ralf Herbrich, Tom Minka, Thore Graepel","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
17850,2003,https://papers.nips.cc/paper_files/paper/2003/...,Local Phase Coherence and the Perception of Blur,"Zhou Wang, Eero Simoncelli","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
18073,2002,https://papers.nips.cc/paper_files/paper/2002/...,PAC-Bayes & Margins,"John Langford, John Shawe-Taylor","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
18075,2002,https://papers.nips.cc/paper_files/paper/2002/...,Maximally Informative Dimensions: Analyzing Ne...,"Tatyana Sharpee, Nicole Rust, William Bialek","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
18244,2001,https://papers.nips.cc/paper_files/paper/2001/...,Learning Spike-Based Correlations and Conditio...,"Aaron Shon, David Hsu, Chris Diorio","\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable
18322,2001,https://papers.nips.cc/paper_files/paper/2001/...,Perceptual Metamers in Stereoscopic Vision,B. Backus,"\n\n\n\n<!doctype html>\n<html lang=""en"">\n<he...",Abstract Unavailable


In [76]:
for x in final_df.loc[final_df['paper_abstract']=="Abstract Unavailable", 'paper_url']:
    print(x)

https://papers.nips.cc/paper_files/paper/2012/hash/12780ea688a71dabc284b064add459a4-Abstract.html
https://papers.nips.cc/paper_files/paper/2012/hash/9e7ba617ad9e69b39bd0c29335b79629-Abstract.html
https://papers.nips.cc/paper_files/paper/2012/hash/e2c4a40d50b47094f571e40efead3900-Abstract.html
https://papers.nips.cc/paper_files/paper/2006/hash/908c9a564a86426585b29f5335b619bc-Abstract.html
https://papers.nips.cc/paper_files/paper/2006/hash/f44ee263952e65b3610b8ba51229d1f9-Abstract.html
https://papers.nips.cc/paper_files/paper/2003/hash/565030e1fce4e481f9823a7de3b8a047-Abstract.html
https://papers.nips.cc/paper_files/paper/2002/hash/68d309812548887400e375eaa036d2f1-Abstract.html
https://papers.nips.cc/paper_files/paper/2002/hash/69dafe8b58066478aea48f3d0f384820-Abstract.html
https://papers.nips.cc/paper_files/paper/2001/hash/4afd521d77158e02aed37e2274b90c9c-Abstract.html
https://papers.nips.cc/paper_files/paper/2001/hash/b5f1e8fb36cd7fbeb7988e8639ac79e9-Abstract.html
https://papers.nips.