# Download the non-blind ICLR2025 data from OpenReview

In [1]:
import numpy as np
import pandas as pd
import pylab as plt

import requests
import time

In [2]:
# Load iclr2025v1: the previous release of the dataset. Its `decision` and
# `scores` columns are copied over to the freshly downloaded papers in a
# later cell.

iclr_old = pd.read_parquet('../data/iclr25v1.parquet')

# Display the table as the cell output
iclr_old

Unnamed: 0,year,id,title,abstract,authors,decision,scores,keywords,labels
0,2017,B1-Hhnslg,Prototypical Networks for Few-shot Learning,A recent approach to few-shot classification c...,"Jake Snell, Kevin Swersky, Richard Zemel",Reject,"[6, 4, 5]","[deep learning, transfer learning]",transfer learning
1,2017,B1-q5Pqxl,Machine Comprehension Using Match-LSTM and Ans...,Machine comprehension of text is an important ...,"Shuohang Wang, Jing Jiang",Accept (Poster),"[6, 6, 7]","[natural language processing, deep learning]",language models
2,2017,B16Jem9xe,Learning in Implicit Generative Models,Generative adversarial networks (GANs) provide...,"Shakir Mohamed, Balaji Lakshminarayanan",Invite to Workshop Track,"[8, 7, 6]",[unsupervised learning],unlabeled
3,2017,B16dGcqlx,Third Person Imitation Learning,Reinforcement learning (RL) makes it possible ...,"Bradly C Stadie, Pieter Abbeel, Ilya Sutskever",Accept (Poster),"[6, 5, 6]",[],unlabeled
4,2017,B184E5qee,Improving Neural Language Models with a Contin...,We propose an extension to neural network lang...,"Edouard Grave, Armand Joulin, Nicolas Usunier",Accept (Poster),"[7, 9, 5]",[natural language processing],language models
...,...,...,...,...,...,...,...,...,...
34519,2025,zxO4WuVGns,Inverse decision-making using neural amortized...,Bayesian observer and actor models have provid...,,,[],"[bayesian actor models, perception and action,...",unlabeled
34520,2025,zxbQLztmwb,Emergent Symbol-Like Number Variables in Artif...,"Symbolic programs, defined by discrete variabl...",,,[],"[mechanistic interpretability, numeric cogniti...",unlabeled
34521,2025,zxqdVo9FjY,Generalization for Least Squares Regression wi...,Random matrix theory has proven to be a valuab...,,,[],"[generalization, random matrix theory, spiked ...",unlabeled
34522,2025,zyGrziIVdE,Exploration by Running Away from the Past,The ability to explore efficiently and effecti...,,,[],"[reinforcement learning, exploration, deep lea...",RL


In [3]:
# Download titles/abstracts/authors/keywords of all papers
# We are doing it here to disambiguate the authors by author IDs

# Note: a bug fix in author name parsing in 2017

# Note: I'm now getting 5 extra papers for 2018, no idea why

# Note: I'm now getting 1500+ extra papers for 2025, no idea why

# Parallel lists: entry i of every list describes the same submission,
# so each page must append exactly len(page) items to all of them.
titles = []
abstracts = []
years = []
forum_ids = []
decisions = []
authors = []
author_ids = []
keywords = []

# How often one page is requested while the API keeps answering with a
# RateLimitError (with a 30 second pause after each such answer).
MAX_ATTEMPTS = 5

# Decision implied by the query a paper was found under; any other query
# leaves the decision empty.
decision_for_query = {
    'Withdrawn_Submission': 'Withdrawn',
    'Desk_Rejected_Submission': 'Desk rejected',
    'Rejected_Submission': 'Reject',
}

for year in [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]:
    print(year, end=': ')
    for query in ['submission', 'Submission', 'Blind_Submission',
                  'Withdrawn_Submission', 'Rejected_Submission',
                  'Desk_Rejected_Submission', '']:
        if year <= 2023:
            # Up to 2023 the papers are looked up by invitation on
            # api.openreview.net; the empty query is only used for 2024+.
            if query == '':
                continue
            # The 2017 invitation spells "conference" in lower case.
            venue = 'conference' if year <= 2017 else 'Conference'
            url = f'https://api.openreview.net/notes?invitation=ICLR.cc%2F{year}%2F{venue}%2F-%2F{query}'
        else:
            # From 2024 on the lookup is by venue id on api2.openreview.net;
            # the empty query addresses the bare ".../Conference" venue id.
            suffix = '/' + query if query != '' else ''
            url = f'https://api2.openreview.net/notes?content.venueid=ICLR.cc/{year}/Conference{suffix}'

        for offset in range(0, 10_000, 1000):
            page_url = url + f'&offset={offset}'

            # Keep asking while rate-limited. If the limit is still in place
            # after MAX_ATTEMPTS, the 'notes' lookup below raises KeyError.
            for _ in range(MAX_ATTEMPTS):
                response = requests.get(page_url).json()
                if response.get('name') != 'RateLimitError':
                    break
                time.sleep(30)

            df = pd.DataFrame(response['notes'])
            if len(df) == 0:
                # An empty page means this query is exhausted.
                break

            print(len(df), end=' ')
            contents = list(df['content'])

            if year < 2024:
                # Before 2024 the content fields hold the values directly.
                titles    += [c['title'].strip() for c in contents]
                abstracts += [c['abstract'].strip() for c in contents]
                keywords  += [c['keywords'] for c in contents]
                if year == 2017:
                    # 2017 notes carry authors either as a list or as a
                    # ready-made string, and ids either as 'authorids' or
                    # as 'author_emails'.
                    authors    += [', '.join(c['authors']) if isinstance(c['authors'], list)
                                   else c['authors'] for c in contents]
                    # `+=`, not `=`: a plain assignment would throw away all
                    # ids collected so far and break the list alignment.
                    author_ids += [', '.join(c['authorids']) if 'authorids' in c
                                   else c['author_emails'] for c in contents]
                else:
                    authors    += [', '.join(c['authors']) for c in contents]
                    author_ids += [', '.join(c['authorids']) for c in contents]
            else:
                # From 2024 on every content field is wrapped as {'value': ...}.
                titles    += [c['title']['value'].strip() for c in contents]
                abstracts += [c['abstract']['value'].strip() for c in contents]
                keywords  += [c['keywords']['value'] for c in contents]
                # Author fields can be absent; decide per note rather than
                # from the first note of the page.
                authors    += [', '.join(c['authors']['value']) if 'authors' in c
                               else '' for c in contents]
                author_ids += [', '.join(c['authorids']['value']) if 'authorids' in c
                               else '' for c in contents]

            years     += [year] * len(df)
            forum_ids += list(df.forum)
            decisions += [decision_for_query.get(query, '')] * len(df)
    print('')
print('')

print(f'Found {len(titles)} papers\n')

2017: 490 
2018: 935 83 
2019: 1000 419 160 
2020: 1000 1000 213 369 12 
2021: 1000 1000 594 403 17 
2022: 1000 1000 617 779 26 
2023: 1000 1000 1000 793 1000 144 18 
2024: 1000 651 1000 1000 1000 440 53 1000 1000 260 
2025: 1000 1000 953 1000 1000 1000 1000 945 70 1000 1000 1000 704 

Found 36148 papers



In [4]:
# Assemble the downloaded parallel lists into one table. `scores` and
# `labels` start out empty; keywords are lower-cased.

iclr = pd.DataFrame.from_dict({
    'year': years,
    'id': forum_ids,
    'title': titles,
    'abstract': abstracts,
    'authors': authors,
    'author_ids': author_ids,
    'decision': decisions,
    # One fresh list per row: `[[]] * n` would put the very same list
    # object into every row, so an in-place change would hit all of them.
    'scores': [[] for _ in forum_ids],
    'keywords': [[k.lower() for k in keys] for keys in keywords],
    'labels': [""] * len(forum_ids)
})

# Removing author IDs for papers <= 2020 because emails were used as IDs
# and some authors had missing emails

iclr.loc[iclr.year <= 2020, "author_ids"] = ""

# Kicking out nonsense abstracts

# Per-year counts before the filter (2017 through 2025)
n_submissions = [np.sum(iclr.year == y) for y in np.arange(2017, 2026)]
print('Submisions per year:', n_submissions, '\n')

# Keep only the papers whose abstract has at least 100 characters
mask = np.array([len(a) >= 100 for a in iclr.abstract])

print(f'Removing {np.sum(~mask)} submissions with abstract length below 100 characters\n')

iclr = iclr[mask].reset_index(drop=True)

# Per-year counts after the filter
n_submissions = [np.sum(iclr.year == y) for y in np.arange(2017, 2026)]
print('Submisions per year:', n_submissions, '\n')

print('Dataset size:', len(iclr))

# Sort by year and id; the index is reset so that row labels equal row
# positions again after sorting

iclr = iclr.sort_values(by=['year', 'id']).reset_index(drop=True)

iclr

Submisions per year: [490, 1018, 1579, 2594, 3014, 3422, 4955, 7404, 11672] 

Removing 35 submissions with abstract length below 100 characters

Submisions per year: [489, 1012, 1569, 2593, 3009, 3422, 4955, 7401, 11663] 

Dataset size: 36113


Unnamed: 0,year,id,title,abstract,authors,author_ids,decision,scores,keywords,labels
0,2017,B1-Hhnslg,Prototypical Networks for Few-shot Learning,A recent approach to few-shot classification c...,"Jake Snell, Kevin Swersky, Richard Zemel",,,[],"[deep learning, transfer learning]",
1,2017,B1-q5Pqxl,Machine Comprehension Using Match-LSTM and Ans...,Machine comprehension of text is an important ...,"Shuohang Wang, Jing Jiang",,,[],"[natural language processing, deep learning]",
2,2017,B16Jem9xe,Learning in Implicit Generative Models,Generative adversarial networks (GANs) provide...,"Shakir Mohamed, Balaji Lakshminarayanan",,,[],[unsupervised learning],
3,2017,B16dGcqlx,Third Person Imitation Learning,Reinforcement learning (RL) makes it possible ...,"Bradly C Stadie, Pieter Abbeel, Ilya Sutskever",,,[],[],
4,2017,B184E5qee,Improving Neural Language Models with a Contin...,We propose an extension to neural network lang...,"Edouard Grave, Armand Joulin, Nicolas Usunier",,,[],[natural language processing],
...,...,...,...,...,...,...,...,...,...,...
36108,2025,zxg6601zoc,Re-Imagining Multimodal Instruction Tuning: A ...,Multimodal instruction tuning has proven to be...,"Yiyang Liu, James Chenhao Liang, Ruixiang Tang...","~Yiyang_Liu3, ~James_Chenhao_Liang1, ~Ruixiang...",,[],"[representation tuning, large multimodal model...",
36109,2025,zxqdVo9FjY,Generalization for Least Squares Regression wi...,Random matrix theory has proven to be a valuab...,"Jiping Li, Rishi Sonthalia","~Jiping_Li1, ~Rishi_Sonthalia1",Reject,[],"[generalization, random matrix theory, spiked ...",
36110,2025,zyGrziIVdE,Exploration by Running Away from the Past,The ability to explore efficiently and effecti...,"Paul-Antoine LE TOLGUENEC, Yann Besse, Florent...","~Paul-Antoine_LE_TOLGUENEC1, ~Yann_Besse2, ~Fl...",Reject,[],"[reinforcement learning, exploration, deep lea...",
36111,2025,zz9jAssrwL,Bayesian Policy Distillation via Offline RL fo...,High-performance deep reinforcement learning f...,"Jangwon Kim, Yoonsu Jang, Jonghyeok Park, Yoon...","~Jangwon_Kim2, ~Yoonsu_Jang1, ~Jonghyeok_Park3...",Withdrawn,[],"[neural network compression, reinforcement lea...",


In [5]:
# Copy decisions/scores from iclr25v1

# Map every old paper id to the position of its first occurrence, so each
# lookup is a dict access instead of a scan over all of iclr_old.
old_position = {}
for pos, old_id in enumerate(iclr_old.id.values):
    old_position.setdefault(old_id, pos)

# Positional views of the two columns that get copied
old_decisions = iclr_old['decision'].values
old_scores = iclr_old['scores'].values

for i, idd in enumerate(iclr.id):
    pos = old_position.get(idd)
    if pos is None:
        # Paper is not part of iclr25v1: keep what the download produced
        continue
    iclr.at[i, 'decision'] = old_decisions[pos]
    iclr.at[i, 'scores'] = old_scores[pos]

# Five papers from 2018 are withdrawn without any info and are kept anonymous
# For some reason we did not have them in prior versions of the iclr dataset
# {'B1O3OgbRW', 'B1oOAeb0b', 'HyNyyCxA-', 'HyowcvgAZ', 'SyrYCWWRW'}

# Every pre-2025 paper that is absent from iclr25v1 is marked as withdrawn
missing_info = set(iclr[iclr.year < 2025].id) - set(iclr_old[iclr_old.year < 2025].id)
iclr.loc[iclr.id.isin(missing_info), 'decision'] = 'Withdrawn'

iclr

Unnamed: 0,year,id,title,abstract,authors,author_ids,decision,scores,keywords,labels
0,2017,B1-Hhnslg,Prototypical Networks for Few-shot Learning,A recent approach to few-shot classification c...,"Jake Snell, Kevin Swersky, Richard Zemel",,Reject,"[6, 4, 5]","[deep learning, transfer learning]",
1,2017,B1-q5Pqxl,Machine Comprehension Using Match-LSTM and Ans...,Machine comprehension of text is an important ...,"Shuohang Wang, Jing Jiang",,Accept (Poster),"[6, 6, 7]","[natural language processing, deep learning]",
2,2017,B16Jem9xe,Learning in Implicit Generative Models,Generative adversarial networks (GANs) provide...,"Shakir Mohamed, Balaji Lakshminarayanan",,Invite to Workshop Track,"[8, 7, 6]",[unsupervised learning],
3,2017,B16dGcqlx,Third Person Imitation Learning,Reinforcement learning (RL) makes it possible ...,"Bradly C Stadie, Pieter Abbeel, Ilya Sutskever",,Accept (Poster),"[6, 5, 6]",[],
4,2017,B184E5qee,Improving Neural Language Models with a Contin...,We propose an extension to neural network lang...,"Edouard Grave, Armand Joulin, Nicolas Usunier",,Accept (Poster),"[7, 9, 5]",[natural language processing],
...,...,...,...,...,...,...,...,...,...,...
36108,2025,zxg6601zoc,Re-Imagining Multimodal Instruction Tuning: A ...,Multimodal instruction tuning has proven to be...,"Yiyang Liu, James Chenhao Liang, Ruixiang Tang...","~Yiyang_Liu3, ~James_Chenhao_Liang1, ~Ruixiang...",,[],"[representation tuning, large multimodal model...",
36109,2025,zxqdVo9FjY,Generalization for Least Squares Regression wi...,Random matrix theory has proven to be a valuab...,"Jiping Li, Rishi Sonthalia","~Jiping_Li1, ~Rishi_Sonthalia1",,[],"[generalization, random matrix theory, spiked ...",
36110,2025,zyGrziIVdE,Exploration by Running Away from the Past,The ability to explore efficiently and effecti...,"Paul-Antoine LE TOLGUENEC, Yann Besse, Florent...","~Paul-Antoine_LE_TOLGUENEC1, ~Yann_Besse2, ~Fl...",,[],"[reinforcement learning, exploration, deep lea...",
36111,2025,zz9jAssrwL,Bayesian Policy Distillation via Offline RL fo...,High-performance deep reinforcement learning f...,"Jangwon Kim, Yoonsu Jang, Jonghyeok Park, Yoon...","~Jangwon_Kim2, ~Yoonsu_Jang1, ~Jonghyeok_Park3...",Withdrawn,[],"[neural network compression, reinforcement lea...",


In [6]:
%%time

# Query the accept/reject decisions and scores for 2025 papers
# API cuts you off every 60 queries, then the code sleeps for 30 seconds

# How often one forum is requested while the API keeps answering with a
# RateLimitError (with a 30 second pause after each such answer).
MAX_ATTEMPTS = 5

# NOTE: rows are addressed by their enumeration number, which relies on the
# index of `iclr` being 0..n-1 (it is reset after sorting in an earlier cell).
for num, (forum_id, paper_year) in enumerate(zip(iclr.id, iclr.year)):
    if paper_year < 2025:
        continue

    # Progress marks: '*' at every 1000th row, '.' at every other 100th row
    if (num + 1) % 1000 == 0:
        print('*', end='')
    elif (num + 1) % 100 == 0:
        print('.', end='')

    forum_url = f'https://api2.openreview.net/notes?forum={forum_id}'

    # Keep asking while rate-limited. If the limit is still in place after
    # MAX_ATTEMPTS, the 'notes' lookup below raises KeyError.
    for _ in range(MAX_ATTEMPTS):
        reply = requests.get(forum_url).json()
        if reply.get('name') != 'RateLimitError':
            break
        time.sleep(30)

    notes = reply['notes']

    # The first note carrying a decision, a withdrawal confirmation or
    # desk-reject comments determines the decision of the paper.
    for note in notes:
        content = note['content']
        if 'decision' in content:
            iclr.at[num, 'decision'] = content['decision']['value']
            break
        if 'withdrawal_confirmation' in content:
            iclr.at[num, 'decision'] = 'Withdrawn'
            break
        if 'desk_reject_comments' in content:
            iclr.at[num, 'decision'] = 'Desk rejected'
            break
    else:
        # No note matched: the decision stored so far is left untouched
        print(f'No decision found: {num}, {forum_id}')

    # Every note with a 'rating' field contributes one integer score
    iclr.at[num, 'scores'] = [
        int(note['content']['rating']['value'])
        for note in notes
        if 'rating' in note['content']
    ]

print('')

iclr.to_parquet('../data/iclr25v2.parquet')

.....*.........*.........*.........*.........*.........*.........*.........*.........*.........*.........*.........*.
CPU times: user 8min 1s, sys: 1min 6s, total: 9min 8s
Wall time: 4h 34min 15s


In [9]:
# Display the final table as the cell output
iclr

Unnamed: 0,year,id,title,abstract,authors,author_ids,decision,scores,keywords,labels
0,2017,B1-Hhnslg,Prototypical Networks for Few-shot Learning,A recent approach to few-shot classification c...,"Jake Snell, Kevin Swersky, Richard Zemel",,Reject,"[6, 4, 5]","[deep learning, transfer learning]",
1,2017,B1-q5Pqxl,Machine Comprehension Using Match-LSTM and Ans...,Machine comprehension of text is an important ...,"Shuohang Wang, Jing Jiang",,Accept (Poster),"[6, 6, 7]","[natural language processing, deep learning]",
2,2017,B16Jem9xe,Learning in Implicit Generative Models,Generative adversarial networks (GANs) provide...,"Shakir Mohamed, Balaji Lakshminarayanan",,Invite to Workshop Track,"[8, 7, 6]",[unsupervised learning],
3,2017,B16dGcqlx,Third Person Imitation Learning,Reinforcement learning (RL) makes it possible ...,"Bradly C Stadie, Pieter Abbeel, Ilya Sutskever",,Accept (Poster),"[6, 5, 6]",[],
4,2017,B184E5qee,Improving Neural Language Models with a Contin...,We propose an extension to neural network lang...,"Edouard Grave, Armand Joulin, Nicolas Usunier",,Accept (Poster),"[7, 9, 5]",[natural language processing],
...,...,...,...,...,...,...,...,...,...,...
36108,2025,zxg6601zoc,Re-Imagining Multimodal Instruction Tuning: A ...,Multimodal instruction tuning has proven to be...,"Yiyang Liu, James Chenhao Liang, Ruixiang Tang...","~Yiyang_Liu3, ~James_Chenhao_Liang1, ~Ruixiang...",Accept (Poster),"[6, 6, 6, 5]","[representation tuning, large multimodal model...",
36109,2025,zxqdVo9FjY,Generalization for Least Squares Regression wi...,Random matrix theory has proven to be a valuab...,"Jiping Li, Rishi Sonthalia","~Jiping_Li1, ~Rishi_Sonthalia1",Reject,"[5, 3, 5, 5, 6]","[generalization, random matrix theory, spiked ...",
36110,2025,zyGrziIVdE,Exploration by Running Away from the Past,The ability to explore efficiently and effecti...,"Paul-Antoine LE TOLGUENEC, Yann Besse, Florent...","~Paul-Antoine_LE_TOLGUENEC1, ~Yann_Besse2, ~Fl...",Reject,"[3, 3, 5, 3]","[reinforcement learning, exploration, deep lea...",
36111,2025,zz9jAssrwL,Bayesian Policy Distillation via Offline RL fo...,High-performance deep reinforcement learning f...,"Jangwon Kim, Yoonsu Jang, Jonghyeok Park, Yoon...","~Jangwon_Kim2, ~Yoonsu_Jang1, ~Jonghyeok_Park3...",Withdrawn,"[3, 3, 6]","[neural network compression, reinforcement lea...",
