# Download the blind ICLR2026 data from OpenReview

In [1]:
%matplotlib notebook

import numpy as np
import pandas as pd
import pylab as plt

import requests
import time

In [2]:
%%time

# Download titles/abstracts/authors of all papers (this part is fast)
#
# Every note returned by the API is appended to the parallel lists below, so
# position i in each list describes the same submission.

titles = []
abstracts = []
years = []
forum_ids = []
decisions = []
authors = []
author_ids = []
keywords = []

# Seconds to wait after the API answers with a RateLimitError, and how many
# times a single page is retried before giving up.
RATE_LIMIT_PAUSE = 30
MAX_RATE_LIMIT_RETRIES = 5

for year in [2026]:
    print(year, end=': ')
    for query in ['submission', 'Submission', 'Blind_Submission',
                  'Withdrawn_Submission', 'Rejected_Submission',
                  'Desk_Rejected_Submission', '']:
        # The URL scheme depends on the year: two invitation-based layouts for
        # the older API, and a venueid-based layout for api2 (2024 onwards).
        if year <= 2017:
            if query == '':
                continue
            url = f'https://api.openreview.net/notes?invitation=ICLR.cc%2F{year}%2Fconference%2F-%2F{query}'
        elif year <= 2023:
            if query == '':
                continue
            url = f'https://api.openreview.net/notes?invitation=ICLR.cc%2F{year}%2FConference%2F-%2F{query}'
        else:
            # An empty query selects the bare conference venueid
            if query != '':
                query = '/' + query
            url = f'https://api2.openreview.net/notes?content.venueid=ICLR.cc/{year}/Conference{query}'

        # Page through the results in steps of 1000 until an empty page comes back
        for offset in range(0, 20_000, 1000):
            page_url = url + f'&offset={offset}'
            payload = requests.get(page_url).json()

            # Throttling is signalled in the JSON body; pause and retry a
            # bounded number of times instead of only once.
            attempts = 0
            while ('name' in payload and payload['name'] == 'RateLimitError'
                   and attempts < MAX_RATE_LIMIT_RETRIES):
                time.sleep(RATE_LIMIT_PAUSE)
                payload = requests.get(page_url).json()
                attempts += 1

            if 'notes' not in payload:
                raise RuntimeError(
                    f'Unexpected OpenReview response for {page_url}: {payload}'
                )

            df = pd.DataFrame(payload['notes'])
            if len(df) > 0:
                print(len(df), end=' ')
                if year < 2024:
                    # Older API: content fields are plain values
                    titles    += [d['title'].strip() for d in df['content'].values]
                    abstracts += [d['abstract'].strip() for d in df['content'].values]
                    keywords  += [d['keywords'] for d in df['content'].values]
                    if year == 2017:
                        # 2017 records may store authors as a plain string and
                        # may carry 'author_emails' instead of 'authorids'
                        authors   += [', '.join(d['authors']) if isinstance(d['authors'], list)
                                      else d['authors'] for d in df['content']]
                        # Append (the original assigned here, which discarded
                        # every id collected so far and misaligned the lists)
                        author_ids += [', '.join(d['authorids']) if 'authorids' in d
                                       else d['author_emails'] for d in df['content']]
                    else:
                        authors   += [', '.join(d['authors']) for d in df['content']]
                        author_ids  += [', '.join(d['authorids']) for d in df['content']]
                else:
                    # api2: every content field is wrapped as {'value': ...}
                    titles    += [d['title']['value'].strip() for d in df['content'].values]
                    abstracts += [d['abstract']['value'].strip() for d in df['content'].values]
                    keywords  += [d['keywords']['value'] for d in df['content'].values]
                    # Only the first note of the page is inspected to decide
                    # whether this page carries author information
                    if 'authors' in df['content'].values[0]:
                        authors   += [', '.join(d['authors']['value']) for d in df['content'].values]
                        author_ids   += [', '.join(d['authorids']['value']) for d in df['content'].values]
                    else:
                        authors += [''] * len(df)
                        author_ids += [''] * len(df)
                years     += [year] * len(df)
                forum_ids += list(df.forum)

                # Order matters: 'Rejected_Submission' is a substring of
                # 'Desk_Rejected_Submission', so the desk-reject test comes first
                if 'Withdrawn_Submission' in query:
                    decisions += ['Withdrawn'] * len(df)
                elif 'Desk_Rejected_Submission' in query:
                    decisions += ['Desk rejected'] * len(df)
                elif 'Rejected_Submission' in query:
                    decisions += ['Reject'] * len(df)
                else:
                    decisions += [''] * len(df)
            else:
                break
        else:
            # The loop ran out of offsets without ever seeing an empty page
            print(f'\nWARNING: reached the 20,000-note pagination cap for {url}; '
                  'results may be truncated')
    print('')
print('')

print(f'Found {len(titles)} papers\n')

2026: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 674 123 

Found 19797 papers

CPU times: user 2.97 s, sys: 213 ms, total: 3.18 s
Wall time: 49.5 s


In [3]:
# Prepare the dataframe
#
# Normalises the downloaded keyword lists, assembles all per-paper lists into
# one DataFrame, and drops submissions whose abstract is too short to be real.

# Split keyword strings with semicolons instead of commas
# (only applied when the first entry of the list contains a semicolon)
keywords = [
    [kk.strip() for kk in k[0].split(";") if kk.strip() != '']
    if len(k) > 0 and ";" in k[0] else k
    for k in keywords
]

# Remove trailing periods from keywords
# (endswith is safe for an empty or whitespace-only last keyword, where
# indexing the last character would raise IndexError)
keywords = [
    k[:-1] + [k[-1].strip()[:-1]]
    if len(k) > 0 and k[-1].strip().endswith(".") else k
    for k in keywords
]

# Make sure all is lower-case
keywords = [[kk.lower() for kk in k] for k in keywords]

iclr = pd.DataFrame.from_dict({
    'year': np.array(years).astype(int),
    'id': forum_ids,
    'title': titles,
    'abstract': abstracts,
    'authors': authors,
    'author_ids': author_ids,
    'decision': decisions,
    # One fresh list per row; `[[]] * n` would make all rows share one list
    'scores': [[] for _ in abstracts],
    'keywords': keywords,
})

# Kicking out nonsense abstracts

mask = np.array([len(a) >= 100 for a in iclr.abstract])

print(f'Removing {np.sum(~mask)} submissions with abstract length below 100 characters:')
for abstract in iclr[~mask]['abstract'].values:
    print('  ' + abstract)
iclr = iclr[mask].reset_index(drop=True)

Removing 4 submissions with abstract length below 100 characters:
  This is a null submission.
  1
  Scaling Laws of Time Series: Performance Gains from Increasing Input Length
  Video Gen


In [4]:
# Inspect

# Sanity check: no forum id may appear more than once
n_distinct_ids = len(iclr['id'].unique())
assert n_distinct_ids == len(iclr)

iclr

Unnamed: 0,year,id,title,abstract,authors,author_ids,decision,scores,keywords
0,2026,FPLNSx1jmL,Improving Developer Emotion Classification via...,Detecting developer emotion in the informative...,,,,[],"[emotion detection, commit messages, software ..."
1,2026,y5rLR9xZpn,Quantum-Inspired Image Encodings for Financial...,This study proposes a quantum-inspired methodo...,,,,[],"[time-series classification, image encoding, q..."
2,2026,kiVIVBmMTP,SAVIOR: Sample-efficient Alignment of Vision-L...,Modern enterprises are increasingly adopting b...,,,,[],"[finance, document processing, optical charact..."
3,2026,IKJyRyHpHV,Revisiting Multilingual Data Mixtures in Langu...,The impact of different multilingual data mixt...,,,,[],"[multilingual llms, multilinguality, cross-lin..."
4,2026,GGg2BmcBEp,One-Shot Style Personalization for RL Agents v...,Reinforcement learning (RL) has achieved remar...,,,,[],"[reinforcement learning, agent alignment]"
...,...,...,...,...,...,...,...,...,...
19788,2026,d7OM1LQp5t,Animer: Generating Editable Videos from Images...,Our method combines advanced computer vision t...,"Junhao Chen, Gao Kejun, Xiang Li, Fangsheng We...","~Junhao_Chen2, ~Gao_Kejun1, ~Xiang_Li68, ~Fang...",Withdrawn,[],[video generation]
19789,2026,WBys7MARr3,Damon: Dynamic model pruning for Dense Large L...,"With a vast number of parameters, Large Langua...","Jiateng Wei, Huan Wang","~Jiateng_Wei1, ~Huan_Wang3",Withdrawn,[],"[llms, structured pruning, dynamic sparsity]"
19790,2026,CmcHvBLAhE,Looking-back: Implicit Video Clips Re-focusing...,Multimodal large language models (MLLMs) on vi...,Jinlong Li,~Jinlong_Li3,Withdrawn,[],"[mllm, reasoning, 3d, spatial, perception]"
19791,2026,d5snaHpQm2,Incentivizing Visual Thinking Cues via Reinfor...,Modern vision-language models struggle with lo...,Jinlong Li,~Jinlong_Li3,Withdrawn,[],"[thinking, reinforcement, reasoning, planning,..."


In [5]:
# Load iclr25v2 and clean keywords
#
# Applies the same keyword normalisation as for the 2026 data (semicolon
# splitting, trailing-period removal, lower-casing) to the previous dataset.
# NOTE(review): the "Combine and save" cell re-reads this parquet file, so the
# cleaned keywords assigned here do not appear to reach the saved file -- confirm.

iclr25v2 = pd.read_parquet('../data/iclr25v2.parquet')

keywords = iclr25v2.keywords.values

# Convert each entry to a plain list so list slicing/concatenation below works
keywords = [list(k) for k in keywords]

# Number of papers whose first keyword entry contains a semicolon
print(len([k for k in keywords if len(k) > 0 and ";" in k[0]]))

# Split keyword strings with semicolons instead of commas
keywords = [
    [kk.strip() for kk in k[0].split(";") if kk.strip() != '']
    if len(k) > 0 and ";" in k[0] else k
    for k in keywords
]

# Remove trailing periods from keywords
# (endswith is safe for an empty or whitespace-only last keyword, where
# indexing the last character would raise IndexError)
keywords = [
    k[:-1] + [k[-1].strip()[:-1]]
    if len(k) > 0 and k[-1].strip().endswith(".") else k
    for k in keywords
]

# Make sure all is lower-case
keywords = [[kk.lower() for kk in k] for k in keywords]

iclr25v2['keywords'] = keywords

iclr25v2

1154


Unnamed: 0,year,id,title,abstract,authors,author_ids,decision,scores,keywords,labels
0,2017,B1-Hhnslg,Prototypical Networks for Few-shot Learning,A recent approach to few-shot classification c...,"Jake Snell, Kevin Swersky, Richard Zemel",,Reject,"[6, 4, 5]","[deep learning, transfer learning]",transfer learning
1,2017,B1-q5Pqxl,Machine Comprehension Using Match-LSTM and Ans...,Machine comprehension of text is an important ...,"Shuohang Wang, Jing Jiang",,Accept (Poster),"[6, 6, 7]","[natural language processing, deep learning]",language models
2,2017,B16Jem9xe,Learning in Implicit Generative Models,Generative adversarial networks (GANs) provide...,"Shakir Mohamed, Balaji Lakshminarayanan",,Invite to Workshop Track,"[8, 7, 6]",[unsupervised learning],unlabeled
3,2017,B16dGcqlx,Third Person Imitation Learning,Reinforcement learning (RL) makes it possible ...,"Bradly C Stadie, Pieter Abbeel, Ilya Sutskever",,Accept (Poster),"[6, 5, 6]",[],unlabeled
4,2017,B184E5qee,Improving Neural Language Models with a Contin...,We propose an extension to neural network lang...,"Edouard Grave, Armand Joulin, Nicolas Usunier",,Accept (Poster),"[7, 9, 5]",[natural language processing],language models
...,...,...,...,...,...,...,...,...,...,...
36108,2025,zxg6601zoc,Re-Imagining Multimodal Instruction Tuning: A ...,Multimodal instruction tuning has proven to be...,"Yiyang Liu, James Chenhao Liang, Ruixiang Tang...","~Yiyang_Liu3, ~James_Chenhao_Liang1, ~Ruixiang...",Accept (Poster),"[6, 6, 6, 5]","[representation tuning, large multimodal model...",unlabeled
36109,2025,zxqdVo9FjY,Generalization for Least Squares Regression wi...,Random matrix theory has proven to be a valuab...,"Jiping Li, Rishi Sonthalia","~Jiping_Li1, ~Rishi_Sonthalia1",Reject,"[5, 3, 5, 5, 6]","[generalization, random matrix theory, spiked ...",unlabeled
36110,2025,zyGrziIVdE,Exploration by Running Away from the Past,The ability to explore efficiently and effecti...,"Paul-Antoine LE TOLGUENEC, Yann Besse, Florent...","~Paul-Antoine_LE_TOLGUENEC1, ~Yann_Besse2, ~Fl...",Reject,"[3, 3, 5, 3]","[reinforcement learning, exploration, deep lea...",RL
36111,2025,zz9jAssrwL,Bayesian Policy Distillation via Offline RL fo...,High-performance deep reinforcement learning f...,"Jangwon Kim, Yoonsu Jang, Jonghyeok Park, Yoon...","~Jangwon_Kim2, ~Yoonsu_Jang1, ~Jonghyeok_Park3...",Withdrawn,"[3, 3, 6]","[neural network compression, reinforcement lea...",RL


In [6]:
# Combine and save
#
# Appends the 2026 submissions (ordered by forum id) to the previous dataset
# and writes the result to a new parquet file.

# NOTE(review): this re-reads the file from disk, which replaces the
# `iclr25v2` frame whose keywords were cleaned in the cell above -- confirm
# whether the cleaned or the on-disk keywords are meant to be saved.
iclr25v2 = pd.read_parquet('../data/iclr25v2.parquet')

iclr_2026_sorted = iclr.sort_values(by='id')
iclr26v1 = pd.concat((iclr25v2, iclr_2026_sorted))
iclr26v1 = iclr26v1.reset_index(drop=True)

iclr26v1.to_parquet('../data/iclr26v1.parquet')

iclr26v1

Unnamed: 0,year,id,title,abstract,authors,author_ids,decision,scores,keywords,labels
0,2017,B1-Hhnslg,Prototypical Networks for Few-shot Learning,A recent approach to few-shot classification c...,"Jake Snell, Kevin Swersky, Richard Zemel",,Reject,"[6, 4, 5]","[deep learning, transfer learning]",transfer learning
1,2017,B1-q5Pqxl,Machine Comprehension Using Match-LSTM and Ans...,Machine comprehension of text is an important ...,"Shuohang Wang, Jing Jiang",,Accept (Poster),"[6, 6, 7]","[natural language processing, deep learning]",language models
2,2017,B16Jem9xe,Learning in Implicit Generative Models,Generative adversarial networks (GANs) provide...,"Shakir Mohamed, Balaji Lakshminarayanan",,Invite to Workshop Track,"[8, 7, 6]",[unsupervised learning],unlabeled
3,2017,B16dGcqlx,Third Person Imitation Learning,Reinforcement learning (RL) makes it possible ...,"Bradly C Stadie, Pieter Abbeel, Ilya Sutskever",,Accept (Poster),"[6, 5, 6]",[],unlabeled
4,2017,B184E5qee,Improving Neural Language Models with a Contin...,We propose an extension to neural network lang...,"Edouard Grave, Armand Joulin, Nicolas Usunier",,Accept (Poster),"[7, 9, 5]",[natural language processing],language models
...,...,...,...,...,...,...,...,...,...,...
55901,2026,zz3El6hqbs,Learning activation functions with PCA on a se...,This work explores a novel approach to learnin...,,,,[],"[deep neural networks, activation function lea...",
55902,2026,zzJTo7ujql,Phased DMD: Few-step Distribution Matching Dis...,Distribution Matching Distillation (DMD) disti...,,,,[],"[diffusion models, distribution matching, dist...",
55903,2026,zzTDulLys0,vAttention: Verified Sparse Attention via Samp...,State-of-the-art sparse attention methods for ...,,,,[],[sparse attention],
55904,2026,zzTQISAGUp,Polychromic Objectives for Reinforcement Learning,Reinforcement learning fine-tuning (RLFT) is a...,,,,[],"[reinforcement learning, exploration]",


In [7]:
# Number of papers per year, 2017 through 2026
[np.sum(iclr26v1['year'] == year) for year in range(2017, 2027)]

[489, 1012, 1569, 2593, 3009, 3422, 4955, 7401, 11663, 19793]

In [8]:
# Reload the combined dataset from disk (this rebinds `iclr`)
iclr = pd.read_parquet('../data/iclr26v1.parquet')

# Keywords whose exact-match frequency is reported below
keys = [
    "3d reconstruction",
    "novel view synthesis",
    "nerf",
    "gaussian splatting",
    "speech synthesis",
    "text-to-speech",
    "speech recognition",
    "drug discovery",
    "molecule generation",
    "partial differential equations",
    "partial differential equation",
    "dynamical system",
    "dynamical systems",
    "pdes",
    "pde",
]

# Flatten the per-paper keyword collections into a single array
allkeys = np.concatenate(list(iclr['keywords'].values))

for key in keys:
    is_match = allkeys == key
    print(f"{key:30}", np.sum(is_match))

3d reconstruction              121
novel view synthesis           99
nerf                           52
gaussian splatting             106
speech synthesis               42
text-to-speech                 45
speech recognition             54
drug discovery                 91
molecule generation            56
partial differential equations 95
partial differential equation  29
dynamical system               38
dynamical systems              141
pdes                           45
pde                            44
