# Scraping OpenReview

This notebook scrapes papers and their reviews from OpenReview. Add the invitation IDs of the conferences you want to the `conference_invitations` list, then run all cells.

In [1]:
!pip install openreview-py



In [2]:
import openreview
# Shared API client used by get_papers() below; it is created without credentials.
OPENREVIEW_BASEURL = 'https://api.openreview.net'
client = openreview.Client(baseurl=OPENREVIEW_BASEURL)

# Alternative client class / endpoint, kept for reference (currently unused):
# client = openreview.api.OpenReviewClient(baseurl='https://api2.openreview.net')

In [3]:
import itertools
import pickle
from typing import Iterable, List, Tuple

import pandas as pd


def get_papers(conference_invitations: List[str]) -> Iterable['openreview.api.Note']:
    """Lazily yield every note submitted under each of the given invitations.

    Invitations are processed in the order given, and a progress line is
    printed before each one is fetched. Notes are requested with
    ``details='directReplies'`` so that each note carries its direct replies
    in ``note.details['directReplies']``.

    Uses the module-level ``client`` for all API access.

    Args:
        conference_invitations: Invitation IDs to fetch submissions for.

    Yields:
        The notes returned by ``client.get_all_notes`` for each invitation.
    """
    for invitation in conference_invitations:
        # An earlier attempt that paged through client.get_notes with
        # openreview.tools.efficient_iterget only returned the first 1000
        # papers, which is why get_all_notes is used instead.
        print(f'Fetching papers for invitation {invitation}')
        yield from client.get_all_notes(
            invitation=invitation,
            details='directReplies',
        )


def get_paper_and_review_df(papers: List['openreview.api.Note']) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Build one DataFrame of papers and one DataFrame of their reviews.

    Args:
        papers: Notes fetched with ``details='directReplies'``. Any iterable
            is accepted; it is materialised once because it is traversed twice.

    Returns:
        A ``(paper_df, review_df)`` tuple.

        * ``paper_df`` has one row per paper, built from ``paper.to_json()``.
        * ``review_df`` has one row per direct reply whose ``signatures``
          list contains an entry with ``'Reviewer_'`` in it. It keeps the
          index of the full reply frame, and can be joined to ``paper_df``
          on ``paper_df.id == review_df.forum``. It is empty when no reply
          qualifies.
    """
    # Two passes are made over the papers below; a generator would be
    # exhausted after the first one and silently produce no reviews.
    papers = list(papers)

    # note that to_json() method discards directReplies
    paper_df = pd.DataFrame([paper.to_json() for paper in papers])

    # Flatten the per-paper reply lists; tolerate notes whose details or
    # directReplies entry is missing/None.
    replies = [
        reply
        for paper in papers
        for reply in ((paper.details or {}).get('directReplies') or [])
    ]
    reply_df = pd.DataFrame(replies)

    # With no replies at all (or none carrying signatures) there is no
    # 'signatures' column to filter on, so return an empty review frame.
    if 'signatures' not in reply_df.columns:
        return paper_df, reply_df.iloc[0:0]

    def is_review(signatures) -> bool:
        # A reply missing 'signatures' shows up as NaN in the column, which is
        # not iterable; treat anything that is not a list/tuple as "not a review".
        if not isinstance(signatures, (list, tuple)):
            return False
        # when a reply is a review, it has 'Reviewer_' in one of its signatures
        return any('Reviewer_' in signature for signature in signatures)

    review_mask = reply_df['signatures'].apply(is_review).astype(bool)
    review_df = reply_df[review_mask]

    return paper_df, review_df

In [4]:
# Scrape every configured venue, then cache both frames in a single pickle file.

conference_invitations = [
    'NeurIPS.cc/2022/Conference/-/Blind_Submission',
    'ICLR.cc/2023/Conference/-/Blind_Submission',
]

# get_papers() is a generator; materialise it into the list that
# get_paper_and_review_df() is declared to take.
all_papers = [paper for paper in get_papers(conference_invitations)]
paper_df, review_df = get_paper_and_review_df(all_papers)

scraped_frames = {'paper_df': paper_df, 'review_df': review_df}
with open('scraped-dataframes.pkl', 'wb') as f:
    pickle.dump(scraped_frames, f)


Fetching papers for invitation NeurIPS.cc/2022/Conference/-/Blind_Submission


Getting V1 Notes: 100%|█████████▉| 2821/2824 [00:04<00:00, 599.29it/s]


Fetching papers for invitation ICLR.cc/2023/Conference/-/Blind_Submission


Getting V1 Notes: 100%|█████████▉| 3792/3796 [00:14<00:00, 262.93it/s]


## Exploring what we have scraped

The rest of this notebook is a guide on how to use the scraped data.

In [5]:
# Reload the cached frames so the cells below can run without scraping again.
# Only unpickle files you created yourself: unpickling can execute arbitrary code.
with open('scraped-dataframes.pkl', 'rb') as f:
    data = pickle.load(f)

# The file is no longer needed once the payload is in memory.
paper_df = pd.DataFrame(data['paper_df'])
review_df = pd.DataFrame(data['review_df'])
    


In [6]:
# One row per scraped submission; `content` holds a dict of the paper's fields (e.g. 'title').
paper_df

Unnamed: 0,id,original,cdate,pdate,odate,mdate,tcdate,tmdate,ddate,number,content,forum,referent,invitation,replyto,readers,nonreaders,signatures,writers
0,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1.667239e+12,,,1652737363731,1720019111771,,2310,{'title': 'BEVFusion: A Simple and Robust LiDA...,zzDrPqn57DL,,NeurIPS.cc/2022/Conference/-/Blind_Submission,,[everyone],[],[NeurIPS.cc/2022/Conference],[NeurIPS.cc/2022/Conference]
1,zz0FC7qBpkh,7cZ94HqqMn,1652737830798,1.667239e+12,,,1652737830798,1720018324012,,12079,{'title': 'The Missing Invariance Principle fo...,zz0FC7qBpkh,,NeurIPS.cc/2022/Conference/-/Blind_Submission,,[everyone],[],[NeurIPS.cc/2022/Conference],[NeurIPS.cc/2022/Conference]
2,zyrBT58h_J,GMUaJ0joiTU,1652737730463,1.667239e+12,,,1652737730463,1720018470521,,9989,{'title': 'Sustainable Online Reinforcement Le...,zyrBT58h_J,,NeurIPS.cc/2022/Conference/-/Blind_Submission,,[everyone],[],[NeurIPS.cc/2022/Conference],[NeurIPS.cc/2022/Conference]
3,zvNMzjOizmn,r9JcSZTViEx,1652737743370,1.667239e+12,,,1652737743370,1720018453615,,10262,{'title': 'Langevin Autoencoders for Learning ...,zvNMzjOizmn,,NeurIPS.cc/2022/Conference/-/Blind_Submission,,[everyone],[],[NeurIPS.cc/2022/Conference],[NeurIPS.cc/2022/Conference]
4,zuL5OYIBgcV,AH5xEKmMUnH,1652737344613,1.667239e+12,,,1652737344613,1720019149089,,1891,"{'title': 'Non-deep Networks', 'authorids': ['...",zuL5OYIBgcV,,NeurIPS.cc/2022/Conference/-/Blind_Submission,,[everyone],[],[NeurIPS.cc/2022/Conference],[NeurIPS.cc/2022/Conference]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6615,-4Maz7s3YXz,SUZNkFXFIh,1663850125224,1.675280e+12,1.664468e+12,,1663850125224,1676330899214,,2785,{'title': 'Towards Understanding Robust Memori...,-4Maz7s3YXz,,ICLR.cc/2023/Conference/-/Blind_Submission,,[everyone],[],[ICLR.cc/2023/Conference],[ICLR.cc/2023/Conference]
6616,-4DiyBMgv9m,SRt6LbSJc-u,1663850541741,1.675280e+12,1.664468e+12,,1663850541741,1676330786166,,6214,{'title': 'Identifying Phase Transition Thresh...,-4DiyBMgv9m,,ICLR.cc/2023/Conference/-/Blind_Submission,,[everyone],[],[ICLR.cc/2023/Conference],[ICLR.cc/2023/Conference]
6617,-2zfgNS917,KQ3zOvPeO3,1663849815845,1.675279e+12,1.664468e+12,,1663849815845,1732526208563,,143,{'title': 'BEVDistill: Cross-Modal BEV Distill...,-2zfgNS917,,ICLR.cc/2023/Conference/-/Blind_Submission,,[everyone],[],[ICLR.cc/2023/Conference],[ICLR.cc/2023/Conference]
6618,-1x2-lp1eZf,e-VvI0lh-OS,1663850128070,1.675280e+12,1.664468e+12,,1663850128070,1676330898359,,2809,{'title': 'Rethinking Deep Spiking Neural Netw...,-1x2-lp1eZf,,ICLR.cc/2023/Conference/-/Blind_Submission,,[everyone],[],[ICLR.cc/2023/Conference],[ICLR.cc/2023/Conference]


In [7]:
# One row per reviewer reply; `forum` holds the id of the paper the review belongs to.
review_df

Unnamed: 0,id,original,number,cdate,mdate,ddate,tcdate,tmdate,tddate,forum,replyto,invitation,content,signatures,readers,nonreaders,writers,pdate,odate
0,VrENUT1h6-,,1,1656944073004,,,1656944073004,1656944073004,,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '7: Accept: Technically solid paper...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
1,B8g021IaaAm,,2,1657487808768,,,1657487808768,1660833391821,,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '5: Borderline accept: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
2,HNAOCsCSzZw,,3,1657585011273,,,1657585011273,1657585011273,,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '5: Borderline accept: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
3,uVn6Us0aP_,,4,1657587291965,,,1657587291965,1657587291965,,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '4: Borderline reject: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
4,Hd6Ce57Ircd,,5,1658042263683,,,1658042263683,1658049110257,,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,"{'rating': '6: Weak Accept: Technically solid,...",[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39365,nC5K4ES2guS,,4,1667286565916,,,1667286565916,1667286565916,,-1x2-lp1eZf,-1x2-lp1eZf,ICLR.cc/2023/Conference/Paper2809/-/Official_R...,{'confidence': '4: You are confident in your a...,[ICLR.cc/2023/Conference/Paper2809/Reviewer_86ui],[everyone],[],"[ICLR.cc/2023/Conference, ICLR.cc/2023/Confere...",,
39366,CA9y6TTZGq,,5,1667293855370,,,1667293855370,1667294200802,,-1x2-lp1eZf,-1x2-lp1eZf,ICLR.cc/2023/Conference/Paper2809/-/Official_R...,{'confidence': '5: You are absolutely certain ...,[ICLR.cc/2023/Conference/Paper2809/Reviewer_BQAF],[everyone],[],"[ICLR.cc/2023/Conference, ICLR.cc/2023/Confere...",,
39369,UgGANSuJ2pm,,1,1665995913803,,,1665995913803,1666019168781,,-0tPmzgXS5,-0tPmzgXS5,ICLR.cc/2023/Conference/Paper211/-/Official_Re...,{'confidence': '4: You are confident in your a...,[ICLR.cc/2023/Conference/Paper211/Reviewer_Xyj5],[everyone],[],"[ICLR.cc/2023/Conference, ICLR.cc/2023/Confere...",,
39370,eveHfO7L3IV,,2,1666649632568,,,1666649632568,1669110135900,,-0tPmzgXS5,-0tPmzgXS5,ICLR.cc/2023/Conference/Paper211/-/Official_Re...,{'confidence': '4: You are confident in your a...,[ICLR.cc/2023/Conference/Paper211/Reviewer_6srd],[everyone],[],"[ICLR.cc/2023/Conference, ICLR.cc/2023/Confere...",,


In [8]:
# `content` is a dict of the review's fields (e.g. 'rating', 'confidence', 'summary').
review_df.iloc[0].content

{'rating': '7: Accept: Technically solid paper, with high impact on at least one sub-area, or moderate-to-high impact on more than one areas, with good-to-excellent evaluation, resources, reproducibility, and no unaddressed ethical considerations.',
 'confidence': '3: You are fairly confident in your assessment. It is possible that you did not understand some parts of the submission or that you are unfamiliar with some pieces of related work. Math/other details were not carefully checked.',
 'summary': 'Towards the problem of current methods tend to fail at situations where hardware malfunctions, this paper presents a simple yet effective LiDAR-Camera fusion framework, namely BEVFusion. By disentangling camera pipeline from LiDAR network and using a dynamic fusion module, BEVFusion achieves SOTA performance and shows robustness against LiDAR or camera malfunction at the same time. An effective modification on the camera pipeline is also proposed to boost the final performance.',
 'stre

In [9]:
# NOTE(review): this repeats the previous cell verbatim -- possibly meant to
# inspect a different row or frame; confirm the intent or remove the cell.
review_df.iloc[0].content

{'rating': '7: Accept: Technically solid paper, with high impact on at least one sub-area, or moderate-to-high impact on more than one areas, with good-to-excellent evaluation, resources, reproducibility, and no unaddressed ethical considerations.',
 'confidence': '3: You are fairly confident in your assessment. It is possible that you did not understand some parts of the submission or that you are unfamiliar with some pieces of related work. Math/other details were not carefully checked.',
 'summary': 'Towards the problem of current methods tend to fail at situations where hardware malfunctions, this paper presents a simple yet effective LiDAR-Camera fusion framework, namely BEVFusion. By disentangling camera pipeline from LiDAR network and using a dynamic fusion module, BEVFusion achieves SOTA performance and shows robustness against LiDAR or camera malfunction at the same time. An effective modification on the camera pipeline is also proposed to boost the final performance.',
 'stre

In [10]:
# Attach each review to its paper: a review's `forum` is the `id` of the paper
# it was written for. Columns that exist in both frames are disambiguated with
# a `_paper` / `_review` suffix. The default inner join keeps only papers that
# have at least one review.
merged_df = paper_df.merge(
    review_df,
    left_on='id',
    right_on='forum',
    suffixes=('_paper', '_review'),
)

merged_df


Unnamed: 0,id_paper,original_paper,cdate_paper,pdate_paper,odate_paper,mdate_paper,tcdate_paper,tmdate_paper,ddate_paper,number_paper,...,forum_review,replyto_review,invitation_review,content_review,signatures_review,readers_review,nonreaders_review,writers_review,pdate_review,odate_review
0,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1.667239e+12,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '7: Accept: Technically solid paper...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
1,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1.667239e+12,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '5: Borderline accept: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
2,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1.667239e+12,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '5: Borderline accept: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
3,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1.667239e+12,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '4: Borderline reject: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
4,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1.667239e+12,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,"{'rating': '6: Weak Accept: Technically solid,...",[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24796,-1x2-lp1eZf,e-VvI0lh-OS,1663850128070,1.675280e+12,1.664468e+12,,1663850128070,1676330898359,,2809,...,-1x2-lp1eZf,-1x2-lp1eZf,ICLR.cc/2023/Conference/Paper2809/-/Official_R...,{'confidence': '4: You are confident in your a...,[ICLR.cc/2023/Conference/Paper2809/Reviewer_86ui],[everyone],[],"[ICLR.cc/2023/Conference, ICLR.cc/2023/Confere...",,
24797,-1x2-lp1eZf,e-VvI0lh-OS,1663850128070,1.675280e+12,1.664468e+12,,1663850128070,1676330898359,,2809,...,-1x2-lp1eZf,-1x2-lp1eZf,ICLR.cc/2023/Conference/Paper2809/-/Official_R...,{'confidence': '5: You are absolutely certain ...,[ICLR.cc/2023/Conference/Paper2809/Reviewer_BQAF],[everyone],[],"[ICLR.cc/2023/Conference, ICLR.cc/2023/Confere...",,
24798,-0tPmzgXS5,s3mA4mRGFzT,1663849824402,1.675280e+12,1.664468e+12,,1663849824402,1676330986530,,211,...,-0tPmzgXS5,-0tPmzgXS5,ICLR.cc/2023/Conference/Paper211/-/Official_Re...,{'confidence': '4: You are confident in your a...,[ICLR.cc/2023/Conference/Paper211/Reviewer_Xyj5],[everyone],[],"[ICLR.cc/2023/Conference, ICLR.cc/2023/Confere...",,
24799,-0tPmzgXS5,s3mA4mRGFzT,1663849824402,1.675280e+12,1.664468e+12,,1663849824402,1676330986530,,211,...,-0tPmzgXS5,-0tPmzgXS5,ICLR.cc/2023/Conference/Paper211/-/Official_Re...,{'confidence': '4: You are confident in your a...,[ICLR.cc/2023/Conference/Paper211/Reviewer_6srd],[everyone],[],"[ICLR.cc/2023/Conference, ICLR.cc/2023/Confere...",,


In [11]:
# All reviews of a single paper (here: the first paper in paper_df).
first_paper_id = paper_df['id'].iloc[0]
merged_df.loc[merged_df['id_paper'] == first_paper_id]

Unnamed: 0,id_paper,original_paper,cdate_paper,pdate_paper,odate_paper,mdate_paper,tcdate_paper,tmdate_paper,ddate_paper,number_paper,...,forum_review,replyto_review,invitation_review,content_review,signatures_review,readers_review,nonreaders_review,writers_review,pdate_review,odate_review
0,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1667239000000.0,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '7: Accept: Technically solid paper...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
1,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1667239000000.0,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '5: Borderline accept: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
2,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1667239000000.0,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '5: Borderline accept: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
3,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1667239000000.0,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,{'rating': '4: Borderline reject: Technically ...,[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
4,zzDrPqn57DL,HXF7e04ViJI,1652737363731,1667239000000.0,,,1652737363731,1720019111771,,2310,...,zzDrPqn57DL,zzDrPqn57DL,NeurIPS.cc/2022/Conference/Paper2310/-/Officia...,"{'rating': '6: Weak Accept: Technically solid,...",[NeurIPS.cc/2022/Conference/Paper2310/Reviewer...,[everyone],[],"[NeurIPS.cc/2022/Conference, NeurIPS.cc/2022/C...",,
