In [18]:
import openreview
import pandas as pd
from selenium import webdriver
import re
import difflib
from datetime import datetime
from pyarxiv import query, download_entries
from pyarxiv.arxiv_categories import ArxivCategory, arxiv_category_map

# Matches any run of whitespace characters; the labelling cell uses it to
# collapse such runs into a single space before comparing titles.
_RE_COMBINE_WHITESPACE = re.compile(r"\s+")

# OpenReview API client for the public site (no credentials are passed here).
c = openreview.Client(baseurl='https://openreview.net')

# OpenReview data

In [2]:
# Fetch every note posted under the ICLR 2018 blind-submission invitation.
submissions = openreview.tools.iterget_notes(
    c,
    invitation='ICLR.cc/2018/Conference/-/Blind_Submission',
)
# Index the notes by their forum id so a single paper can be looked up directly.
submissions_by_forum = dict((note.forum, note) for note in submissions)


In [3]:
# Peek at one submission's content dict to see which fields are available
# (the output below shows at least 'title' and 'abstract').
submissions_by_forum["Hk6kPgZA-"].content

{'title': 'Certifying Some Distributional Robustness with Principled Adversarial Training',
 'abstract': 'Neural networks are vulnerable to adversarial examples and researchers have proposed many heuristic attack and defense mechanisms. We address this problem through the principled lens of distributionally robust optimization, which guarantees performance under adversarial input perturbations.  By considering a Lagrangian penalty formulation of perturbing the underlying data distribution in a Wasserstein ball, we provide a training procedure that augments model parameter updates with worst-case perturbations of training data. For smooth losses, our procedure provably achieves moderate levels of robustness with little computational or statistical cost relative to empirical risk minimization. Furthermore, our statistical guarantees allow us to efficiently certify robustness for the population loss. For imperceptible perturbations, our method matches or outperforms heuristic approaches.\

In [4]:
def _item_or_blank(values, position):
    """Return ``values[position]``, or "" when the list is too short.

    Used for every author/e-mail slot so that a submission with fewer
    entries (including none at all) yields blanks instead of an IndexError.
    """
    return values[position] if position < len(values) else ""


# One entry per submission, in the dict's iteration order.
notes = list(submissions_by_forum.values())

forum_title = [note.content['title'] for note in notes]
# Only the first three authors and their ids are kept; missing slots are "".
author_1 = [_item_or_blank(note.content['authors'], 0) for note in notes]
author_2 = [_item_or_blank(note.content['authors'], 1) for note in notes]
author_3 = [_item_or_blank(note.content['authors'], 2) for note in notes]
email_1 = [_item_or_blank(note.content['authorids'], 0) for note in notes]
email_2 = [_item_or_blank(note.content['authorids'], 1) for note in notes]
email_3 = [_item_or_blank(note.content['authorids'], 2) for note in notes]

open_review_data = pd.DataFrame({"title":forum_title, "author_1":author_1, "author_2":author_2,"author_3":author_3,
                                "email_1": email_1, "email_2": email_2, "email_3":email_3})
# Decision label, filled in later from the scraped decision lists; "" means "not labelled yet".
open_review_data["accepted"] = ""

In [5]:
def scrape_paper_titles(url, tab_link_text=None, wait_seconds=10):
    """Open ``url`` in Chrome and return the titles of the listed papers.

    Parameters
    ----------
    url : str
        Page to load.
    tab_link_text : str or None
        Visible text of the tab link to click before reading the list.
        ``None`` reads whatever is shown right after the page loads.
    wait_seconds : int
        Implicit-wait timeout handed to the driver. This only bounds how long
        element lookups poll before failing; it does not pause execution.

    Returns
    -------
    list of str
        First text line of every element with class "note" whose first line
        is non-empty, stripped of surrounding whitespace.

    The browser is always closed, even if a lookup or click raises.
    """
    # Local import: selenium is already a dependency of this notebook, and the
    # By-based locators work on both old and current Selenium releases.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.implicitly_wait(wait_seconds)
        if tab_link_text is not None:
            driver.find_element(By.LINK_TEXT, tab_link_text).click()
            driver.implicitly_wait(wait_seconds)
        # NOTE(review): nothing here explicitly waits for the clicked tab to
        # finish rendering; if a list ever comes back short, add an explicit
        # wait on the tab's contents.
        titles = []
        for note in driver.find_elements(By.CLASS_NAME, "note"):
            # Read .text once per element (each access is a browser round trip).
            first_line = note.text.split('\n')[0]
            # Presumably notes on non-visible tabs report empty text, which is
            # why empty first lines are skipped -- TODO confirm.
            if first_line != '':
                titles.append(first_line.strip())
        return titles
    finally:
        driver.quit()


# All decision lists live on the same conference page, under different tabs.
CONFERENCE_URL = "https://openreview.net/group?id=ICLR.cc/2018/Conference"

# Oral papers: read directly after load, no tab click (as in the original cell).
accepted_papers = scrape_paper_titles(CONFERENCE_URL + "#accepted-oral-papers")

rejected_papers = scrape_paper_titles(CONFERENCE_URL + "#rejected-papers", "Rejected Papers")

accepted_poster_papers = scrape_paper_titles(CONFERENCE_URL + "#accepted-poster-papers", "Poster Papers")

workshop_papers = scrape_paper_titles(CONFERENCE_URL + "#workshop-papers", "Invited to submit to Workshop")

withdrawn_papers = scrape_paper_titles(CONFERENCE_URL + "#withdrawn-papers", "Withdrawn Papers")


In [7]:

# Normalise every OpenReview title once: collapse whitespace runs to a single
# space and trim the ends, so it can be compared with the scraped titles.
normalized_titles = open_review_data["title"].map(
    lambda title: _RE_COMBINE_WHITESPACE.sub(" ", title).strip()
)

# (label, scraped titles) pairs, applied in this order: when a title appears
# in more than one list, the label applied last is the one that sticks.
decision_labels = [
    ("a", accepted_papers),
    ("r", rejected_papers),
    ("w", withdrawn_papers),
    ("ap", accepted_poster_papers),
    ("wp", workshop_papers),
]
for label, scraped_titles in decision_labels:
    open_review_data.loc[normalized_titles.isin(scraped_titles), "accepted"] = label


In [8]:
# Show the table with the decision labels filled in
# ("a", "r", "w", "ap", "wp" as assigned by the labelling cell).
open_review_data

Unnamed: 0,title,author_1,author_2,author_3,email_1,email_2,email_3,accepted
0,Certifying Some Distributional Robustness with...,Aman Sinha,Hongseok Namkoong,John Duchi,amans@stanford.edu,hnamk@stanford.edu,jduchi@stanford.edu,a
1,Parametric Information Bottleneck to Optimize ...,Thanh T. Nguyen,Jaesik Choi,,thanhnguyen2792@gmail.com,jaesik@unist.ac.kr,,r
2,Towards Neural Phrase-based Machine Translation,Po-Sen Huang,Chong Wang,Sitao Huang,huang.person@gmail.com,chongw@google.com,shuang91@illinois.edu,ap
3,Weightless: Lossy Weight Encoding For Deep Neu...,Brandon Reagen,Udit Gupta,Robert Adolf,reagen@fas.harvard.edu,ugupta@g.harvard.edu,rdadolf@seas.harvard.edu,wp
4,Interactive Grounded Language Acquisition and ...,Haonan Yu,Haichao Zhang,Wei Xu,haonanyu@baidu.com,zhanghaichao@baidu.com,wei.xu@baidu.com,ap
5,A Boo(n) for Evaluating Architecture Performance,Ondrej Bajgar,Rudolf Kadlec,and Jan Kleindienst,ondrej@bajgar.org,rudolf_kadlec@cz.ibm.com,jankle@cz.ibm.com,r
6,Countering Adversarial Images using Input Tran...,Chuan Guo,Mayank Rana,Moustapha Cisse,cg563@cornell.edu,mayankrana@fb.com,moustaphacisse@fb.com,ap
7,A Neural Representation of Sketch Drawings,David Ha,Douglas Eck,,hadavid@google.com,deck@google.com,,ap
8,The Kanerva Machine: A Generative Distributed ...,Yan Wu,Greg Wayne,Alex Graves,yanwu@google.com,gregwayne@google.com,gravesa@google.com,ap
9,Minimax Curriculum Learning: Machine Teaching ...,Tianyi Zhou,Jeff Bilmes,,tianyi.david.zhou@gmail.com,bilmes@uw.edu,,ap


In [10]:
# Sanity check: list submissions whose decision label is still empty,
# i.e. titles that matched none of the scraped lists.
open_review_data.loc[open_review_data['accepted'] == '']

Unnamed: 0,title,author_1,author_2,author_3,email_1,email_2,email_3,accepted


In [11]:
# Which decision labels actually occur in the table.
open_review_data["accepted"].unique()

array(['a', 'r', 'ap', 'wp'], dtype=object)

# arXiv data

In [58]:

# A paper counts as "submitted to arXiv" only when the top search hit has a
# title at least this similar to the OpenReview title ...
TITLE_SIMILARITY_THRESHOLD = 0.9
# ... and was published on arXiv before this date (parsed once, not per row).
ARXIV_PUBLISHED_BEFORE = datetime.strptime('2018-10-01', '%Y-%m-%d')

open_review_data["submitted_arxiv"] = 0
for index, row in open_review_data.iterrows():
    print(index)
    # Punctuation is replaced by spaces for the search query only.
    entries = query(title = re.sub(r"[,.;@#?!&$-]+\ *", " ", row['title']))
    if not entries:
        continue

    # Compare whitespace-normalised titles: irregular spacing or line breaks
    # in either title would otherwise lower the similarity ratio. This is the
    # same normalisation the notebook applies to titles when labelling decisions.
    openreview_title = _RE_COMBINE_WHITESPACE.sub(" ", row['title']).strip()
    arxiv_title = _RE_COMBINE_WHITESPACE.sub(" ", entries[0]["title"]).strip()
    similarity = difflib.SequenceMatcher(None, arxiv_title, openreview_title).ratio()
    if similarity < TITLE_SIMILARITY_THRESHOLD:
        continue

    # The date is parsed only after the title check passes, as before.
    published = datetime.strptime(entries[0]["published"].split("T")[0], '%Y-%m-%d')
    if published < ARXIV_PUBLISHED_BEFORE:
        print("submitted to arxiv")
        open_review_data.loc[index,"submitted_arxiv"] = 1




0
submitted to arxiv
1
2
submitted to arxiv
3
submitted to arxiv
4
submitted to arxiv
5
submitted to arxiv
6
submitted to arxiv
7
submitted to arxiv
8
submitted to arxiv
9
10
submitted to arxiv
11
submitted to arxiv
12
submitted to arxiv
13
submitted to arxiv
14
submitted to arxiv
15
submitted to arxiv
16
submitted to arxiv
17
submitted to arxiv
18
submitted to arxiv
19
submitted to arxiv
20
submitted to arxiv
21
22
submitted to arxiv
23
submitted to arxiv
24
submitted to arxiv
25
submitted to arxiv
26
27
28
29
submitted to arxiv
30
31
submitted to arxiv
32
submitted to arxiv
33
34
submitted to arxiv
35
submitted to arxiv
36
submitted to arxiv
37
submitted to arxiv
38
submitted to arxiv
39
submitted to arxiv
40
41
submitted to arxiv
42
43
44
submitted to arxiv
45
46
submitted to arxiv
47
submitted to arxiv
48
49
submitted to arxiv
50
submitted to arxiv
51
submitted to arxiv
52
53
submitted to arxiv
54
submitted to arxiv
55
submitted to arxiv
56
57
submitted to arxiv
58
submitted to arx

525
submitted to arxiv
526
527
submitted to arxiv
528
submitted to arxiv
529
530
531
532
submitted to arxiv
533
534
submitted to arxiv
535
536
537
538
submitted to arxiv
539
540
541
542
543
544
submitted to arxiv
545
submitted to arxiv
546
547
548
549
submitted to arxiv
550
551
552
553
submitted to arxiv
554
555
556
557
558
submitted to arxiv
559
submitted to arxiv
560
submitted to arxiv
561
submitted to arxiv
562
563
564
565
566
567
568
569
submitted to arxiv
570
571
572
573
submitted to arxiv
574
submitted to arxiv
575
576
577
578
submitted to arxiv
579
submitted to arxiv
580
submitted to arxiv
581
582
583
submitted to arxiv
584
585
586
587
submitted to arxiv
588
589
590
591
submitted to arxiv
592
593
submitted to arxiv
594
595
596
597
598
submitted to arxiv
599
submitted to arxiv
600
601
submitted to arxiv
602
submitted to arxiv
603
604
605
submitted to arxiv
606
607
submitted to arxiv
608
submitted to arxiv
609
610
submitted to arxiv
611
submitted to arxiv
612
613
submitted to arxi

In [59]:
# Show only the submissions that were flagged as also being on arXiv.
open_review_data[open_review_data["submitted_arxiv"] == 1]

Unnamed: 0,title,author_1,author_2,author_3,email_1,email_2,email_3,accepted,submitted_arxiv
0,Certifying Some Distributional Robustness with...,Aman Sinha,Hongseok Namkoong,John Duchi,amans@stanford.edu,hnamk@stanford.edu,jduchi@stanford.edu,a,1
2,Towards Neural Phrase-based Machine Translation,Po-Sen Huang,Chong Wang,Sitao Huang,huang.person@gmail.com,chongw@google.com,shuang91@illinois.edu,ap,1
3,Weightless: Lossy Weight Encoding For Deep Neu...,Brandon Reagen,Udit Gupta,Robert Adolf,reagen@fas.harvard.edu,ugupta@g.harvard.edu,rdadolf@seas.harvard.edu,wp,1
4,Interactive Grounded Language Acquisition and ...,Haonan Yu,Haichao Zhang,Wei Xu,haonanyu@baidu.com,zhanghaichao@baidu.com,wei.xu@baidu.com,ap,1
5,A Boo(n) for Evaluating Architecture Performance,Ondrej Bajgar,Rudolf Kadlec,and Jan Kleindienst,ondrej@bajgar.org,rudolf_kadlec@cz.ibm.com,jankle@cz.ibm.com,r,1
6,Countering Adversarial Images using Input Tran...,Chuan Guo,Mayank Rana,Moustapha Cisse,cg563@cornell.edu,mayankrana@fb.com,moustaphacisse@fb.com,ap,1
7,A Neural Representation of Sketch Drawings,David Ha,Douglas Eck,,hadavid@google.com,deck@google.com,,ap,1
8,The Kanerva Machine: A Generative Distributed ...,Yan Wu,Greg Wayne,Alex Graves,yanwu@google.com,gregwayne@google.com,gravesa@google.com,ap,1
10,Multi-View Data Generation Without View Superv...,Mickael Chen,Ludovic Denoyer,Thierry Artières,mickael.chen@lip6.fr,ludovic.denoyer@lip6.fr,thierry.artieres@lif.univ-mrs.fr,ap,1
11,Towards Binary-Valued Gates for Robust LSTM Tr...,Zhuohan Li,Di He,Fei Tian,lizhuohan@pku.edu.cn,di_he@pku.edu.cn,fetia@microsoft.com,r,1


In [62]:
# Persist the combined table (decision label + arXiv flag) without the row index.
# NOTE(review): the table contains author names and e-mail addresses; check
# before sharing or committing the CSV.
open_review_data.to_csv("open_review_arxiv_data.csv", index = False)