In [44]:
import pandas as pd
import openreview
from pprint import pprint
import json

from collections import OrderedDict
from typing import Any, Callable, Iterable, List, Dict, Optional

In [8]:
client = openreview.Client(baseurl='https://api.openreview.net')  # API v1

submissions = client.get_all_notes(
    invitation='ICLR.cc/2018/Conference/-/Blind_Submission',
    details='directReplies',   # embed replies (reviews, comments, etc.)
)
# https://openreview.net/forum?id=ryzm6BATZ 
# for paper 0
paper0 = submissions[1]

In [9]:
# Save one paper detail
with open("paper_0.json","w") as file:
    json.dump(paper0.to_json(),file,indent=4)

In [10]:
print(paper0)

{'cdate': 1518730172717,
 'content': {'_bibtex': '@misc{\n'
                        'brakel2018learning,\n'
                        'title={Learning Independent Features with Adversarial '
                        'Nets for Non-linear {ICA}},\n'
                        'author={Philemon Brakel and Yoshua Bengio},\n'
                        'year={2018},\n'
                        'url={https://openreview.net/forum?id=ryykVe-0W},\n'
                        '}',
             'abstract': 'Reliable measures of statistical dependence could '
                         'potentially be useful tools for learning independent '
                         'features and performing tasks like source separation '
                         'using Independent Component Analysis (ICA).  '
                         'Unfortunately, many of such measures, like the '
                         'mutual information, are hard to estimate and '
                         'optimize directly.  We propose to learn independe

In [11]:
# Get the forum of the paper 
reviews = client.get_notes(forum=paper0.forum)
review1 = reviews[1]

In [18]:
for i in range(len(reviews)):
    with open(f"review_{i}.json","w") as file:
        json.dump(reviews[i].to_json(),file,indent=4)

In [50]:
def threadify(
    reviews: Iterable[Any],
    *,
    to_dict: Callable[[Any], Dict[str, Any]] = lambda x: x.to_json() if hasattr(x, "to_json") else dict(x),
    id_key: str = "id",
    parent_key: str = "replyto",
    root_id: Optional[Any] = None,            # if None, we’ll infer from the first item’s .forum if present
    children_key: str = "Sub(s)",             # keep your existing key name
    keep_order: bool = True                   # preserve input order within siblings
) -> List[Dict[str, Any]]:
    """
    Build a nested comment tree of arbitrary depth.

    - reviews: list/iterable of objects or dicts
    - to_dict: how to turn each item into a dict
    - id_key: field with the unique id
    - parent_key: field pointing to parent id
    - root_id: id considered the top-level parent (items replying to this become roots).
               If None, and the first review has attribute `.forum`, that value is used.
    - children_key: key used to store children
    - keep_order: if True, siblings appear in the same order as input
    """
    reviews = list(reviews)
    if not reviews:
        return []

    # Infer root_id from the first element’s `.forum` if not provided
    if root_id is None and hasattr(reviews[0], "forum"):
        root_id = getattr(reviews[0], "forum")

    # Normalize to dicts while preserving input order index (for stable sibling ordering)
    norm: List[Dict[str, Any]] = []
    for idx, item in enumerate(reviews):
        d = to_dict(item)
        d.setdefault(children_key, [])
        d["_order_index"] = idx  # hidden key for ordering; stripped later
        norm.append(d)

    # Index by id
    by_id: "OrderedDict[Any, Dict[str, Any]]" = OrderedDict()
    for d in norm:
        key = d.get(id_key)
        if key is not None:
            by_id[key] = d

    # Attach children to parents where possible
    roots: List[Dict[str, Any]] = []
    for d in norm:
        pid = d.get(parent_key)
        # Root if it replies to the root_id OR if parent is missing/unknown
        if pid == root_id or pid not in by_id:
            roots.append(d)
        else:
            parent = by_id[pid]
            parent.setdefault(children_key, []).append(d)

    # Optionally sort siblings by original input order
    def sort_children(node: Dict[str, Any]):
        if children_key in node and node[children_key]:
            node[children_key].sort(key=lambda x: x["_order_index"])
            for c in node[children_key]:
                sort_children(c)

    if keep_order:
        for r in roots:
            sort_children(r)
        roots.sort(key=lambda x: x["_order_index"])

    # Strip helper keys
    def cleanup(node: Dict[str, Any]):
        node.pop("_order_index", None)
        for c in node.get(children_key, []):
            cleanup(c)

    for r in roots:
        cleanup(r)

    return roots

def getSinglePaperDetail():
    tree = threadify(
        reviews,
        id_key="id",
        parent_key="replyto",
        children_key="Sub(s)",   # keeps your existing name
        # root_id=reviews[0].forum   # optional; inferred if reviews[0] has .forum
    )
    del tree[0]["Sub(s)"]
    paper_reviews = tree[0]

    # Get the decision
    for revObj in reviews[1:]:
        rev = revObj.to_json()
        if rev["content"].get("decision"):
            paper_reviews["decision"] = rev["content"].get("decision")
            break
    
    # Add all comments and reviews
    paper_reviews["reviews"] = tree[1:]

    with open("all.json","w") as file:
        json.dump(paper_reviews,file,indent=4)
    

getSinglePaperDetail()

In [39]:
def review_comment_nesting(reviews:list):
    result = []
    base_id = reviews[0].forum
    # Since the first one is the paper itself
    cp_reviews = reviews[1:].copy() 
    while len(cp_reviews) > 0:
        item = cp_reviews.pop(0)
        review = item.to_json()
        # Main Parent review
        if review['replyto'] == base_id:
            result.append(review)
        else:
            found = False
            for res in result:
                if res.get("id") == review['replyto']:
                    if "Sub(s)" in res:
                        res["Sub(s)"].append(review)
                    else:
                        res["Sub(s)"] = [review]
                    found = True
            if found == False:
                cp_reviews.append(item)

    return result


res = review_comment_nesting(reviews)
for i in range(len(res)):
    with open(f"new_review_{i}.json","w") as file:
        json.dump(res[i],file,indent=4)

KeyboardInterrupt: 

In [None]:
data = []

for note in submissions:
    n = note.to_json()        # convert Note → dict
    c = n.get('content', {})  # the main content fields
    print(n)
    break
    # Extract core info
    data.append({
        'title': c.get('title', ''),
        'authors': ', '.join(c.get('authors', [])),
        'keywords': ', '.join(c.get('keywords', [])),
        'abstract': c.get('abstract', ''),
        'pdf_url': f"https://openreview.net{c.get('pdf', '')}",
        'forum_url': f"https://openreview.net/forum?id={n.get('forum', '')}",
        'invitation': n.get('invitation', ''),
        'signatures': ', '.join(n.get('signatures', [])),
        'created_date': pd.to_datetime(n.get('cdate', 0), unit='ms'),
        'last_modified': pd.to_datetime(n.get('tmdate', 0), unit='ms')
    })

# 4️⃣ Create a DataFrame
# df = pd.DataFrame(data)

# # 5️⃣ Display (cleaner formatting)
# pd.set_option('display.max_colwidth', 120)
# display(df.head())
# print(f"\nTotal submissions: {len(df)}")

{'id': 'ryzm6BATZ', 'original': 'B1zmTBAT-', 'cdate': 1518730189355, 'pdate': None, 'odate': None, 'mdate': None, 'tcdate': 1508953370259, 'tmdate': 1644493022819, 'ddate': None, 'number': 93, 'content': {'title': 'Image Quality Assessment Techniques Improve Training and Evaluation of Energy-Based Generative Adversarial Networks', 'abstract': "We propose a new, multi-component energy function for energy-based Generative Adversarial Networks (GANs) based on methods from the image quality assessment literature. Our approach expands on the Boundary Equilibrium Generative Adversarial Network (BEGAN) by outlining some of the short-comings of the original energy and loss functions. We address these short-comings by incorporating an l1 score, the Gradient Magnitude Similarity score, and a chrominance score into the new energy function. We then provide a set of systematic experiments that explore its hyper-parameters. We show that each of the energy function's components is able to represent a

In [26]:
df = pd.read_json('iclr2018_p0.json')
len(df.notes)

935