In [1]:
# Imports for the arXiv API
import feedparser
from datetime import datetime, timedelta, timezone

In [2]:
# Imports for Llama parse
from dotenv import load_dotenv
load_dotenv()
from llama_parse import LlamaParse
import requests

True

In [3]:
# Import for both
import dr_util.file_utils as fu
import os
import time

In [6]:
# Path constants. The data root can be overridden through the DR_DATA_ROOT
# environment variable; when it is unset the original hard-coded locations are used.
# Every *_DIR value keeps its trailing slash because the rest of the notebook builds
# file paths by plain string concatenation (e.g. f'{METADATA_DIR}{pdf_name}.json').
DATA_ROOT = os.environ.get("DR_DATA_ROOT", "/Users/daniellerothermel/drotherm/data").rstrip("/")
RAW_PDF_DIR = f"{DATA_ROOT}/raw_pdfs/"
PARSED_PDF_DIR = f"{DATA_ROOT}/parsed_pdfs/"
METADATA_DIR = f"{DATA_ROOT}/pdf_metadata/"

## Parsing Metrics

In [190]:
# Running count of parsed papers, sampled once every 10 minutes
total_parsed = [14, 25, 36, 45, 45, 53, 61, 70, 79, 79, 85, 88, 94, 95, 95, 107, 118, 125, 128, 133, 133, 141, 151, 160, 171, 183, 196, 209, 220, 226, 226, 240, 253, 262, 268, 275, 286, 293, 306, 312, 325]
# Papers finished inside each 10-minute window: difference between adjacent samples
diffs = [later - earlier for earlier, later in zip(total_parsed, total_parsed[1:])]
count = len(diffs)
total = sum(diffs)
# Six 10-minute windows per hour; each window spans 10 minutes
print(f">> Llama Parse Processed {total*6/count:0.2f} papers/hr or {total/(count*10):0.2f} papers/min on average over a few hours")

>> Llama Parse Processed 46.65 papers/hr or 0.78 papers/min on average over a few hours


## Util Fxns

In [7]:
def make_arxiv_api_query_by_ids(id_list):
    """Build an arXiv export-API URL that fetches the papers with the given ids.

    Args:
        id_list: Non-empty sequence of arXiv id strings; they are joined with commas.

    Returns:
        str: Query URL with `start=0` and `max_results` equal to the number of ids.

    Raises:
        AssertionError: If `id_list` is empty.
    """
    assert len(id_list) > 0

    # Ordered (name, value) pairs that make up the query string
    params = (
        ("id_list", ",".join(id_list)),
        ("start", 0),
        ("max_results", len(id_list)),
    )
    query_string = "&".join(f"{name}={value}" for name, value in params)
    return "https://export.arxiv.org/api/query" + "?" + query_string

In [8]:
def make_arxiv_api_query_by_author(author, max_results=100):
    """Build an arXiv export-API URL that searches for papers by author name.

    Args:
        author (str): Author name; spaces are replaced with `%20`.
        max_results (int): Value of the `max_results` query parameter.

    Returns:
        str: Query URL for an `au:"<author>"` search starting at result 0,
        sorted by submission date in descending order.
    """
    url_base = "https://export.arxiv.org/api/query"
    query_type = "search_query"
    # Encode the spaces outside the f-string: reusing the enclosing quote character
    # inside a replacement field is a SyntaxError on Python versions before 3.12.
    encoded_author = author.replace(" ", "%20")
    query_str = f'au:"{encoded_author}"'
    start = 0
    sort_str = "sortBy=submittedDate&sortOrder=descending"
    return f"{url_base}?{query_type}={query_str}&start={start}&max_results={max_results}&{sort_str}"

In [9]:
def paper_feed_to_structured_info(p_feed):
    """Reduce a parsed arXiv feed (or a single feed entry) to a flat metadata dict.

    Args:
        p_feed: Either a mapping with an 'entries' key, in which case only the first
            entry is used, or a single entry mapping.

    Returns:
        dict: Keys 'title', 'abstract' (the entry's 'summary'), 'published',
        'updated', 'authors' (list of author names) and 'pdf_link' (href of the
        last link titled 'pdf', or None when no such link exists).
    """
    entry = p_feed['entries'][0] if 'entries' in p_feed else p_feed

    # Collect every link titled 'pdf'; the final one wins if there are several
    pdf_hrefs = [
        link['href']
        for link in entry['links']
        if link.get('title', '') == 'pdf'
    ]
    pdf_link = pdf_hrefs[-1] if pdf_hrefs else None

    return dict(
        title=entry['title'],
        abstract=entry['summary'],
        published=entry['published'],
        updated=entry['updated'],
        authors=[author['name'] for author in entry['authors']],
        pdf_link=pdf_link,
    )

In [10]:
def filter_dicts_by_years(dict_list, years, date_key):
    """Keep only the dicts whose timestamp falls within the last `years` years.

    Args:
        dict_list: Iterable of dicts, each holding a '%Y-%m-%dT%H:%M:%SZ' timestamp
            string under `date_key`.
        years: Size of the look-back window; a year is approximated as 365 days.
        date_key: Key under which each dict stores its timestamp string.

    Returns:
        list: The dicts dated at or after the cutoff, in their original order.
    """
    # Cutoff is measured back from the current UTC time (leap days are ignored)
    cutoff = datetime.now(timezone.utc) - timedelta(days=years*365)

    def _is_recent(record):
        # Timestamps carry a literal 'Z', so they are interpreted as UTC
        stamp = datetime.strptime(record[date_key], '%Y-%m-%dT%H:%M:%SZ')
        return stamp.replace(tzinfo=timezone.utc) >= cutoff

    return list(filter(_is_recent, dict_list))

In [11]:
def download_pdf(url, save_path, timeout=30):
    """
    Downloads a PDF from the given URL and saves it to the specified location.

    Failures are reported with a printed message rather than raised, so a single
    bad download does not abort a batch run.

    Args:
    url (str): The URL of the PDF to download.
    save_path (str): The path (including filename) where the PDF will be saved.
    timeout (float): Seconds to wait on the server before giving up. Without a
        timeout, `requests.get` can block indefinitely on a stalled connection.

    Returns:
    bool: True if the file was written, False if the request failed, returned a
        non-200 status, or the file could not be written.
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url, timeout=timeout)

        # Check if the request was successful
        if response.status_code == 200:
            # Write the content to a file in binary mode
            with open(save_path, 'wb') as pdf_file:
                pdf_file.write(response.content)
            print(f"PDF downloaded and saved to {save_path}")
            return True
        print(f"Failed to download PDF. Status code: {response.status_code}")
    except Exception as e:
        # Deliberate best-effort: report and carry on (this includes timeouts)
        print(f"An error occurred: {e}")
    return False

## Combine Util Fxns

In [15]:
def query_dump_download_by_author(author, max_results):
    """Query arXiv for an author's papers, dump their metadata and download the PDFs.

    For every feed entry that has a PDF link, this writes a per-paper metadata JSON
    file into METADATA_DIR and downloads the PDF into RAW_PDF_DIR unless a file
    already exists at that path. A summary of the query is finally written to
    '<METADATA_DIR><author with underscores>_query_metadata.json'.

    Args:
        author (str): Author name to search for.
        max_results (int): Maximum number of results requested from the API.

    Returns:
        None. All results are written to disk.
    """
    print(f">> Getting papers from author: {author}")
    # First get all the recent papers by this author, ordered by date
    query = make_arxiv_api_query_by_author(author, max_results=max_results)
    parsed_feed = feedparser.parse(query)

    print(f">> Total number papers: {len(parsed_feed['entries'])}")
    structured_responses = [paper_feed_to_structured_info(pent) for pent in parsed_feed['entries']]

    pdfs_metadata = {}

    # Save all structured responses individually by pdf_url info
    # then download the pdf to the right location
    print(">> Dumping Metadata and Downloading PDFs")
    for sr in structured_responses:
        pdf_url = sr['pdf_link']
        if pdf_url is None:
            # paper_feed_to_structured_info yields None when an entry has no link
            # titled 'pdf'. Without a URL there is no file name to derive and nothing
            # to download, so skip the entry instead of crashing on None.split().
            # NOTE(review): presumably this covers the API's error entries -- confirm.
            print(f" - Skipping entry without a PDF link: {sr['title']}\n")
            continue
        pdf_name = pdf_url.split('/')[-1]
        pdfs_metadata[pdf_name] = {**sr}

        # Dump metadata
        metadata_path = f'{METADATA_DIR}{pdf_name}.json'
        fu.dump_file(sr, metadata_path, verbose=False)
        pdfs_metadata[pdf_name]['metadata_path'] = metadata_path

        # Download the raw PDF
        raw_path = f'{RAW_PDF_DIR}{pdf_name}.pdf'
        if not os.path.exists(raw_path):
            download_pdf(pdf_url.replace("arxiv.org", "export.arxiv.org"), raw_path)
            # Pause between downloads to stay gentle on the server
            time.sleep(4)
        pdfs_metadata[pdf_name]['raw_path'] = raw_path

        print(f" - {pdf_name}: {sr['title']}\n")

    query_metadata = {
        'author': author,
        'query': query,
        'max_results': max_results,
        # Counts every feed entry, including any skipped for lacking a PDF link
        'num_results': len(structured_responses),
        'pdfs_metadata': pdfs_metadata,
    }
    fu.dump_file(query_metadata, f'{METADATA_DIR}{author.replace(" ", "_")}_query_metadata.json', verbose=True)

In [171]:
# Requires the following two lines to run first
# import nest_asyncio
# nest_asyncio.apply()
def load_author_select_recent_llama_parse(author, num_years=2, max_parse=1):
    """Parse an author's recent, not-yet-parsed PDFs with LlamaParse and cache the results.

    Reads '<METADATA_DIR><author with underscores>_query_metadata.json', keeps the
    papers published within the last `num_years` years, and parses up to `max_parse`
    of those that do not yet have a '<PARSED_PDF_DIR><name>.pkl' file.

    Args:
        author (str): Author name, as used when the query metadata was dumped.
        num_years (int): Look-back window, in years, applied to the 'published' date.
        max_parse (int): Maximum number of papers to parse during this call.

    Returns:
        None. Parsed documents are written to PARSED_PDF_DIR.
    """
    # First load the author metadata
    author_metadata_path = f'{METADATA_DIR}{author.replace(" ", "_")}_query_metadata.json'
    amd = fu.load_file(author_metadata_path)
    all_papers = [{**v, 'name': k} for k, v in amd['pdfs_metadata'].items()]

    # Then get the papers from the last N years
    recent_papers = filter_dicts_by_years(all_papers, num_years, date_key="published")
    print(f">> {author} has {len(recent_papers)} / {len(all_papers)} in the last {num_years} years")

    parser = LlamaParse(result_type="markdown")

    # Then for each paper use llama parse to process and dump the paper
    num_parsed = 0
    for rp in recent_papers:
        if num_parsed >= max_parse:
            print(f">> Reached parse: {num_parsed}, break")
            break
        pdf_name = rp['name']
        full_pdf_path = rp['raw_path']
        parsed_pdf_path = f'{PARSED_PDF_DIR}{pdf_name}.pkl'
        if os.path.exists(parsed_pdf_path):
            # Already parsed on an earlier run
            continue
        if not os.path.exists(full_pdf_path):
            # The metadata records raw_path even when the download failed, so the
            # file may be absent. Skip it rather than send a missing file to the parser.
            print(f">> Raw pdf missing, skipping: {full_pdf_path}")
            continue
        print(f">> Begin parsing number: {num_parsed}")
        rp_docs = parser.load_data(full_pdf_path)
        fu.dump_file(rp_docs, parsed_pdf_path, verbose=True)
        print(f" - {len(rp_docs)} blocks for paper: {rp['title']}")
        num_parsed += 1
        # Short pause between consecutive parse requests
        time.sleep(3)
    

## Test Util Fxns

In [5]:
# Sample paper used by the test cells in this section: `arxiv_id` drives the
# single-paper query, `last_author` the author query, and `pdf_url` the download test.
title = "Gradient Matching for Domain Generalization"
first_author = "Yuge Shi"
last_author = "Gabriel Synnaeve"
arxiv_id = "2104.09937"
pdf_url = "https://arxiv.org/pdf/2104.09937"

### Query Single Paper, Extract Data

In [None]:
q1 = make_arxiv_api_query_by_ids([arxiv_id])
q1

In [None]:
paper_feed_to_structured_info(feedparser.parse(q1))

### Query Papers by Author

In [None]:
q2 = make_arxiv_api_query_by_author(last_author, max_results=1000)
q2

In [None]:
p2s = feedparser.parse(q2)
len(p2s['entries'])

In [None]:
parsed_p2s = [paper_feed_to_structured_info(pent) for pent in p2s['entries']]

In [None]:
parsed_p2s[-1]

In [None]:
within_2_yrs = filter_dicts_by_years(parsed_p2s, 2, date_key="published")

In [None]:
within_2_yrs[-1]

### Download PDF and Parse with LlamaParse

In [None]:
# The file name is the last URL segment (the arXiv id)
pdf_name = pdf_url.split('/')[-1]
# Use the RAW_PDF_DIR constant; `raw_pdf_path` is not defined anywhere in the notebook
full_pdf_path = f'{RAW_PDF_DIR}{pdf_name}.pdf'
download_pdf(pdf_url, full_pdf_path)

In [None]:
import nest_asyncio

nest_asyncio.apply()

In [None]:
parser = LlamaParse(result_type="markdown")

In [None]:
docs = parser.load_data(full_pdf_path)

In [None]:
print(docs[1].text[:1000])

In [None]:
# Use the PARSED_PDF_DIR constant; `parsed_pdf_path` is not defined at notebook level
full_parsed_pdf_path = f'{PARSED_PDF_DIR}{pdf_name}.pkl'
fu.dump_file(docs, full_parsed_pdf_path)

In [None]:
docs_load = fu.load_file(full_parsed_pdf_path)

In [None]:
print(docs_load[1].text[:1000])

## Query Arxiv API and Download PDF

#### He He: 67 (13, 27, 36, 40, 44)

In [149]:
query_dump_download_by_author('He He', 70)

>> Getting papers from author: He He
>> Total number papers: 67
>> Dumping Metadata and Downloading PDFs
 - 2409.12822v2: Language Models Learn to Mislead Humans via RLHF

 - 2407.04549v1: Spontaneous Reward Hacking in Iterative Self-Refinement

 - 2406.12158v1: LLMs Are Prone to Fallacies in Causal Inference

 - 2404.19733v3: Iterative Reasoning Preference Optimization

 - 2404.16019v1: The PRISM Alignment Project: What Participatory, Representative and
  Individualised Human Feedback Reveals About the Subjective and Multicultural
  Alignment of Large Language Models

 - 2404.09932v2: Foundational Challenges in Assuring Alignment and Safety of Large
  Language Models

 - 2404.00246v1: Your Co-Workers Matter: Evaluating Collaborative Capabilities of
  Language Models in Blocks World

 - 2402.12530v1: Parallel Structures in Pre-training Data Yield In-Context Learning

 - 2401.13986v1: Towards Consistent Natural-Language Explanations via
  Explanation-Consistency Finetuning

 - 2401.0617

#### Rajesh Ranganath: 70 (8, 18, 28, 35, 40)

In [150]:
query_dump_download_by_author('Rajesh Ranganath', 80)

>> Getting papers from author: Rajesh Ranganath
>> Total number papers: 70
>> Dumping Metadata and Downloading PDFs
 - 2407.07998v1: What's the score? Automated Denoising Score Matching for Nonlinear
  Diffusions

 - 2406.13660v1: Towards Minimal Targeted Updates of Language Models with Targeted
  Negative Training

 - 2406.04318v1: Adaptive Sampling of k-Space in Magnetic Resonance for Rapid Pathology
  Prediction

 - 2405.19534v3: Preference Learning Algorithms Do Not Learn Preference Rankings

 - 2403.00025v1: On the Challenges and Opportunities in Generative AI

 - 2401.08777v1: Robust Anomaly Detection for Particle Physics Using Multi-Background
  Representation Learning

 - 2312.01210v4: When accurate prediction models yield harmful self-fulfilling prophecies

 - 2311.12781v1: Quantifying Impairment and Disease Severity Using AI Models Trained on
  Healthy Subjects

 - 2310.03725v3: Stochastic interpolants with data-dependent couplings

 - 2308.12553v1: Don't blame Dataset Shift!

#### Eunsol Choi: 64 (13, 32, 39, 50, 54)

In [151]:
query_dump_download_by_author('Eunsol Choi', 70)

>> Getting papers from author: Eunsol Choi
>> Total number papers: 64
>> Dumping Metadata and Downloading PDFs
 - 2409.18110v1: Open-World Evaluation for Retrieving Diverse Perspectives

 - 2408.06303v1: Long-Form Answers to Visual Questions from Blind and Low Vision People

 - 2407.06249v1: CodeUpdateArena: Benchmarking Knowledge Editing on API Updates

 - 2406.17761v2: CaLMQA: Exploring culturally specific long-form question answering
  across 23 languages

 - 2406.17692v1: From Distributional to Overton Pluralism: Investigating Large Language
  Model Alignment

 - 2406.14670v1: Exploring Design Choices for Building Language-Specific LLMs

 - 2405.19597v1: SVFT: Parameter-Efficient Fine-Tuning with Singular Vectors

 - 2404.12447v3: AmbigDocs: Reasoning across Documents on Different Entities under the
  Same Name

 - 2403.03866v1: KIWI: A Dataset of Knowledge-Intensive Writing Instructions for
  Answering Research Questions

 - 2402.01591v2: BAT: Learning to Reason about Spatial Soun

#### Mengye Ren: 47 (10, 12, 14, 26, 36)

In [152]:
query_dump_download_by_author('Mengye Ren', 50)

>> Getting papers from author: Mengye Ren
>> Total number papers: 47
>> Dumping Metadata and Downloading PDFs
 - 2408.11208v1: PooDLe: Pooled and dense self-supervised learning from naturalistic
  videos

 - 2408.02226v2: ProCreate, Don't Reproduce! Propulsive Energy Diffusion for Creative
  Generation

 - 2404.19132v2: Integrating Present and Past in Unsupervised Continual Learning

 - 2403.15362v1: CoLLEGe: Concept Embedding Generation for Large Language Models

 - 2403.09613v1: Reawakening knowledge: Anticipatory recovery from catastrophic
  interference via structured training

 - 2402.00300v2: Self-supervised learning of video representations from a child's
  perspective

 - 2312.12736v2: Learning and Forgetting Unsafe Examples in Large Language Models

 - 2312.05269v2: LifelongMemory: Leveraging LLMs for Answering Queries in Long-form
  Egocentric Videos

 - 2311.17218v1: BIM: Block-Wise Self-Supervised Learning with Masked Image Modeling

 - 2311.02007v1: Towards Unsupervised Ob

#### Pavel Izmailov: 25 (2, 5, 9, 13, 17)

In [153]:
query_dump_download_by_author('Pavel Izmailov', 100)

>> Getting papers from author: Pavel Izmailov
>> Total number papers: 25
>> Dumping Metadata and Downloading PDFs
 - 2403.01272v1: Can a Confident Prior Replace a Cold Posterior?

 - 2312.09390v1: Weak-to-Strong Generalization: Eliciting Strong Capabilities With Weak
  Supervision

 - 2306.11074v1: Simple and Fast Group Robustness by Automatic Feature Reweighting

 - 2212.08013v2: FlexiViT: One Model for All Patch Sizes

 - 2210.11369v1: On Feature Learning in the Presence of Spurious Correlations

 - 2204.02937v2: Last Layer Re-Training is Sufficient for Robustness to Spurious
  Correlations

 - 2203.16481v1: On Uncertainty, Tempering, and Data Augmentation in Bayesian
  Classification

 - 2203.04449v2: Unsupervised learning of two-component nematicity from STM data on magic
  angle bilayer graphene

 - 2202.11678v3: Bayesian Model Selection, the Marginal Likelihood, and Generalization

 - 2106.11905v2: Dangers of Bayesian Model Averaging under Covariate Shift

 - 2106.05945v2: Does K

#### Lerrel Pinto: 68 (14, 23, 34, 43, 53)

In [154]:
query_dump_download_by_author('Lerrel Pinto', 70)

>> Getting papers from author: Lerrel Pinto
>> Total number papers: 68
>> Dumping Metadata and Downloading PDFs
 - 2410.02749v1: Training Language Models on Synthetic Edit Sequences Improves Code
  Synthesis

 - 2409.12192v1: DynaMo: In-Domain Dynamics Pretraining for Visuo-Motor Control

 - 2409.08276v3: AnySkin: Plug-and-play Skin Sensing for Robotic Touch

 - 2409.05865v1: Robot Utility Models: General Policies for Zero-Shot Deployment in New
  Environments

 - 2406.07539v2: BAKU: An Efficient Transformer for Multi-Task Policy Learning

 - 2406.04318v1: Adaptive Sampling of k-Space in Magnetic Resonance for Rapid Pathology
  Prediction

 - 2403.07870v1: OPEN TEACH: A Versatile Teleoperation System for Robotic Manipulation

 - 2403.03181v2: Behavior Generation with Latent Actions

 - 2402.10211v3: Hierarchical State Space Models for Continuous Sequence-to-Sequence
  Modeling

 - 2401.12202v2: OK-Robot: What Really Matters in Integrating Open-Knowledge Models for
  Robotics

 - 2312.0

#### Tal Linzen: 64 Papers (11, 19, 26, 35, 44)

In [155]:
query_dump_download_by_author('Tal Linzen', 1000)

>> Getting papers from author: Tal Linzen
>> Total number papers: 64
>> Dumping Metadata and Downloading PDFs
 - 2409.04556v1: How Does Code Pretraining Affect Language Model Task Performance?

 - 2407.04593v1: Testing learning hypotheses using neural networks by manipulating
  learning data

 - 2404.06214v2: [Call for Papers] The 2nd BabyLM Challenge: Sample-efficient pretraining
  on a developmentally plausible corpus

 - 2403.07202v1: SPAWNing Structural Priming Predictions from a Cognitively Motivated
  Parser

 - 2402.13956v3: Can You Learn Semantics Through Next-Word Prediction? The Case of
  Entailment

 - 2311.07811v2: In-context Learning Generalizes, But Not Always Robustly: The Case of
  Syntax

 - 2311.00445v2: A Systematic Comparison of Syllogistic Reasoning in Humans and Language
  Models

 - 2310.19956v2: The Impact of Depth on Compositional Generalization in Transformer
  Language Models

 - 2310.16142v1: A Language Model with Limited Memory Capacity Captures Interferenc

#### Kyunghyun Cho: 272 (30, 64, 85, 106, 134)

In [156]:
query_dump_download_by_author('Kyunghyun Cho', 1000)

>> Getting papers from author: Kyunghyun Cho
>> Total number papers: 272
>> Dumping Metadata and Downloading PDFs
 - 2409.18581v1: Using Deep Autoregressive Models as Causal Inference Engines

 - 2409.01931v2: On the design space between molecular mechanics and machine learning
  force fields

 - 2408.16218v1: Targeted Cause Discovery with Data-Driven Learning

 - 2408.13430v1: Analysis of the ICML 2023 Ranking Data: Can Authors' Opinions of Their
  Own Papers Assist Peer Review in Machine Learning?

 - 2408.00165v3: Non-convolutional Graph Neural Networks

 - 2407.18134v2: $\mathbb{X}$-Sample Contrastive Loss: Improving Contrastive Learning
  with Sample Similarity Graphs

 - 2407.21028v1: Antibody DomainBed: Out-of-Distribution Generalization in Therapeutic
  Protein Design

 - 2407.02736v1: MentalAgora: A Gateway to Advanced Personalized Care in Mental Health
  through Multi-Agent Debating and Attribute Control

 - 2407.00236v1: Closed-Form Test Functions for Biophysical Sequence Op

## Test Llama Parse Per-Author on Most Recent Papers

In [20]:
import nest_asyncio
nest_asyncio.apply()

In [167]:
MAX_PARSE=1

In [170]:
load_author_select_recent_llama_parse("Tal Linzen", num_years=5, max_parse=MAX_PARSE)

>> Tal Linzen has 44 / 64 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id 32bd492a-bde1-40f5-9188-83899fcd3e4d
 - 39 blocks for paper: Testing learning hypotheses using neural networks by manipulating
  learning data
>> Reached parse: 1, break


In [172]:
load_author_select_recent_llama_parse("Pavel Izmailov", num_years=5, max_parse=MAX_PARSE)

>> Pavel Izmailov has 17 / 25 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id 9ea86b08-73de-42fe-bf46-bb468919d185
. - 19 blocks for paper: Can a Confident Prior Replace a Cold Posterior?
>> Reached parse: 1, break


In [173]:
load_author_select_recent_llama_parse("Lerrel Pinto", num_years=5, max_parse=MAX_PARSE)

>> Lerrel Pinto has 53 / 68 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id 381ddbea-be48-4d4a-b9f6-c3ad488e41bc
... - 24 blocks for paper: Training Language Models on Synthetic Edit Sequences Improves Code
  Synthesis
>> Reached parse: 1, break


In [174]:
load_author_select_recent_llama_parse("Kyunghyun Cho", num_years=5, max_parse=MAX_PARSE)

>> Kyunghyun Cho has 134 / 272 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id fb74496b-3199-45ef-ae27-43e4da62eff3
.... - 16 blocks for paper: Using Deep Autoregressive Models as Causal Inference Engines
>> Reached parse: 1, break


In [175]:
load_author_select_recent_llama_parse("Mengye Ren", num_years=5, max_parse=MAX_PARSE)

>> Mengye Ren has 36 / 47 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id a9294576-9c4e-480f-a3cf-ed9397a3c18a
.... - 22 blocks for paper: PooDLe: Pooled and dense self-supervised learning from naturalistic
  videos
>> Reached parse: 1, break


In [176]:
load_author_select_recent_llama_parse("Rajesh Ranganath", num_years=5, max_parse=MAX_PARSE)

>> Rajesh Ranganath has 40 / 70 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id 5bf0d770-548a-44f8-bbf1-8a85c93697c0
.. - 27 blocks for paper: What's the score? Automated Denoising Score Matching for Nonlinear
  Diffusions
>> Reached parse: 1, break


In [177]:
load_author_select_recent_llama_parse("Eunsol Choi", num_years=5, max_parse=MAX_PARSE)

>> Eunsol Choi has 54 / 64 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id 3b6ede8f-76c1-42d1-b937-da835e5a4465
.. - 20 blocks for paper: Open-World Evaluation for Retrieving Diverse Perspectives
>> Reached parse: 1, break


In [178]:
load_author_select_recent_llama_parse("He He", num_years=5, max_parse=MAX_PARSE)

>> He He has 44 / 67 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id e8430822-c9a1-4489-8438-75d39dd79d89
........ - 23 blocks for paper: Language Models Learn to Mislead Humans via RLHF
>> Reached parse: 1, break


In [179]:
pdfn = '2409.04556v1'
read_doc = fu.load_file(f'{PARSED_PDF_DIR}{pdfn}.pkl')
print(read_doc[1].text)

Establishing a causal relationship between code pretraining and downstream performance is difficult. Earlier studies have tackled these questions by comparing off-the-shelf code and no-code models (see, inter alia, Kim et al. 2024; Coda-Forno et al. 2024). Such observational studies are limited by the design choices of model creators and the availability of information about hyperparameters and training data. Many of the models typically surveyed are proprietary, and don’t disclose this information. While pairs of open-source models differing only in their pretraining corpora do exist, such as Llama 2 & Code Llama (Touvron et al., 2023; Roziere et al., 2023) or Gemma & CodeGemma (Gemma Team et al., 2024; Google, 2024), they often come with two important caveats: first, the code-variants of the models are derived by taking the non-code variants and conducting additional pretraining on code data, meaning the comparisons cannot control for total data volume; second, each pair treats the i

## Run LlamaParse on up to 50 papers per author from the last 5 years

In [180]:
MAX_PARSE = 1
# Authors are visited round-robin: each round parses at most MAX_PARSE new papers
# per author, so 50 rounds cover up to 50 papers for every author.
authors_to_parse = [
    "Tal Linzen",
    "Pavel Izmailov",
    "Lerrel Pinto",
    "Kyunghyun Cho",
    "Mengye Ren",
    "Rajesh Ranganath",
    "Eunsol Choi",
    "He He",
]
for round_idx in range(50):
    for author in authors_to_parse:
        load_author_select_recent_llama_parse(author, num_years=5, max_parse=MAX_PARSE)
    
    

>> Tal Linzen has 44 / 64 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id a3f5fa8b-1324-41a2-8d03-27dc21c1e5db
 - 5 blocks for paper: [Call for Papers] The 2nd BabyLM Challenge: Sample-efficient pretraining
  on a developmentally plausible corpus
>> Reached parse: 1, break
>> Pavel Izmailov has 17 / 25 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id df7273e5-0537-4f37-90ac-d2d2f82af66f
.. - 49 blocks for paper: Weak-to-Strong Generalization: Eliciting Strong Capabilities With Weak
  Supervision
>> Reached parse: 1, break
>> Lerrel Pinto has 53 / 68 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id 277ff21e-2285-427e-af4a-5ac51a2e9ada
.. - 23 blocks for paper: DynaMo: In-Domain Dynamics Pretraining for Visuo-Motor Control
>> Reached parse: 1, break
>> Kyunghyun Cho has 134 / 272 in the last 5 years
>> Begin parsing number: 0
Started parsing the file under job_id 6f01a3f3-eec2-45b