In [4]:
from datetime import datetime

def archive_paper_under_date(filename, title, authors, tags, abstract, link, website_link, affiliation=None, notes=None, teaser_image_path=None, pipeline_image_path=None):
    """
    Archive a paper record in a Markdown file under today's first-level header.

    The record text is produced by ``generate_paper_record`` and is always
    placed at the top of the file:

    * If the first line of the file is already today's ``# YYYY-MM-DD`` header,
      the record is inserted directly beneath it, separated from the header by
      one blank line.
    * Otherwise a new header for today, followed by the record, is prepended to
      the existing content.
    * If the file does not exist, it is created with the header and the record.

    Args:
        filename: Path of the Markdown file to update or create.
        title, authors, tags, abstract, link, website_link, affiliation, notes,
        teaser_image_path, pipeline_image_path: Forwarded unchanged to
            ``generate_paper_record``.
    """
    current_date = datetime.now().strftime("%Y-%m-%d")
    header = f"# {current_date}\n\n"
    paper_record = generate_paper_record(title, authors, affiliation, tags, abstract, link, website_link, notes, teaser_image_path, pipeline_image_path)

    try:
        with open(filename, "r+", encoding="utf-8") as file:
            content = file.readlines()
            if content and content[0].strip() == header.strip():
                # Today's header is already the first line: file the record under it.
                # Guard against a header line that is the last line of the file and
                # has no trailing newline, which would glue the record onto it.
                if not content[0].endswith("\n"):
                    content[0] += "\n"
                # Keep exactly the same "header, blank line, record" layout that the
                # other two code paths produce.
                if len(content) < 2 or content[1].strip():
                    content.insert(1, "\n")
                content.insert(2, paper_record + "\n")
            else:
                # Otherwise, prepend today's header and the record.
                content = [header, paper_record + "\n"] + content
            # Rewrite the file in place; truncate so no stale bytes can survive
            # past the end of the newly written content.
            file.seek(0)
            file.writelines(content)
            file.truncate()
    except FileNotFoundError:
        # If the file does not exist, create it with the header and the record.
        with open(filename, "w", encoding="utf-8") as file:
            file.write(header + paper_record + "\n")

def generate_paper_record(
    title, authors, affiliation, tags, abstract, link, website_link, notes,
    teaser_image_path, pipeline_image_path):
    """
    Generate the Markdown text for a single paper record.

    Args:
        title: Paper title, rendered as a second-level header.
        authors: Author names, either one pre-formatted string or a list/tuple
            of names (joined with ", ").
        affiliation: Institution name(s), as a single string or an iterable of
            strings. The "Institutions" line is omitted when this is falsy.
        tags: Tag name(s), as a single string or an iterable of strings. Each
            tag is wrapped in backticks. ``None`` is treated as no tags.
        abstract: Abstract text, placed under an "Abstract" header.
        link: URL used for the "Paper Link" entry.
        website_link: Project website URL. The entry is omitted when this is
            falsy or equal to the "Project website not found" placeholder.
        notes: Optional notes; the "Notes" section is omitted when falsy.
        teaser_image_path: Optional image path; omitted when falsy.
        pipeline_image_path: Optional image path; omitted when falsy.

    Returns:
        The record as a Markdown string.
    """
    def _as_names(value):
        # A bare string must be treated as one name; iterating or joining it
        # directly would split it into single characters.
        if isinstance(value, str):
            return [value]
        return list(value or [])

    # Authors normally arrive as one pre-formatted string; also accept a list.
    if isinstance(authors, (list, tuple)):
        authors = ', '.join(authors)

    record = f"## {title}\n\n"
    record += f"- **Authors**: {authors}\n"
    if affiliation:
        record += f"- **Institutions**: {', '.join(_as_names(affiliation))}\n"
    tags_formatted = ', '.join(f"`{tag}`" for tag in _as_names(tags))
    record += f"- **Tags**: {tags_formatted}\n\n"

    record += f"### Abstract\n\n{abstract}\n\n"
    record += f"[Paper Link]({link})\n\n"

    if teaser_image_path:
        record += f"![Teaser Image]({teaser_image_path})\n\n"
    if pipeline_image_path:
        record += f"![Pipeline Image]({pipeline_image_path})\n\n"

    # Skip the website entry for the placeholder and for None/"" alike, so a
    # missing URL never renders as a "[Website Link](None)" link.
    if website_link and website_link != "Project website not found":
        record += f"[Website Link]({website_link})\n\n"

    # Only add a notes section when there are notes.
    if notes:
        record += f"### Notes\n\n{notes}\n\n"

    return record

import requests
from bs4 import BeautifulSoup

def fetch_paper_details(url, timeout=30, website_keyword="leg-manip"):
    """
    Fetch a paper's title, authors, abstract and project website from its page.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup,
    looking for ``h1.title.mathjax``, ``div.authors`` and
    ``blockquote.abstract.mathjax`` elements (the notebook calls this with
    arxiv.org ``/abs/`` URLs).

    Args:
        url: Address of the paper page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.
        website_keyword: Substring that identifies the project-website link
            among the page's ``<a href>`` values. The default keeps the
            previously hard-coded value; pass another substring for other
            papers, or a falsy value to skip the lookup.

    Returns:
        A ``(title, authors, abstract, project_website_url)`` tuple of strings.
        Any piece that cannot be found on the page is replaced by its
        "... not found" placeholder string.

    Raises:
        RuntimeError: If the download fails, the server answers with an HTTP
            error status, or parsing raises. Earlier versions returned an
            ``{'Error': ...}`` dict here, which broke callers that unpack the
            4-tuple; the message text is unchanged.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Do not parse an HTTP error page as if it were the paper page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract the paper title and remove the "Title:" label.
        title_element = soup.find('h1', class_='title mathjax')
        title = title_element.text.replace('\n', '').strip() if title_element else 'Title not found'
        title = title.replace("Title:", "").strip()

        # Extract the author information.
        authors_element = soup.find('div', class_='authors')
        authors = authors_element.text.replace('\n', ' ').replace('Authors:', '').strip() if authors_element else 'Authors not found'

        # Extract the abstract and remove the "Abstract:" label.
        abstract_element = soup.find('blockquote', class_='abstract mathjax')
        abstract = abstract_element.text.replace('\n', ' ').replace('Abstract:  ', '').strip() if abstract_element else 'Abstract not found'
        abstract = abstract.replace("Abstract:", "").strip()

        # Extract the project website URL: the first link whose href contains
        # the keyword. An empty keyword would match every link, so skip it.
        project_website_element = None
        if website_keyword:
            project_website_element = soup.find(
                'a', href=lambda href: href and website_keyword in href)
        project_website_url = project_website_element['href'] if project_website_element else "Project website not found"

        return title, authors, abstract, project_website_url
    except Exception as e:
        raise RuntimeError(f'Failed to fetch details due to {e}') from e

# # Example usage (outdated: assumes the old dict return value; the tuple-based example is further down)
# url = 'https://arxiv.org/abs/2403.20328'
# paper_details = fetch_paper_details(url)
# for key, value in paper_details.items():
#     print(f'{key}: {value}')

# url = 'https://arxiv.org/abs/2403.20328'
# title, authors, abstract, project_website_url = fetch_paper_details(url)
# print(title)
# print(authors)
# print(abstract)
# print(project_website_url)

from types import SimpleNamespace

In [5]:
# Short attribute aliases -> full topic-tag names used in paper records.
tags = SimpleNamespace(**{
    "nav": "Navigation",
    "mm": "Mobile Manipulation",
    "s2r": "Simulation to Reality",
    "il": "Imitation Learning",
    "bc": "Behavioral Cloning",
    "rl": "Reinforcement Learning",
    "review": "Review",
    "llm": "Large Language Models",
    "nerf": "NeRF",
    "m": "Manipulation",
})
# Short attribute aliases -> full university names used as affiliations.
unis = SimpleNamespace(**{
    "MIT": "Massachusetts Institute of Technology",
    "Stanford": "Stanford University",
    "CMU": "Carnegie Mellon University",
    "UCB": "University of California, Berkeley",
    "Harvard": "Harvard University",
    "Oxford": "University of Oxford",
    "Cambridge": "University of Cambridge",
    "ETH": "ETH Zurich - Swiss Federal Institute of Technology",
    "Imperial": "Imperial College London",
    "Tsinghua": "Tsinghua University",
    "iiis": "IIIS, Tsinghua University",
    "PKU": "Peking University",
    "TUM": "Technical University of Munich",
    "HKUST": "Hong Kong University of Science and Technology",
    "CUHK": "Chinese University of Hong Kong",
})
# Short attribute aliases -> full names of non-university research institutes.
ins = SimpleNamespace(**{
    "DeepMind": "Google DeepMind",
    "OpenAI": "OpenAI",
    "FAIR": "Facebook AI Research",
    "MSR": "Microsoft Research",
    "IBM": "IBM Research",
    "NVIDIA": "NVIDIA Research",
    "ShanghaiQizhi": "Shanghai Qizhi Institute",
})


In [6]:
# Paper handled by this cell.
link = "https://arxiv.org/abs/2404.01812"

# Metadata prepared for the record. Only the values actually passed to
# archive_paper_under_date below end up in the Markdown file.
affiliation = [unis.CUHK, unis.TUM]
t = [tags.nerf, tags.m]
notes = "These are the notes."
teaser_image_path = "imgs/2024-04-04_00-49.png"
pipeline_image_path = "imgs/2024-04-04_00-30_1.png"

# Download the page details, then file the record in papers.md.
(title, authors, abstract, project_website_url) = fetch_paper_details(link)
archive_paper_under_date(
    filename="papers.md",
    title=title,
    authors=authors,
    tags=t,
    abstract=abstract,
    link=link,
    website_link=project_website_url,
    teaser_image_path=teaser_image_path,
    # Optional fields, currently switched off; uncomment to include them:
    # affiliation=affiliation,
    # notes=notes,
    # pipeline_image_path=pipeline_image_path,
)

In [7]:
# Preset for https://arxiv.org/abs/2403.20328.
# NOTE: every name assigned in this first group is reassigned by the second
# group below, and nothing in between reads these values.
link = "https://arxiv.org/abs/2403.20328"

affiliation = [
    ins.ShanghaiQizhi,
    unis.HKUST,
    unis.CMU,
    unis.iiis,
]
t = [
    tags.rl,
    tags.bc,
    tags.mm,
]

notes = "These are the notes."
teaser_image_path = "../imgs/2024-04-04_00-30.png"
pipeline_image_path = "../imgs/2024-04-04_00-30_1.png"


# Preset for https://arxiv.org/abs/2403.19916; these assignments replace the
# values set by the group above.
link = "https://arxiv.org/abs/2403.19916"

affiliation = [
    unis.CUHK,
    unis.TUM,
]
t = [
    tags.review,
    tags.il,
]

notes = "These are the notes."
teaser_image_path = "../imgs/2024-04-04_00-30.png"
pipeline_image_path = "../imgs/2024-04-04_00-30_1.png"

