In [10]:
# 1. Install Python libraries
!pip install openai -q

# 2. Install LaTeX (This takes about 2-3 minutes)
!apt-get update -q
!apt-get install -y texlive-latex-recommended texlive-fonts-recommended texlive-latex-extra -q

Hit:1 https://cli.github.com/packages stable InRelease
Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:3 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Fetched 3,632 B in 1s (3,223 B/s)
Reading package lists...
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists...
Building dependency tree...
Reading state information...
texlive-fonts-recommended is already the newest version (2021.20220204

In [None]:
import os
import re
import tarfile
import subprocess
from openai import OpenAI
from google.colab import files # For easy downloading

# --- CONFIG ---

client = OpenAI(api_key="", base_url="https://api.perplexity.ai")
MODEL_NAME = "sonar"

class ArchiveTool:
    @staticmethod
    def extract(archive_path):
        extract_path = archive_path.replace(".tar.gz", "_extracted").replace(".tar", "_extracted")
        if not os.path.exists(extract_path):
            os.makedirs(extract_path)
        print(f"Extracting {archive_path}...")
        with tarfile.open(archive_path, "r:*") as tar:
            tar.extractall(path=extract_path)
        return extract_path

class WorkspaceTool:
    def __init__(self, directory):
        self.directory = directory
        self.images = [f for root, _, fs in os.walk(directory) for f in fs if f.lower().endswith(('.png', '.jpg', '.pdf'))]

    def find_main_tex(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith(".tex"):
                    path = os.path.join(root, file)
                    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                        if "\\begin{document}" in f.read():
                            return path
        return None

class DistillerTool:
    @staticmethod
    def process_section(title, content, available_images):
        prompt = f"Task: Create a LaTeX Beamer frame.\nTitle: {title}\nContent: {content[:2500]}\nGraphics: {available_images}\nRules: Return ONLY LaTeX code (\\begin{{frame}}...\\end{{frame}})."
        try:
            response = client.chat.completions.create(model=MODEL_NAME, messages=[{"role": "user", "content": prompt}])
            return response.choices[0].message.content
        except Exception as e:
            return f"\\begin{{frame}}{{{title}}}\\item Error: {e}\\end{{frame}}"

def run_agent_colab(archive_file):
    work_dir = ArchiveTool.extract(archive_file)
    ws = WorkspaceTool(work_dir)
    main_tex = ws.find_main_tex()

    if not main_tex: return print("Main .tex not found.")

    with open(main_tex, 'r', encoding='utf-8', errors='ignore') as f:
        sections = re.findall(r'\\section\{(.+?)\}(.*?)(?=\\section|\\end\{document\})', f.read(), re.DOTALL)

    frames = [DistillerTool.process_section(re.sub(r'\\.*\{.*\}', '', s[0]), s[1], ws.images) for s in sections[:8]]

    output_tex = os.path.join(work_dir, "presentation.tex")
    with open(output_tex, 'w') as f:
        f.write("\\documentclass{beamer}\n\\usetheme{Madrid}\n\\usepackage{graphicx}\n\\begin{document}\n" +
                "\n".join([f.replace("```latex", "").replace("```", "").strip() for f in frames]) +
                "\n\\end{document}")

    # Compile
    os.chdir(work_dir)
    print("Compiling PDF...")
    subprocess.run(["pdflatex", "-interaction=nonstopmode", "presentation.tex"], check=False)

    if os.path.exists("presentation.pdf"):
        print("Done! Downloading...")
        files.download("presentation.pdf")
    else:
        print("PDF failed to generate. Check logs.")

# --- EXECUTION ---
# Upload your .tar.gz file using the sidebar or run:
uploaded = files.upload()
for filename in uploaded.keys():
    run_agent_colab(filename)

Saving arXiv-2601.07654v1.tar.gz to arXiv-2601.07654v1.tar (1).gz
Extracting arXiv-2601.07654v1.tar (1).gz...


  tar.extractall(path=extract_path)


Compiling PDF...
Done! Downloading...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import os
import re
import tarfile
import subprocess
from openai import OpenAI
from google.colab import files # For easy downloading

# --- CONFIG ---
client = OpenAI(api_key="", base_url="https://api.perplexity.ai")
MODEL_NAME = "sonar"

class ArchiveTool:
    @staticmethod
    def extract(archive_path):
        extract_path = archive_path.replace(".tar.gz", "_extracted").replace(".tar", "_extracted")
        if not os.path.exists(extract_path):
            os.makedirs(extract_path)
        print(f"Extracting {archive_path}...")
        with tarfile.open(archive_path, "r:*") as tar:
            tar.extractall(path=extract_path)
        return extract_path

class WorkspaceTool:
    def __init__(self, directory):
        self.directory = directory
        self.images = [f for root, _, fs in os.walk(directory) for f in fs if f.lower().endswith(('.png', '.jpg', '.pdf'))]

    def find_main_tex(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith(".tex"):
                    path = os.path.join(root, file)
                    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                        if "\\begin{document}" in f.read():
                            return path
        return None

class DistillerTool:
    """Uses Perplexity to create professional-grade LaTeX slides."""
    @staticmethod
    def process_section(title, content, available_images):
        prompt = f"""
        Task: Create a highly professional LaTeX Beamer frame for a research presentation.
        Section Title: {title}
        Raw Paper Content: {content[:3000]}
        Available Graphics: {available_images}

        Formatting Rules:
        1. STRUCTURE: Use a 2-column layout (\\begin{{columns}}) if an image is relevant.
           - Left column (0.6\\textwidth): Key bullet points or a short paragraph.
           - Right column (0.4\\textwidth): A relevant image from the graphics list.
        2. ELEMENTS:
           - Use \\begin{{block}}{{Key Takeaway}} for important summaries.
           - Use \\begin{{itemize}} with clear, concise pointers.
           - Use \\textbf{{}} for technical terms.
        3. GRAPHICS: If you use an image, use \\includegraphics[width=\\linewidth]{{filename}} inside the column.
        4. OUTPUT: Return ONLY the LaTeX code for the frame. No markdown backticks.
        """
        try:
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.choices[0].message.content
        except Exception as e:
            return f"\\begin{{frame}}{{{title}}}\\item Error generating content.\\end{{frame}}"

def run_agent_colab(archive_file):
    # 1. Setup and Extraction
    work_dir = ArchiveTool.extract(archive_file)
    ws = WorkspaceTool(work_dir)
    main_tex = ws.find_main_tex()

    if not main_tex:
        return print("Main .tex file not found.")

    # 2. Parse Sections from the paper
    with open(main_tex, 'r', encoding='utf-8', errors='ignore') as f:
        raw_content = f.read()
        # Remove LaTeX comments to avoid confusing the AI
        clean_paper = re.sub(r'%.*', '', raw_content)
        sections = re.findall(r'\\section\{(.+?)\}(.*?)(?=\\section|\\end\{document\})', clean_paper, re.DOTALL)

    # 3. Generate Slides using Perplexity
    distiller = DistillerTool()
    frames = []
    print(f"Generating professional slides for {len(sections)} sections...")

    for title, body in sections[:8]: # Limit to first 8 sections for speed
        clean_title = re.sub(r'\\.*\{.*\}', '', title).strip()
        print(f"-> Processing: {clean_title}")
        frame_code = distiller.process_section(clean_title, body.strip(), ws.images)
        # Clean up any AI-generated markdown backticks
        frames.append(frame_code.replace("```latex", "").replace("```", "").strip())

    # 4. Assemble the Final TeX File (WITH THE TITLE PAGE)
    output_tex = os.path.join(work_dir, "presentation.tex")

    # Define Presentation Info
    title = "IsabeLLM: Automated Theorem Proving"
    author = "Gauranshi et al."
    institute = "IIIT Delhi"

    with open(output_tex, 'w', encoding='utf-8') as f:
        # Preamble: Standard setup + Theme + Metadata
        f.write("\\documentclass{beamer}\n")
        f.write("\\usetheme{metropolis}\n") # Using Metropolis for a modern look
        f.write("\\usepackage{graphicx}\n")
        f.write(f"\\title{{{title}}}\n")
        f.write(f"\\author{{{author}}}\n")
        f.write(f"\\institute{{{institute}}}\n")
        f.write(f"\\date{{\\today}}\n\n")

        # Document Start
        f.write("\\begin{document}\n\n")

        # --- THIS IS WHERE THE TITLE PAGE IS ADDED ---
        f.write("% Title Slide\n")
        f.write("\\begin{frame}\n  \\titlepage\n\\end{frame}\n\n")

        # --- TABLE OF CONTENTS (Optional but professional) ---
        f.write("% Table of Contents Slide\n")
        f.write("\\begin{frame}{Outline}\n  \\tableofcontents\n\\end{frame}\n\n")

        # Add generated frames
        f.write("\n\n".join(frames))

        # End Document
        f.write("\n\n\\end{document}")

    # 5. Compile to PDF
    print("Compiling professional PDF...")
    old_dir = os.getcwd()
    os.chdir(work_dir)
    try:
        # Run pdflatex (Metropolis often needs 2 passes for the progress bar)
        subprocess.run(["pdflatex", "-interaction=nonstopmode", "presentation.tex"], check=False)
        subprocess.run(["pdflatex", "-interaction=nonstopmode", "presentation.tex"], check=False)

        if os.path.exists("presentation.pdf"):
            print("SUCCESS! File is ready.")
            from google.colab import files
            files.download("presentation.pdf")
        else:
            print("PDF failed to generate. Check LaTeX logs in the sidebar.")
    finally:
        os.chdir(old_dir)

# --- EXECUTION ---
# Upload your .tar.gz file using the sidebar or run:
uploaded = files.upload()
for filename in uploaded.keys():
    run_agent_colab(filename)

Saving arXiv-2601.07654v1.tar.gz to arXiv-2601.07654v1.tar (1).gz
Extracting arXiv-2601.07654v1.tar (1).gz...
Generating professional slides for 8 sections...
-> Processing: Introduction


  tar.extractall(path=extract_path)


-> Processing: Background
-> Processing: Related Work
-> Processing: Model
-> Processing: IsabeLLM
-> Processing: Results
-> Processing: Discussion
-> Processing: Conclusion
Compiling professional PDF...
SUCCESS! File is ready.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import os
import re
import tarfile
import subprocess
from openai import OpenAI
from google.colab import files # For easy downloading

# --- CONFIG ---
client = OpenAI(api_key="", base_url="https://api.perplexity.ai")
MODEL_NAME = "sonar"

class ArchiveTool:
    @staticmethod
    def extract(archive_path):
        extract_path = archive_path.replace(".tar.gz", "_extracted").replace(".tar", "_extracted")
        if not os.path.exists(extract_path):
            os.makedirs(extract_path)
        print(f"Extracting {archive_path}...")
        with tarfile.open(archive_path, "r:*") as tar:
            tar.extractall(path=extract_path)
        return extract_path

class WorkspaceTool:
    def __init__(self, directory):
        self.directory = directory
        self.images = [f for root, _, fs in os.walk(directory) for f in fs if f.lower().endswith(('.png', '.jpg', '.pdf'))]

    def find_main_tex(self):
        for root, _, files in os.walk(self.directory):
            for file in files:
                if file.endswith(".tex"):
                    path = os.path.join(root, file)
                    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                        if "\\begin{document}" in f.read():
                            return path
        return None

class DistillerTool:
    """Enhanced Perplexity Tool for perfectly formatted Beamer slides."""
    @staticmethod
    def process_section(title, content, available_images):
        prompt = f"""
        Task: Create a LaTeX Beamer frame for a research paper titled "IsabeLLM".
        Section: {title}
        Content: {content[:3500]}
        Images available: {available_images}

        CRITICAL FORMATTING RULES:
        1. LAYOUT: You MUST use the 'columns' environment if an image is used.
           - Left Column (0.55\\textwidth): Concise bullet points ONLY.
           - Right Column (0.40\\textwidth): The \\includegraphics command.
        2. CONTENT LIMIT: Do not exceed 5 bullet points. If the content is long, summarize the "Key Takeaway".
        3. BLOCKS: Put the most important conclusion in a \\begin{{block}}{{Result}}...\\end{{block}} environment.
        4. IMAGES: Only use images from the provided list. Use \\centering and [width=\\linewidth].
        5. NO EXTRAS: Output ONLY the LaTeX code from \\begin{{frame}} to \\end{{frame}}. Do not use markdown backticks.
        """
        try:
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.choices[0].message.content
        except Exception as e:
            return f"\\begin{{frame}}{{{title}}}\\item Error: {e}\\end{{frame}}"

def run_agent_colab(archive_file):
    work_dir = ArchiveTool.extract(archive_file)
    ws = WorkspaceTool(work_dir)
    main_tex = ws.find_main_tex()

    if not main_tex: return print("Main .tex not found.")

    with open(main_tex, 'r', encoding='utf-8', errors='ignore') as f:
        clean_paper = re.sub(r'%.*', '', f.read())
        sections = re.findall(r'\\section\{(.+?)\}(.*?)(?=\\section|\\end\{document\})', clean_paper, re.DOTALL)

    distiller = DistillerTool()
    frames = []
    print(f"Generating professional slides...")

    for title, body in sections[:8]:
        clean_title = re.sub(r'\\.*\{.*\}', '', title).strip()
        print(f"-> Processing: {clean_title}")
        frame_code = distiller.process_section(clean_title, body.strip(), ws.images)

        # CLEANUP: Remove AI artifacts that break compilation
        frame_code = frame_code.replace("```latex", "").replace("```", "").strip()
        # Ensure frame title is properly escaped if AI forgot
        if not frame_code.startswith("\\begin{frame}{"):
            frame_code = f"\\begin{{frame}}{{{clean_title}}}\n" + frame_code + "\n\\end{frame}"

        frames.append(frame_code)

    # 4. Final Assembly
    output_tex = os.path.join(work_dir, "presentation.tex")
    with open(output_tex, 'w', encoding='utf-8') as f:
        f.write("\\documentclass[10pt]{beamer}\n") # Slightly smaller font to prevent overflow
        f.write("\\usetheme{metropolis}\n")
        f.write("\\usepackage{graphicx}\n")
        f.write("\\usepackage{booktabs}\n") # For better tables if AI generates them
        f.write(f"\\title{{IsabeLLM: Automated Theorem Proving}}\n")
        f.write(f"\\author{{Gauranshi et al.}}\n")
        f.write(f"\\institute{{IIIT Delhi}}\n\\date{{\\today}}\n")
        f.write("\\begin{document}\n\n\\begin{frame}\\titlepage\\end{frame}\n")
        f.write("\\begin{frame}{Outline}\\tableofcontents\\end{frame}\n\n")
        f.write("\n\n".join(frames))
        f.write("\n\n\\end{document}")

    # 5. Compilation
    os.chdir(work_dir)
    # Run twice for Metropolis progress bar and Table of Contents
    subprocess.run(["pdflatex", "-interaction=nonstopmode", "presentation.tex"], capture_output=True)
    subprocess.run(["pdflatex", "-interaction=nonstopmode", "presentation.tex"], capture_output=True)

    if os.path.exists("presentation.pdf"):
        files.download("presentation.pdf")
    else:
        print("Compilation failed. Check presentation.log in Colab files.")

# --- EXECUTION ---
# Upload your .tar.gz file using the sidebar or run:
uploaded = files.upload()
for filename in uploaded.keys():
    run_agent_colab(filename)

Saving arXiv-2601.07654v1.tar.gz to arXiv-2601.07654v1.tar.gz
Extracting arXiv-2601.07654v1.tar.gz...
Generating professional slides...
-> Processing: Introduction


  tar.extractall(path=extract_path)


-> Processing: Background
-> Processing: Related Work
-> Processing: Model
-> Processing: IsabeLLM
-> Processing: Results
-> Processing: Discussion
-> Processing: Conclusion
Compilation failed. Check presentation.log in Colab files.
