In [None]:
import requests
import textwrap
from IPython.display import display, Markdown

def long_print(msg: str, width: int = 140) -> None:
    """Print ``msg`` wrapped so that no output line exceeds ``width`` columns.

    Each line of ``msg`` is wrapped on its own, so line breaks already in
    the text (including blank lines between paragraphs) are kept and never
    count towards the width of the following line.

    Args:
        msg: Text to print; may contain newlines.
        width: Maximum length of an output line. Defaults to 140.

    Raises:
        ValueError: Propagated from ``textwrap`` when ``width`` is not positive
            and there is text to wrap.
    """
    # Wrapping the whole string in one call with replace_whitespace=False lets
    # embedded newlines sit in the middle of a "line" as far as textwrap's
    # width accounting is concerned, which yields breaks in the wrong places.
    wrapped_lines = [
        textwrap.fill(line, width=width, replace_whitespace=False)
        for line in msg.splitlines()
    ]
    print("\n".join(wrapped_lines))

# Base URL of the service; the endpoint URLs in the following cells are
# built by appending a path to it.
api = 'http://localhost:80'

# Health check: request the root endpoint and show the raw status and body.
url = api + '/'
# Bound the wait so an unresponsive server raises a timeout error instead of
# blocking the kernel indefinitely (requests has no default timeout).
response = requests.get(url, timeout=10)
print(f'{response.status_code=}')
print(f'{response.text=}')

response.status_code=200
response.text='{"statusCode":200,"body":"The app is up!"}'


In [2]:
# Open a new session on the service and keep its id for the following cells.
url = api + '/create_session'
response = requests.get(url, timeout=10)
# Stop on an HTTP error status here, so a failed call is reported as such
# rather than as a KeyError / JSON decoding error on the next line.
response.raise_for_status()
session_id = response.json()['session_id']
print(f'Session Id: {session_id}')

Session Id: 35a6e742-c456-4aef-839e-1ce6b95c4163


In [3]:
import httpx
import os

# Documents to fetch; the last path segment of each URL (plus ".pdf")
# becomes the local file name under ./data.
files = ['https://arxiv.org/pdf/2505.10543']

os.makedirs('./data', exist_ok=True)

for file_url in files:
    # Single quotes inside the f-string: reusing the enclosing double quotes
    # is only valid syntax from Python 3.12 onwards.
    file_name = f"./data/{file_url.split('/')[-1]}.pdf"
    if os.path.exists(file_name):
        # Already downloaded by a previous run.
        continue

    r = httpx.get(file_url, timeout=20, follow_redirects=True)
    # Check the status before writing: otherwise an error or redirect body
    # would be stored as a ".pdf" and, because of the existence check above,
    # never be replaced on later runs.
    r.raise_for_status()
    with open(file_name, 'wb') as pdf_file:
        pdf_file.write(r.content)

In [None]:
# Send the downloaded PDF to the upload endpoint as a multipart form,
# together with the id of the session it belongs to.
url = f'{api}/upload'
file_path = './data/2505.10543.pdf'

with open(file_path, 'rb') as pdf_handle:
    data = dict(session_id=session_id)
    files = dict(file=(file_path, pdf_handle))
    response = requests.post(url=url, data=data, files=files)

# NOTE(review): the messages below say "image" although a PDF is sent;
# the wording is left untouched.
if response.status_code != 200:
    print("Error uploading image:", response.status_code)
else:
    print("Image uploaded successfully!")

# Show the decoded JSON body returned by the service.
print(response.json())

Image uploaded successfully!
{'statusCode': 201, 'body': 'File 2505.10543.pdf uploaded.'}


In [43]:
%%time
url = api + '/prepare'

data = {"session_id": session_id, "recreate": False}
response = requests.post(url, json=data)

print(response)

<Response [200]>
CPU times: user 3.16 ms, sys: 2.71 ms, total: 5.87 ms
Wall time: 115 ms


In [None]:
# Post a question for the current session to the query endpoint; the
# response is kept in `response` for the next cell to render.
url = f'{api}/query'

data = dict(session_id=session_id, query="Summarize the document.")
response = requests.post(url=url, json=data)

In [None]:
# Render the answer from the previous cell as Markdown.
# NOTE(review): the body appears to be a JSON-encoded string (wrapped in
# double quotes, newlines escaped as \n) — confirm against the service.
# Decoding it first removes the surrounding quotes and resolves every escape
# sequence, not only \n.
try:
    decoded = response.json()
except ValueError:
    # Body is not valid JSON at all.
    decoded = None

if isinstance(decoded, str):
    # Keep the line structure of the answer by turning newlines into <br>.
    rendered = decoded.replace("\n", "<br>")
else:
    # Not a bare JSON string: fall back to patching the raw text.
    rendered = response.text.replace("\\n", "<br>")

display(Markdown(rendered))

"Here’s a concise summary of the document:<br><br>1. Purpose  <br>   - Evaluates large language models (LLMs) on reasoning tasks using advanced prompting strategies (self-reflection, heuristic mutation, planning).<br><br>2. Key Findings  <br>   - Scaling up model size generally improves reasoning performance.  <br>   - Advanced prompting can help smaller models catch up on complex tasks but may hurt them on simpler ones.  <br>   - Excessive reasoning traces can reduce small-model performance on basic tasks.  <br>   - Current benchmarks don’t fully capture real-world reasoning complexity.<br><br>3. Implications  <br>   - Strategic prompting narrows the gap between small and large models but has trade-offs.  <br>   - There’s limited evidence that LLMs truly self-improve or generalize reasoning to new dynamic tasks.  <br>   - Future work should develop more robust benchmarks and explore hybrid approaches.<br><br>4. Broader Context  <br>   - Reviews various research on language-model applications:  <br>     • Language understanding & QA  <br>     • Quantitative problem solving  <br>     • Reinforcement learning & planning  <br>     • Interactive decision-making & multi-agent environments  <br>   - Discusses training with human feedback, the impact of input length, and emerging LLM-based agent capabilities."