From 13cdd082b8ee024a2e463b4627b8a4abdef00030 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 7 Nov 2025 12:30:14 -0500 Subject: [PATCH 1/2] try it! --- example/main.py | 1 + example/worker.py | 5 +++++ .../pyscript_fsspec_client/client.py | 14 ++++++++++++++ .../pyscript_fsspec_client/io.py | 10 ++++++++-- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/example/main.py b/example/main.py index 4b3be6b..36000f5 100644 --- a/example/main.py +++ b/example/main.py @@ -15,4 +15,5 @@ def console_print(x): print(x) pw.sync.session = io.request +pw.sync.batch = io.batch pw.sync.console_print = console_print diff --git a/example/worker.py b/example/worker.py index 8a37ce9..43f03ba 100644 --- a/example/worker.py +++ b/example/worker.py @@ -15,6 +15,11 @@ out = fs.cat("local/mdurant/code/fsspec-proxy/pyproject.toml", start=0, end=10) sync.console_print(str(("binary:", type(out), out))) +out = fs.cat_ranges( + paths=["local/mdurant/code/fsspec-proxy/pyproject.toml"] * 3, + starts=[0, 0, 20], ends=[1, 10, 30]) +sync.console_print(str(("binary:", type(out), out))) + fs.pipe_file("local/mdurant/code/fsspec-proxy/OUTPUT", b"hello world") diff --git a/pyscript-fsspec-client/pyscript_fsspec_client/client.py b/pyscript-fsspec-client/pyscript_fsspec_client/client.py index b5b5248..aba0e08 100644 --- a/pyscript-fsspec-client/pyscript_fsspec_client/client.py +++ b/pyscript-fsspec-client/pyscript_fsspec_client/client.py @@ -44,6 +44,20 @@ def rm_file(self, path): path = self._strip_protocol(path) self._call(f"delete/{path}", method="DELETE", binary=True) + def cat_ranges( + self, paths, starts, ends, max_gap=None, on_error="return", **kwargs + ): + logger.debug("cat_ranges: %s paths", len(paths)) + out = sync.batch( + [("GET", f"{self.base_url}/{path}", None, + ffi.to_js({"Range": f"bytes={s}-{e}"}), "binary") + for path, s, e in zip(paths, starts, ends)], + ) + return [(OSError(0, o) if isinstance(o, str) and o == "ISawAnError" + else bytes(o.to_py())) + for o in out] + + def _open( self, path, diff --git a/pyscript-fsspec-client/pyscript_fsspec_client/io.py b/pyscript-fsspec-client/pyscript_fsspec_client/io.py index 3dddb94..a2cba72 100644 --- a/pyscript-fsspec-client/pyscript_fsspec_client/io.py +++ b/pyscript-fsspec-client/pyscript_fsspec_client/io.py @@ -1,5 +1,4 @@ -import json -import pyscript +import asyncio import js from pyodide import ffi, console @@ -23,3 +22,10 @@ async def request(method, path, data=None, headers=None, if outmode is None: return return "ISawAnError" + + +async def batch(requests, **kwargs): + return asyncio.gather( + *[request(*r, **kwargs) for r in requests], + return_exceptions=True + ) From 721f44b71a23704c4f8df6736b596667091d4219 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 7 Nov 2025 14:03:59 -0500 Subject: [PATCH 2/2] working example (on my FS) --- example/worker.py | 19 ++++--- fsspec-proxy/fsspec_proxy/bytes_server.py | 2 +- .../pyscript_fsspec_client/client.py | 13 ++--- .../pyscript_fsspec_client/io.py | 53 ++++++++++++------- 4 files changed, 53 insertions(+), 34 deletions(-) diff --git a/example/worker.py b/example/worker.py index 43f03ba..76ce29d 100644 --- a/example/worker.py +++ b/example/worker.py @@ -6,21 +6,23 @@ import pandas as pd import pyscript_fsspec_client.client -fs = fsspec.filesystem("pyscript") -sync.console_print(str(fs.ls("local"))) +fs = fsspec.filesystem("pyscript", base_url="http://localhost:8000/local") +sync.console_print(str(fs.ls(""))) -out = fs.cat("local/mdurant/code/fsspec-proxy/pyproject.toml") +out = fs.cat("mdurant/code/fsspec-proxy/pyproject.toml") sync.console_print(str(("binary:", type(out), out))) -out = fs.cat("local/mdurant/code/fsspec-proxy/pyproject.toml", start=0, end=10) +out = fs.cat("mdurant/code/fsspec-proxy/pyproject.toml", start=0, end=10) sync.console_print(str(("binary:", type(out), out))) out = fs.cat_ranges( - paths=["local/mdurant/code/fsspec-proxy/pyproject.toml"] * 3, - starts=[0, 0, 20], ends=[1, 10, 30]) + paths=["mdurant/code/fsspec-proxy/pyproject.toml"] * 3, + starts=[0, 0, 20], + ends=[1, 10, 30] +) sync.console_print(str(("binary:", type(out), out))) -fs.pipe_file("local/mdurant/code/fsspec-proxy/OUTPUT", b"hello world") +fs.pipe_file("mdurant/code/fsspec-proxy/OUTPUT", b"hello world") def make_output(table): @@ -29,5 +31,6 @@ def make_output(table): new_div.innerHTML = table page.append(new_div) -my_data = pd.read_parquet("pyscript://Conda Stats/2017/01/2017-01-07.parquet") +my_data = pd.read_parquet("pyscript://2017/01/2017-01-07.parquet", + storage_options={"base_url": "http://localhost:8000/Conda Stats"}) make_output(my_data[:100].to_html()) diff --git a/fsspec-proxy/fsspec_proxy/bytes_server.py b/fsspec-proxy/fsspec_proxy/bytes_server.py index d262a2e..ef32f5e 100644 --- a/fsspec-proxy/fsspec_proxy/bytes_server.py +++ b/fsspec-proxy/fsspec_proxy/bytes_server.py @@ -38,7 +38,7 @@ async def list_dir(key, path): except FileNotFoundError: raise fastapi.HTTPException(status_code=404, detail="Item not found") out = [ - {"name": f"{key}/{o['name'].replace(fs_info['path'], '', 1).lstrip('/')}", + {"name": f"{o['name'].replace(fs_info['path'], '', 1).lstrip('/')}", "size": o["size"], "type": o["type"]} for o in out ] diff --git a/pyscript-fsspec-client/pyscript_fsspec_client/client.py b/pyscript-fsspec-client/pyscript_fsspec_client/client.py index aba0e08..fd7f0e3 100644 --- a/pyscript-fsspec-client/pyscript_fsspec_client/client.py +++ b/pyscript-fsspec-client/pyscript_fsspec_client/client.py @@ -1,6 +1,6 @@ """An fsspec filesystem that proxies via pyscriptapps.com.""" -from json import dumps, loads +from json import loads import logging from pyscript import sync, ffi @@ -16,7 +16,7 @@ class PyscriptFileSystem(AbstractFileSystem): protocol = "pyscript" - def __init__(self, base_url): + def __init__(self, base_url="http://0.0.0.0:8000/local"): super().__init__() self.base_url = base_url @@ -49,15 +49,16 @@ def cat_ranges( ): logger.debug("cat_ranges: %s paths", len(paths)) out = sync.batch( - [("GET", f"{self.base_url}/{path}", None, - ffi.to_js({"Range": f"bytes={s}-{e}"}), "binary") - for path, s, e in zip(paths, starts, ends)], + [{ + "args": ("GET", f"{self.base_url}/bytes/{path}"), + "kwargs": {"headers": ffi.to_js({"Range": f"bytes={s}-{e + 1}"}), "outmode": "bytes"} + } + for path, s, e in zip(paths, starts, ends)], ) return [(OSError(0, o) if isinstance(o, str) and o == "ISawAnError" else bytes(o.to_py())) for o in out] - def _open( self, path, diff --git a/pyscript-fsspec-client/pyscript_fsspec_client/io.py b/pyscript-fsspec-client/pyscript_fsspec_client/io.py index a2cba72..f54911a 100644 --- a/pyscript-fsspec-client/pyscript_fsspec_client/io.py +++ b/pyscript-fsspec-client/pyscript_fsspec_client/io.py @@ -1,31 +1,46 @@ import asyncio import js from pyodide import ffi, console +from pyscript import window + +LOGGING = False async def request(method, path, data=None, headers=None, outmode="text", **kwargs): - if data: - resp = await js.fetch(path, method=method, body=data.buffer, headers=headers or {}, - **kwargs) - else: - resp = await js.fetch(path, method=method, headers=headers or {}, - **kwargs) - if not resp.ok: + if LOGGING: + print(method, path, outmode, kwargs, headers) + try: + if data: + resp = await js.fetch(path, method=method, body=data.buffer, headers=headers or {}, + **kwargs) + else: + resp = await js.fetch(path, method=method, headers=ffi.to_js(headers) or {}, + **kwargs) + except Exception as e: + window.console.log(str(e)) return "ISawAnError" - if resp.status >= 400: - return "ISawAnError" - if outmode == "text": - return await resp.text() - if outmode == "bytes": - return await resp.arrayBuffer() - if outmode is None: - return - return "ISawAnError" + if not resp.ok: + out = "ISawAnError" + elif resp.status >= 400: + out = "ISawAnError" + elif outmode == "text": + out = await resp.text() + elif outmode == "bytes": + out = await resp.arrayBuffer() + elif outmode is None: + out = None + else: + out = "ISawAnError" + if LOGGING: + print(out) + return out -async def batch(requests, **kwargs): - return asyncio.gather( - *[request(*r, **kwargs) for r in requests], +async def batch(requests): + requests = [r.to_py() for r in requests] + out = asyncio.gather( + *[request(*r["args"], **r["kwargs"]) for r in requests], return_exceptions=True ) + return out