Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Control which files get moved to cache with gr.set_static_paths #7618

Merged
merged 10 commits into from Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/lemon-jobs-sneeze.md
@@ -0,0 +1,6 @@
---
"gradio": patch
"gradio_client": patch
---

fix:Control which files get moved to cache with gr.set_static_paths
1 change: 1 addition & 0 deletions client/python/gradio_client/documentation.py
Expand Up @@ -57,6 +57,7 @@ def extract_instance_attr_doc(cls, attr):
("gradio.route", "routes"),
("gradio.theme", "themes"),
("gradio_client.", "py-client"),
("gradio.utils", "helpers"),
]


Expand Down
2 changes: 1 addition & 1 deletion gradio/__init__.py
Expand Up @@ -97,6 +97,6 @@
TextArea,
)
from gradio.themes import Base as Theme
from gradio.utils import get_package_version
from gradio.utils import get_package_version, set_static_paths

__version__ = get_package_version()
1 change: 0 additions & 1 deletion gradio/blocks.py
Expand Up @@ -626,7 +626,6 @@ def __init__(

self.progress_tracking = None
self.ssl_verify = True

self.allowed_paths = []
self.blocked_paths = []
self.root_path = os.environ.get("GRADIO_ROOT_PATH", "")
Expand Down
16 changes: 16 additions & 0 deletions gradio/data_classes.py
Expand Up @@ -210,3 +210,19 @@ def __getitem__(self, index):

def __iter__(self):
return iter(self.root)


class _StaticFiles:
"""
Class to hold all static files for an app
"""

all_paths = []

def __init__(self, paths: list[str | pathlib.Path]) -> None:
    """Record ``paths`` on the instance and register them as static paths.

    Parameters:
        paths: File or directory paths, given as strings or ``pathlib.Path`` objects.
    """
    self.paths = paths
    # Resolve everything up front so the registry only ever holds resolved paths.
    resolved_paths = [pathlib.Path(entry).resolve() for entry in paths]
    # ``all_paths`` is mutated in place (not rebound), so the additions land on
    # whichever list the attribute lookup finds.
    self.all_paths.extend(resolved_paths)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this .extend() behavior a little unintuitive: I would have expected:

gr.set_static_paths([])

to clear the static paths. Especially since we call the method set..., I think it should replace the previous paths, not extend the previous paths.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea good point! The reason I decided to do it this way is in the case you're nesting various blocks to make a single demo. I think we can have it replace the previous paths and add an api to extend previous paths later based on user request.


@classmethod
def clear(cls):
    """Forget every registered static path by rebinding ``all_paths`` to a new empty list."""
    emptied: list = []
    cls.all_paths = emptied
4 changes: 3 additions & 1 deletion gradio/processing_utils.py
Expand Up @@ -18,7 +18,7 @@
from gradio_client import utils as client_utils
from PIL import Image, ImageOps, PngImagePlugin

from gradio import wasm_utils
from gradio import utils, wasm_utils
from gradio.data_classes import FileData, GradioModel, GradioRootModel
from gradio.utils import abspath, get_upload_folder, is_in_or_equal

Expand Down Expand Up @@ -262,6 +262,8 @@ def _move_to_cache(d: dict):
# This makes it so that the URL is not downloaded and speeds up event processing
if payload.url and postprocess and client_utils.is_http_url_like(payload.url):
payload.path = payload.url
elif utils.is_static_file(payload):
payload.path = payload.path
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
payload.path = payload.path
pass

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

elif not block.proxy_url:
# If the file is on a remote server, do not move it to cache.
if check_in_upload_folder and not client_utils.is_http_url_like(
Expand Down
7 changes: 6 additions & 1 deletion gradio/routes.py
Expand Up @@ -495,13 +495,18 @@ async def file(path_or_url: str, request: fastapi.Request):
utils.is_in_or_equal(abs_path, allowed_path)
for allowed_path in blocks.allowed_paths
)
is_static_file = utils.is_static_file(abs_path)
was_uploaded = utils.is_in_or_equal(abs_path, app.uploaded_file_dir)
is_cached_example = utils.is_in_or_equal(
abs_path, utils.abspath(utils.get_cache_folder())
)

if not (
created_by_app or in_allowlist or was_uploaded or is_cached_example
created_by_app
or in_allowlist
or was_uploaded
or is_cached_example
or is_static_file
):
raise HTTPException(403, f"File not allowed: {path_or_url}.")

Expand Down
58 changes: 58 additions & 0 deletions gradio/utils.py
Expand Up @@ -40,10 +40,12 @@

import anyio
import httpx
from gradio_client.documentation import document
from typing_extensions import ParamSpec

import gradio
from gradio.context import Context
from gradio.data_classes import FileData
from gradio.strings import en

if TYPE_CHECKING: # Only import for type checking (is False at runtime).
Expand Down Expand Up @@ -958,6 +960,62 @@ def is_in_or_equal(path_1: str | Path, path_2: str | Path):
return True


@document()
def set_static_paths(paths: list[str | Path]) -> None:
"""
Set the static paths to be served by the gradio app.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I understand (and testing the code), this method sets the static_paths for all Gradio apps defined until that Python session ends (or set_static_paths is called again). For example, if you have multiple Gradio apps in one jupyter notebook, this will affect all of them. We should explicitly describe this behavior here imo.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea good call!


Static files are not moved to the gradio cache and are served directly from the file system.
This function is useful when you want to serve files that you know will not be modified during the lifetime of the gradio app (like files used in gr.Examples).
By setting static paths, your app will launch faster and it will consume less disk space.

Parameters:
paths: List of filepaths or directory names to be served by the gradio app. If it is a directory name, ALL files located within that directory will be considered static and not moved to the gradio cache. This also means that ALL files in that directory will be accessible over the network.
Example:
import gradio as gr

# Paths can be a list of strings or pathlib.Path objects
# corresponding to filenames or directories.
gr.set_static_paths(paths=["test/test_files/"])

# The example files and the default value of the input
# will not be copied to the gradio cache and will be served directly.
demo = gr.Interface(
lambda s: s.rotate(45),
gr.Image(value="test/test_files/cheetah1.jpg", type="pil"),
gr.Image(),
examples=["test/test_files/bus.png"],
)

demo.launch()
"""
from gradio.data_classes import _StaticFiles

_StaticFiles.all_paths.extend([Path(p).resolve() for p in paths])


def is_static_file(file_path: Any):
    """Returns True if the file is a static file (and not moved to cache).

    The check is delegated to ``_is_static_file`` using the paths currently
    held in ``_StaticFiles.all_paths``.
    """
    from gradio.data_classes import _StaticFiles

    registered_paths = _StaticFiles.all_paths
    return _is_static_file(file_path, registered_paths)


def _is_static_file(file_path: Any, static_files: list[Path]) -> bool:
"""
Returns True if the file is a static file (i.e. it is in the static files list).
"""
if not isinstance(file_path, (str, Path, FileData)):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless I missed something, file_path is always a FileData in all invocations of _is_static_file() so the other cases are not necessary.

return False
if isinstance(file_path, FileData):
file_path = file_path.path
if isinstance(file_path, str):
file_path = Path(file_path)
if not file_path.exists():
return False
return any(is_in_or_equal(file_path, static_file) for static_file in static_files)


HTML_TAG_RE = re.compile("<.*?>")


Expand Down
12 changes: 12 additions & 0 deletions test/conftest.py
Expand Up @@ -70,3 +70,15 @@ def gradio_temp_dir(monkeypatch, tmp_path):
"""
monkeypatch.setenv("GRADIO_TEMP_DIR", str(tmp_path))
return tmp_path


@pytest.fixture(autouse=True)
def clear_static_files():
    """Reset the ``_StaticFiles`` registry once each test has finished.

    Tests should be independent of one another, so static paths registered by
    one test are cleared before the next one runs.
    """
    yield
    # Everything below the ``yield`` runs as teardown, after the test body.
    from gradio.data_classes import _StaticFiles

    _StaticFiles.clear()
44 changes: 44 additions & 0 deletions test/test_blocks.py
Expand Up @@ -1707,3 +1707,47 @@ def clear_func():
demo.postprocess_data(0, [gr.Chatbot(value=[])] * 3, None)
== [{"value": [], "__type__": "update"}] * 3
)


def test_static_files_single_app(connect, gradio_temp_dir):
    """Static default values and examples are not cached; prediction I/O still is."""
    static_paths = ["test/test_files/cheetah1.jpg", "test/test_files/bus.png"]
    gr.set_static_paths(paths=static_paths)
    demo = gr.Interface(
        lambda s: s.rotate(45),
        gr.Image(value="test/test_files/cheetah1.jpg", type="pil"),
        gr.Image(),
        examples=["test/test_files/bus.png"],
    )

    # Nothing got saved to cache while building the interface
    cached_before_predict = list(gradio_temp_dir.glob("**/*.*"))
    assert len(cached_before_predict) == 0

    with connect(demo) as client:
        client.predict("test/test_files/bus.png")

    # Input/Output got saved to cache
    cached_after_predict = list(gradio_temp_dir.glob("**/*.*"))
    assert len(cached_after_predict) == 2


def test_static_files_multiple_apps(gradio_temp_dir):
    """Static paths registered by separate calls both apply when the apps are combined."""
    # First app: a single file registered as static.
    gr.set_static_paths(paths=["test/test_files/cheetah1.jpg"])
    demo = gr.Interface(
        lambda s: s.rotate(45),
        gr.Image(value="test/test_files/cheetah1.jpg"),
        gr.Image(),
    )

    # Second app: a whole directory registered as static.
    gr.set_static_paths(paths=["test/test_files/images"])
    demo_2 = gr.Interface(
        lambda s: s.rotate(45),
        gr.Image(value="test/test_files/images/bus.png"),
        gr.Image(),
    )

    with gr.Blocks():
        demo.render()
        demo_2.render()

    # Nothing got saved to cache for either app
    cached_files = list(gradio_temp_dir.glob("**/*.*"))
    assert len(cached_files) == 0