Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lite: Fix the analytics module to use asyncio to work in the Wasm env #5045

Merged
merged 14 commits into from Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/hungry-bobcats-own.md
@@ -0,0 +1,5 @@
---
"gradio": minor
---

feat:Lite: Fix the analytics module to use asyncio to work in the Wasm env
93 changes: 65 additions & 28 deletions gradio/analytics.py
@@ -1,20 +1,34 @@
""" Functions related to analytics and telemetry. """
from __future__ import annotations

import asyncio
import json
import os
import pkgutil
import threading
import urllib.parse
import warnings
from distutils.version import StrictVersion
from typing import Any

import requests

import gradio
from gradio import wasm_utils
from gradio.context import Context
from gradio.utils import GRADIO_VERSION

# The Pyodide HTTP client only exists inside the Wasm runtime.  We bind
# ``pyfetch`` into this module's namespace (so tests can patch it here) and,
# when Pyodide is absent, install an async stub that fails loudly if called.
try:
    from pyodide.http import pyfetch as pyodide_pyfetch  # type: ignore
except ImportError:

    async def pyodide_pyfetch(*args, **kwargs):
        """Stand-in coroutine used outside Pyodide; always raises."""
        raise NotImplementedError(
            "pyodide.http.pyfetch is not available in this environment."
        )


ANALYTICS_URL = "https://api.gradio.app/"
PKG_VERSION_URL = "https://api.gradio.app/pkg-version"

Expand All @@ -27,13 +41,50 @@ def analytics_enabled() -> bool:


def _do_analytics_request(url: str, data: dict[str, Any]) -> None:
    """Dispatch an analytics POST without blocking the caller.

    In the Wasm (Pyodide) environment there is a single thread with an event
    loop, so the request is scheduled as an asyncio task; in a normal Python
    process it runs on a background thread instead.
    """
    if wasm_utils.IS_WASM:
        # Fire-and-forget: schedule the coroutine on the running event loop.
        asyncio.ensure_future(_do_wasm_analytics_request(url=url, data=data))
        return

    threading.Thread(
        target=_do_normal_analytics_request,
        kwargs={"url": url, "data": data},
    ).start()


def _do_normal_analytics_request(url: str, data: dict[str, Any]) -> None:
    """Synchronously POST the analytics payload to ``url`` (non-Wasm path).

    Mutates ``data`` by attaching the local IP address.  Network failures are
    swallowed on purpose: telemetry must never break the user's app.
    """
    data["ip_address"] = get_local_ip_address()
    try:
        requests.post(url, data=data, timeout=5)
    except requests.ConnectionError:
        pass  # best effort only: drop the event when offline
    except requests.exceptions.ReadTimeout:
        pass  # likewise for a server that never answers


async def _do_wasm_analytics_request(url: str, data: dict[str, Any]) -> None:
# Wasm (Pyodide) counterpart of _do_normal_analytics_request: POSTs the
# analytics payload with pyodide.http.pyfetch.  Scheduled fire-and-forget via
# asyncio.ensure_future by _do_analytics_request.  The IP address is
# deliberately replaced with a fixed placeholder string (see the review
# discussion below for the privacy rationale).
data["ip_address"] = "No internet connection"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the current get_local_ip_address() doesn't work in the Wasm env, and even if we make its Wasm-compatible version, gathering users' public IP-address looks too much, I hardcoded the string literal representing the failure of IP address detection.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gathering users' public IP-address looks too much

What do you mean by looks too much. Do you mean that its too intrusive? Or that its not techincally feasible?

The reason that I ask is that main benefit of collecting analytics is that it allows us to measure daily, weekly, and monthly unique users which we might not be able to do if we don't collect IP addresses.

Copy link
Member Author

@whitphx whitphx Aug 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I felt it's too intrusive.
In the case of Gradio-lite, this telemetry will be sent from clients' device immediately after they open the app page, where they don't have any chance to set analytics_enabled=false to opt-out it. I think some amount of people consider it as a bad behavior to automatically access the third-party API (https://checkip.amazonaws.com/) to get the IP address and send it as analytics data, under the recent situation where people are sensitive to data privacy.
However, getting IP addresses itself might be allowed as long as they are not logged or properly stored securely (I'm not sure), so I would like some advice from experts.

Copy link
Member Author

@whitphx whitphx Aug 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I pushed a commit including the IP address fetcher (b77247c), though we should revert it if it looks not good anyway.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice @whitphx. Let's keep it for now -- as part of #4226, we might replace the IP address with a hash of the IP address so that we can still keep a track of unique users without sending IP addresses.


# We use urllib.parse.urlencode to encode the data as a form.
# Ref: https://docs.python.org/3/library/urllib.request.html#urllib-examples
body = urllib.parse.urlencode(data).encode("ascii")
headers = {
"Content-Type": "application/x-www-form-urlencoded",
}

# Mirror the 5-second timeout of the requests.post call in the non-Wasm
# path; a timed-out request is dropped silently, matching the best-effort
# behavior of the rest of this module.
try:
await asyncio.wait_for(
pyodide_pyfetch(url, method="POST", headers=headers, body=body),
timeout=5,
)
except asyncio.TimeoutError:
pass # do not push analytics if no network


def version_check():
try:
version_data = pkgutil.get_data(__name__, "version.txt")
Expand Down Expand Up @@ -84,13 +135,10 @@ def initiated_analytics(data: dict[str, Any]) -> None:
if not analytics_enabled():
return

threading.Thread(
target=_do_analytics_request,
kwargs={
"url": f"{ANALYTICS_URL}gradio-initiated-analytics/",
"data": data,
},
).start()
_do_analytics_request(
url=f"{ANALYTICS_URL}gradio-initiated-analytics/",
data=data,
)


def launched_analytics(blocks: gradio.Blocks, data: dict[str, Any]) -> None:
Expand Down Expand Up @@ -142,30 +190,22 @@ def launched_analytics(blocks: gradio.Blocks, data: dict[str, Any]) -> None:
"targets": targets_telemetry,
"blocks": blocks_telemetry,
"events": [str(x["trigger"]) for x in blocks.dependencies],
"is_wasm": wasm_utils.IS_WASM,
}

abidlabs marked this conversation as resolved.
Show resolved Hide resolved
data.update(additional_data)

threading.Thread(
target=_do_analytics_request,
kwargs={
"url": f"{ANALYTICS_URL}gradio-launched-telemetry/",
"data": data,
},
).start()
_do_analytics_request(url=f"{ANALYTICS_URL}gradio-launched-telemetry/", data=data)


def integration_analytics(data: dict[str, Any]) -> None:
    """Report an integration event to the Gradio analytics endpoint.

    No-op when analytics is disabled.  The request itself is dispatched by
    ``_do_analytics_request``, which chooses a background thread or an
    asyncio task depending on the runtime environment.

    Note: the pasted diff showed both the pre-refactor ``threading.Thread``
    body and the new dispatcher call; executed together they would have sent
    the event twice, so only the post-merge dispatcher call is kept here.
    """
    if not analytics_enabled():
        return

    _do_analytics_request(
        url=f"{ANALYTICS_URL}gradio-integration-analytics/",
        data=data,
    )


def error_analytics(message: str) -> None:
Expand All @@ -179,10 +219,7 @@ def error_analytics(message: str) -> None:

data = {"error": message}

threading.Thread(
target=_do_analytics_request,
kwargs={
"url": f"{ANALYTICS_URL}gradio-error-analytics/",
"data": data,
},
).start()
_do_analytics_request(
url=f"{ANALYTICS_URL}gradio-error-analytics/",
data=data,
)
42 changes: 28 additions & 14 deletions gradio/blocks.py
Expand Up @@ -721,8 +721,9 @@ def __init__(
else analytics.analytics_enabled()
)
if self.analytics_enabled:
t = threading.Thread(target=analytics.version_check)
t.start()
if not wasm_utils.IS_WASM:
t = threading.Thread(target=analytics.version_check)
t.start()
else:
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "True"
super().__init__(render=False, **kwargs)
Expand Down Expand Up @@ -761,7 +762,7 @@ def __init__(
self.root_path = ""
self.root_urls = set()

if not wasm_utils.IS_WASM and self.analytics_enabled:
if self.analytics_enabled:
is_custom_theme = not any(
self.theme.to_dict() == built_in_theme.to_dict()
for built_in_theme in BUILT_IN_THEMES.values()
Expand Down Expand Up @@ -1863,7 +1864,7 @@ def reverse(text):
if self.local_url.startswith("https") or self.is_colab
else "http"
)
if not self.is_colab:
if not wasm_utils.IS_WASM and not self.is_colab:
print(
strings.en["RUNNING_LOCALLY_SEPARATED"].format(
self.protocol, self.server_name, self.server_port
Expand All @@ -1873,13 +1874,13 @@ def reverse(text):
if self.enable_queue:
self._queue.set_url(self.local_url)

# Cannot run async functions in background other than app's scope.
# Workaround by triggering the app endpoint
if not wasm_utils.IS_WASM:
# Cannot run async functions in background other than app's scope.
# Workaround by triggering the app endpoint
requests.get(f"{self.local_url}startup-events", verify=ssl_verify)

if wasm_utils.IS_WASM:
abidlabs marked this conversation as resolved.
Show resolved Hide resolved
return TupleNoPrint((self.server_app, self.local_url, self.share_url))
else:
pass
# TODO: Call the startup endpoint in the Wasm env too.

utils.launch_counter()
self.is_sagemaker = utils.sagemaker_check()
Expand Down Expand Up @@ -1909,7 +1910,12 @@ def reverse(text):

# If running in a colab or not able to access localhost,
# a shareable link must be created.
if _frontend and (not networking.url_ok(self.local_url)) and (not self.share):
if (
_frontend
and not wasm_utils.IS_WASM
and not networking.url_ok(self.local_url)
and not self.share
):
raise ValueError(
"When localhost is not accessible, a shareable link must be created. Please set share=True or check your proxy settings to allow access to localhost."
)
Expand All @@ -1930,6 +1936,8 @@ def reverse(text):
if self.share:
if self.space_id:
raise RuntimeError("Share is not supported when you are in Spaces")
if wasm_utils.IS_WASM:
raise RuntimeError("Share is not supported in the Wasm environment")
try:
if self.share_url is None:
self.share_url = networking.setup_tunnel(
Expand All @@ -1955,11 +1963,11 @@ def reverse(text):
)
)
else:
if not (quiet):
if not quiet and not wasm_utils.IS_WASM:
print(strings.en["PUBLIC_SHARE_TRUE"])
self.share_url = None

if inbrowser:
if inbrowser and not wasm_utils.IS_WASM:
link = self.share_url if self.share and self.share_url else self.local_url
webbrowser.open(link)

Expand Down Expand Up @@ -2040,12 +2048,18 @@ def reverse(text):
utils.show_tip(self)

# Block main thread if debug==True
if debug or int(os.getenv("GRADIO_DEBUG", 0)) == 1:
if debug or int(os.getenv("GRADIO_DEBUG", 0)) == 1 and not wasm_utils.IS_WASM:
self.block_thread()
# Block main thread if running in a script to stop script from exiting
is_in_interactive_mode = bool(getattr(sys, "ps1", sys.flags.interactive))

if not prevent_thread_lock and not is_in_interactive_mode:
if (
not prevent_thread_lock
and not is_in_interactive_mode
# In the Wasm env, we don't have to block the main thread because the server won't be shut down after the execution finishes.
# Moreover, we MUST NOT do it because there is only one thread in the Wasm env and blocking it will stop the subsequent code from running.
and not wasm_utils.IS_WASM
):
self.block_thread()

return TupleNoPrint((self.server_app, self.local_url, self.share_url))
Expand Down
25 changes: 23 additions & 2 deletions test/test_analytics.py
@@ -1,3 +1,4 @@
import asyncio
import ipaddress
import json
import os
Expand All @@ -7,7 +8,7 @@
import pytest
import requests

from gradio import analytics
from gradio import analytics, wasm_utils
from gradio.context import Context

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
Expand All @@ -33,7 +34,7 @@ def test_error_analytics_doesnt_crash_on_connection_error(
):
monkeypatch.setenv("GRADIO_ANALYTICS_ENABLED", "True")
mock_post.side_effect = requests.ConnectionError()
analytics._do_analytics_request("placeholder", {})
analytics._do_normal_analytics_request("placeholder", {})
mock_post.assert_called()

@mock.patch("requests.post")
Expand All @@ -42,6 +43,26 @@ def test_error_analytics_successful(self, mock_post, monkeypatch):
analytics.error_analytics("placeholder")
mock_post.assert_called()

@mock.patch.object(wasm_utils, "IS_WASM", True)
@mock.patch("gradio.analytics.pyodide_pyfetch")
@pytest.mark.asyncio
async def test_error_analytics_successful_in_wasm_mode(
    self, pyodide_pyfetch, monkeypatch
):
    """error_analytics() in the Wasm env must schedule an asyncio task
    that ends up calling pyodide's pyfetch."""
    monkeypatch.setenv("GRADIO_ANALYTICS_ENABLED", "True")

    analytics.error_analytics("placeholder")

    # Await all background tasks so the fire-and-forget request completes.
    # Ref: https://superfastpython.com/asyncio-wait-for-tasks/#How_to_Wait_for_All_Background_Tasks
    # Use get_running_loop(): calling get_event_loop() from inside a
    # coroutine is deprecated since Python 3.10.
    loop = asyncio.get_running_loop()
    all_tasks = asyncio.all_tasks(loop)
    # discard() (not remove()) so a missing current task cannot raise KeyError.
    all_tasks.discard(asyncio.current_task())
    if all_tasks:  # asyncio.wait() raises ValueError on an empty set
        await asyncio.wait(all_tasks)

    pyodide_pyfetch.assert_called()


class TestIPAddress:
@pytest.mark.flaky
Expand Down