From b50b27df32ca9dfd932100c4f035a350b7f12cff Mon Sep 17 00:00:00 2001 From: dvsrepo Date: Sat, 20 Aug 2022 10:22:47 +0200 Subject: [PATCH 01/31] docs: Add base documentation --- docs/community/telemetry.md | 46 +++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 docs/community/telemetry.md diff --git a/docs/community/telemetry.md b/docs/community/telemetry.md new file mode 100644 index 0000000000..1447aa9b09 --- /dev/null +++ b/docs/community/telemetry.md @@ -0,0 +1,46 @@ +# Telemetry +Rubrix uses telemetry to report anonymous usage and error information. As an open-source software, this type of information is important to improve and understand how the product is used. + +## How to opt-out +You can opt-out of telemetry reporting using the `ENV` variable `var_name_tbd` before launching the server. Setting this variable to `false` will completely disable telemetry reporting. + +If you are a Linux/MacOs users you should run: + +```bash +bash command to disable telemetry +``` + +If you are Windows users you should run: + +```bash +bash command to disable telemetry +``` + +To opt-in again, you can set the variable to `true`. + +## Why reporting telemetry +Anonymous telemetry information enable us to continously improve the product and detect recurring problems to better serve all users. We collect aggregated information about general usage and errors. We do NOT collect any information of users' data records, datasets, or metadata information. + +## Sensitive data +We do not collect any piece of information related to the source data you store in Rubrix. We don't identify individual users. Your data does not leave your server at any time: + +* No dataset record is collected. +* No dataset names or metadata are collected. + +## Information reported +The following usage and error information is reported: + + +* exhaustive ist of fields/info +* ... + +This is performed by registering information from the following API methods: + +* `/api/me` +* `/api/dataset/.../bulk` +* API errors + + +For transparency, you can inspect the source code where this is performed here (add link to the source). + +If you have any doubts, don't hesitate to join our [Slack channel](https://join.slack.com/t/rubrixworkspace/shared_invite/zt-whigkyjn-a3IUJLD7gDbTZ0rKlvcJ5g) or open a GitHub issue. We'd be very happy to discuss about how we can improve this. From 6dfbf048da7a0354794eadbc41d2bcaafadb2ab4 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Tue, 23 Aug 2022 17:04:29 +0200 Subject: [PATCH 02/31] wip: define telemetry components --- src/rubrix/server/apis/v0/handlers/users.py | 6 +- src/rubrix/server/commons/telemetry.py | 66 +++++++++++++++++++++ src/rubrix/server/errors/api_errors.py | 3 + src/rubrix/server/settings.py | 2 + 4 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 src/rubrix/server/commons/telemetry.py diff --git a/src/rubrix/server/apis/v0/handlers/users.py b/src/rubrix/server/apis/v0/handlers/users.py index 14d5668b2b..178c503071 100644 --- a/src/rubrix/server/apis/v0/handlers/users.py +++ b/src/rubrix/server/apis/v0/handlers/users.py @@ -22,10 +22,7 @@ @router.get( - "/me", - response_model=User, - response_model_exclude_none=True, - operation_id="whoami", + "/me", response_model=User, response_model_exclude_none=True, operation_id="whoami" ) async def whoami(current_user: User = Security(auth.get_user, scopes=[])): """ @@ -41,4 +38,5 @@ async def whoami(current_user: User = Security(auth.get_user, scopes=[])): The current user """ + return current_user diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py new file mode 100644 index 0000000000..f1ae59b797 --- /dev/null +++ b/src/rubrix/server/commons/telemetry.py @@ -0,0 +1,66 @@ +import dataclasses +import uuid +from typing import Any, Dict, Optional + +from rubrix.server.errors import RubrixServerError +from rubrix.server.services.tasks.commons import TaskType +from rubrix.server.settings import settings + + +@dataclasses.dataclass +class _TelemetryClient: + + __API_KEY__ = "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" + + __server_id__: Optional[uuid.UUID] = dataclasses.field(init=False, default=None) + + __INSTANCE__: "_TelemetryClient" = None + + @classmethod + def get(cls): + if settings.enable_telemetry: + if cls.__INSTANCE__ is None: + cls.__INSTANCE__ = cls() + return cls.__INSTANCE__ + + def __post_init__(self): + import platform + import sys + + import analytics + + from rubrix import __version__ + + analytics.write_key = self.__API_KEY__ + self.__server_id__ = uuid.UUID(int=uuid.getnode()) + self.__system_info__ = { + "system": platform.system(), + "machine": platform.machine(), + "platform": platform.platform(), + "python_version": sys.version, + "sys_version": platform.version(), + "rubrix_version": __version__, + } + + def track_data(self, action: str, data: Dict[str, Any]): + import analytics + + analytics.track(self.__server_id__, action, {**data, **self.__system_info__}) + + +async def track_error(error: RubrixServerError): + client = _TelemetryClient.get() + if client: + client.track_data("ServerError", {"code": error.code}) + + +async def track_bulk(task: TaskType, records: int): + client = _TelemetryClient.get() + if client: + client.track_data("BulkData", {"task": task, records: records}) + + +async def track_login(): + client = _TelemetryClient.get() + if client: + client.track_data("UserLogged", {}) diff --git a/src/rubrix/server/errors/api_errors.py b/src/rubrix/server/errors/api_errors.py index 688f49828d..4b1128110c 100644 --- a/src/rubrix/server/errors/api_errors.py +++ b/src/rubrix/server/errors/api_errors.py @@ -5,6 +5,7 @@ from fastapi.exception_handlers import http_exception_handler from pydantic import BaseModel +from rubrix.server.commons import telemetry from rubrix.server.errors.adapter import exception_to_rubrix_error from rubrix.server.errors.base_errors import RubrixServerError @@ -40,6 +41,8 @@ class APIErrorHandler: async def common_exception_handler(request: Request, error: Exception): """Wraps errors as custom generic error""" rubrix_error = exception_to_rubrix_error(error) + await telemetry.track_error(rubrix_error) + return await http_exception_handler( request, RubrixServerHTTPException(rubrix_error) ) diff --git a/src/rubrix/server/settings.py b/src/rubrix/server/settings.py index 11068f38da..71644beceb 100644 --- a/src/rubrix/server/settings.py +++ b/src/rubrix/server/settings.py @@ -82,6 +82,8 @@ class ApiSettings(BaseSettings): default=50, gt=0, le=100, description="Max number of fields in metadata" ) + enable_telemetry: bool = True + @validator("disable_es_index_template_creation", always=True) def check_index_template_creation_value(cls, value): From c78a4e5cb2d6bb5be844871e448dab0cd8da2dc2 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 13:33:45 +0200 Subject: [PATCH 03/31] refactor: using client instance --- src/rubrix/server/commons/telemetry.py | 34 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index f1ae59b797..677189cb43 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -2,19 +2,36 @@ import uuid from typing import Any, Dict, Optional +from rubrix.server.commons.models import TaskType from rubrix.server.errors import RubrixServerError -from rubrix.server.services.tasks.commons import TaskType from rubrix.server.settings import settings +try: + from analytics import Client +except ModuleNotFoundError: + # TODO: show some warning info + settings.enable_telemetry = False + + +def _configure_analytics() -> Client: + API_KEY = "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" + + return Client( + write_key=API_KEY, + gzip=True, + send=True, # TODO: set to False for testing + ) + @dataclasses.dataclass class _TelemetryClient: - __API_KEY__ = "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" + __INSTANCE__: "_TelemetryClient" = None __server_id__: Optional[uuid.UUID] = dataclasses.field(init=False, default=None) - - __INSTANCE__: "_TelemetryClient" = None + __client__: Client = dataclasses.field( + init=False, default_factory=_configure_analytics + ) @classmethod def get(cls): @@ -27,11 +44,8 @@ def __post_init__(self): import platform import sys - import analytics - from rubrix import __version__ - analytics.write_key = self.__API_KEY__ self.__server_id__ = uuid.UUID(int=uuid.getnode()) self.__system_info__ = { "system": platform.system(), @@ -43,9 +57,9 @@ def __post_init__(self): } def track_data(self, action: str, data: Dict[str, Any]): - import analytics - - analytics.track(self.__server_id__, action, {**data, **self.__system_info__}) + self.__client__.track( + self.__server_id__, action, {**data, **self.__system_info__} + ) async def track_error(error: RubrixServerError): From 1d97e6952194ecf94b9ce006cd71c038ceec2e9a Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 13:38:51 +0200 Subject: [PATCH 04/31] feat: track after login --- src/rubrix/server/apis/v0/handlers/users.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rubrix/server/apis/v0/handlers/users.py b/src/rubrix/server/apis/v0/handlers/users.py index 178c503071..6aabe1d4d3 100644 --- a/src/rubrix/server/apis/v0/handlers/users.py +++ b/src/rubrix/server/apis/v0/handlers/users.py @@ -15,6 +15,7 @@ from fastapi import APIRouter, Security +from rubrix.server.commons import telemetry from rubrix.server.security import auth from rubrix.server.security.model import User @@ -39,4 +40,5 @@ async def whoami(current_user: User = Security(auth.get_user, scopes=[])): """ + await telemetry.track_login() return current_user From 294c7b61abafad2e30582c546377127842a13a4e Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 13:44:11 +0200 Subject: [PATCH 05/31] feat: track add records --- src/rubrix/server/services/storage/service.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rubrix/server/services/storage/service.py b/src/rubrix/server/services/storage/service.py index 72007fcc56..e2429d49f4 100644 --- a/src/rubrix/server/services/storage/service.py +++ b/src/rubrix/server/services/storage/service.py @@ -2,6 +2,7 @@ from fastapi import Depends +from rubrix.server.commons import telemetry from rubrix.server.commons.config import TasksFactory from rubrix.server.daos.records import DatasetRecordsDAO from rubrix.server.services.datasets import ServiceDataset @@ -31,6 +32,8 @@ def store_records( record_type: Type[ServiceRecord], ) -> int: """Store a set of records""" + telemetry.track_bulk(task=dataset.task, records=len(records)) + metrics = TasksFactory.get_task_metrics(dataset.task) if metrics: for record in records: From 792a6b011cced2db1658c42b5c79e153e52f18f6 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 13:44:58 +0200 Subject: [PATCH 06/31] tests: disable send for now --- src/rubrix/server/commons/telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index 677189cb43..38f40a3476 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -19,7 +19,7 @@ def _configure_analytics() -> Client: return Client( write_key=API_KEY, gzip=True, - send=True, # TODO: set to False for testing + send=False, # TODO: set to False for testing ) From 75b7ac9b092c6636359a64b0ffd06ab57592fbf1 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 14:47:14 +0200 Subject: [PATCH 07/31] chore: add missing dep --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 88e29e50ff..8a4b4abbc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,8 @@ server = [ "passlib[bcrypt]~=1.7.4", # Info status "psutil ~= 5.8.0", + # Telemetry + "segment-analytics-python" ] listeners = [ "schedule ~= 1.1.0", From fe0a1347c15547b531d665752e1accc9893c0c08 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 15:38:08 +0200 Subject: [PATCH 08/31] fix: server id as string --- src/rubrix/server/commons/telemetry.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index 38f40a3476..d24acd1cf8 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -1,9 +1,11 @@ import dataclasses +import platform +import sys import uuid -from typing import Any, Dict, Optional +from typing import Any, Dict from rubrix.server.commons.models import TaskType -from rubrix.server.errors import RubrixServerError +from rubrix.server.errors.base_errors import RubrixServerError from rubrix.server.settings import settings try: @@ -28,7 +30,7 @@ class _TelemetryClient: __INSTANCE__: "_TelemetryClient" = None - __server_id__: Optional[uuid.UUID] = dataclasses.field(init=False, default=None) + __server_id__: str = dataclasses.field(init=False, default=None) __client__: Client = dataclasses.field( init=False, default_factory=_configure_analytics ) @@ -41,17 +43,19 @@ def get(cls): return cls.__INSTANCE__ def __post_init__(self): - import platform - import sys from rubrix import __version__ - self.__server_id__ = uuid.UUID(int=uuid.getnode()) + self.__server_id__ = str(uuid.UUID(int=uuid.getnode())) self.__system_info__ = { "system": platform.system(), "machine": platform.machine(), "platform": platform.platform(), - "python_version": sys.version, + "python_version": "{major}.{minor}.{patch}".format( + major=sys.version_info.major, + minor=sys.version_info.minor, + patch=sys.version_info.micro, + ), "sys_version": platform.version(), "rubrix_version": __version__, } @@ -71,7 +75,7 @@ async def track_error(error: RubrixServerError): async def track_bulk(task: TaskType, records: int): client = _TelemetryClient.get() if client: - client.track_data("BulkData", {"task": task, records: records}) + client.track_data("BulkData", {"task": task, "records": records}) async def track_login(): From a717c2776db70e9ba4041c91ed5b4e2a338e604f Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 15:38:40 +0200 Subject: [PATCH 09/31] tests: add tests --- tests/server/commons/__init__.py | 0 tests/server/commons/test_telemetry.py | 34 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 tests/server/commons/__init__.py create mode 100644 tests/server/commons/test_telemetry.py diff --git a/tests/server/commons/__init__.py b/tests/server/commons/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/server/commons/test_telemetry.py b/tests/server/commons/test_telemetry.py new file mode 100644 index 0000000000..7af4d4245e --- /dev/null +++ b/tests/server/commons/test_telemetry.py @@ -0,0 +1,34 @@ +import pytest + +from rubrix.server.commons import telemetry +from rubrix.server.commons.models import TaskType +from rubrix.server.errors import RubrixServerError + + +@pytest.mark.asyncio +async def test_track_login(mocker): + client = telemetry._TelemetryClient.get() + spy = mocker.spy(client, "track_data") + + await telemetry.track_login() + spy.assert_called_once_with("UserLogged", {}) + + +@pytest.mark.asyncio +async def test_track_bulk(mocker): + client = telemetry._TelemetryClient.get() + spy = mocker.spy(client, "track_data") + + task, records = TaskType.token_classification, 100 + await telemetry.track_bulk(task=task, records=records) + spy.assert_called_once_with("BulkData", {"task": task, "records": records}) + + +@pytest.mark.asyncio +async def test_track_error(mocker): + client = telemetry._TelemetryClient.get() + spy = mocker.spy(client, "track_data") + + error = RubrixServerError() + await telemetry.track_error(error) + spy.assert_called_once_with("ServerError", {"code": error.get_error_code()}) From a3be2e199187b15a36f621821329503c35a411b2 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 15:43:42 +0200 Subject: [PATCH 10/31] tests: track_data fixture --- tests/server/commons/test_telemetry.py | 26 ++++++++++++-------------- tests/server/conftest.py | 11 +++++++++++ 2 files changed, 23 insertions(+), 14 deletions(-) create mode 100644 tests/server/conftest.py diff --git a/tests/server/commons/test_telemetry.py b/tests/server/commons/test_telemetry.py index 7af4d4245e..4d9404f7f8 100644 --- a/tests/server/commons/test_telemetry.py +++ b/tests/server/commons/test_telemetry.py @@ -6,29 +6,27 @@ @pytest.mark.asyncio -async def test_track_login(mocker): - client = telemetry._TelemetryClient.get() - spy = mocker.spy(client, "track_data") +async def test_track_login(telemetry_track_data): await telemetry.track_login() - spy.assert_called_once_with("UserLogged", {}) + telemetry_track_data.assert_called_once_with("UserLogged", {}) @pytest.mark.asyncio -async def test_track_bulk(mocker): - client = telemetry._TelemetryClient.get() - spy = mocker.spy(client, "track_data") - +async def test_track_bulk(telemetry_track_data): task, records = TaskType.token_classification, 100 + await telemetry.track_bulk(task=task, records=records) - spy.assert_called_once_with("BulkData", {"task": task, "records": records}) + telemetry_track_data.assert_called_once_with( + "BulkData", {"task": task, "records": records} + ) @pytest.mark.asyncio -async def test_track_error(mocker): - client = telemetry._TelemetryClient.get() - spy = mocker.spy(client, "track_data") - +async def test_track_error(telemetry_track_data): error = RubrixServerError() + await telemetry.track_error(error) - spy.assert_called_once_with("ServerError", {"code": error.get_error_code()}) + telemetry_track_data.assert_called_once_with( + "ServerError", {"code": error.get_error_code()} + ) diff --git a/tests/server/conftest.py b/tests/server/conftest.py new file mode 100644 index 0000000000..326a371a2d --- /dev/null +++ b/tests/server/conftest.py @@ -0,0 +1,11 @@ +import pytest + +from rubrix.server.commons import telemetry + + +@pytest.fixture +def telemetry_track_data(mocker): + client = telemetry._TelemetryClient.get() + spy = mocker.spy(client, "track_data") + + return spy From 6c83f332b1a3ce4a8ded25a41e0cf59e4f86be4a Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 15:51:24 +0200 Subject: [PATCH 11/31] fix: telemetry requires async --- src/rubrix/server/apis/v0/handlers/text2text.py | 4 ++-- src/rubrix/server/apis/v0/handlers/text_classification.py | 2 +- src/rubrix/server/apis/v0/handlers/token_classification.py | 2 +- src/rubrix/server/services/storage/service.py | 4 ++-- src/rubrix/server/services/tasks/text2text/service.py | 4 ++-- .../server/services/tasks/text_classification/service.py | 4 ++-- .../server/services/tasks/token_classification/service.py | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/rubrix/server/apis/v0/handlers/text2text.py b/src/rubrix/server/apis/v0/handlers/text2text.py index d9f09fc6be..402c4b4c3f 100644 --- a/src/rubrix/server/apis/v0/handlers/text2text.py +++ b/src/rubrix/server/apis/v0/handlers/text2text.py @@ -69,7 +69,7 @@ response_model=BulkResponse, response_model_exclude_none=True, ) -def bulk_records( +async def bulk_records( name: str, bulk: Text2TextBulkRequest, common_params: CommonTaskHandlerDependencies = Depends(), @@ -100,7 +100,7 @@ def bulk_records( dataset.owner = owner datasets.create_dataset(user=current_user, dataset=dataset) - result = service.add_records( + result = await service.add_records( dataset=dataset, records=[ServiceText2TextRecord.parse_obj(r) for r in bulk.records], ) diff --git a/src/rubrix/server/apis/v0/handlers/text_classification.py b/src/rubrix/server/apis/v0/handlers/text_classification.py index cb18f787f3..2c68d9b917 100644 --- a/src/rubrix/server/apis/v0/handlers/text_classification.py +++ b/src/rubrix/server/apis/v0/handlers/text_classification.py @@ -126,7 +126,7 @@ async def bulk_records( user=current_user, dataset=dataset, records=records ) - result = service.add_records( + result = await service.add_records( dataset=dataset, records=records, ) diff --git a/src/rubrix/server/apis/v0/handlers/token_classification.py b/src/rubrix/server/apis/v0/handlers/token_classification.py index 969fb0e8b3..ea81e0e743 100644 --- a/src/rubrix/server/apis/v0/handlers/token_classification.py +++ b/src/rubrix/server/apis/v0/handlers/token_classification.py @@ -121,7 +121,7 @@ async def bulk_records( records=records, ) - result = service.add_records( + result = await service.add_records( dataset=dataset, records=records, ) diff --git a/src/rubrix/server/services/storage/service.py b/src/rubrix/server/services/storage/service.py index e2429d49f4..31883db7fe 100644 --- a/src/rubrix/server/services/storage/service.py +++ b/src/rubrix/server/services/storage/service.py @@ -25,14 +25,14 @@ def get_instance( def __init__(self, dao: DatasetRecordsDAO): self.__dao__ = dao - def store_records( + async def store_records( self, dataset: ServiceDataset, records: List[ServiceRecord], record_type: Type[ServiceRecord], ) -> int: """Store a set of records""" - telemetry.track_bulk(task=dataset.task, records=len(records)) + await telemetry.track_bulk(task=dataset.task, records=len(records)) metrics = TasksFactory.get_task_metrics(dataset.task) if metrics: diff --git a/src/rubrix/server/services/tasks/text2text/service.py b/src/rubrix/server/services/tasks/text2text/service.py index fdb503c8ab..b462aad6a2 100644 --- a/src/rubrix/server/services/tasks/text2text/service.py +++ b/src/rubrix/server/services/tasks/text2text/service.py @@ -60,12 +60,12 @@ def __init__( self.__storage__ = storage self.__search__ = search - def add_records( + async def add_records( self, dataset: ServiceText2TextDataset, records: List[ServiceText2TextRecord], ): - failed = self.__storage__.store_records( + failed = await self.__storage__.store_records( dataset=dataset, records=records, record_type=ServiceText2TextRecord, diff --git a/src/rubrix/server/services/tasks/text_classification/service.py b/src/rubrix/server/services/tasks/text_classification/service.py index cecc47c386..02e4fe3345 100644 --- a/src/rubrix/server/services/tasks/text_classification/service.py +++ b/src/rubrix/server/services/tasks/text_classification/service.py @@ -67,7 +67,7 @@ def __init__( self.__search__ = search self.__labeling__ = labeling - def add_records( + async def add_records( self, dataset: ServiceTextClassificationDataset, records: List[ServiceTextClassificationRecord], @@ -75,7 +75,7 @@ def add_records( # TODO(@frascuchon): This will moved to dataset settings validation once DatasetSettings join the game! self._check_multi_label_integrity(dataset, records) - failed = self.__storage__.store_records( + failed = await self.__storage__.store_records( dataset=dataset, records=records, record_type=ServiceTextClassificationRecord, diff --git a/src/rubrix/server/services/tasks/token_classification/service.py b/src/rubrix/server/services/tasks/token_classification/service.py index fc8bc24e2b..bb82a776e7 100644 --- a/src/rubrix/server/services/tasks/token_classification/service.py +++ b/src/rubrix/server/services/tasks/token_classification/service.py @@ -57,12 +57,12 @@ def __init__( self.__storage__ = storage self.__search__ = search - def add_records( + async def add_records( self, dataset: ServiceTokenClassificationDataset, records: List[ServiceTokenClassificationRecord], ): - failed = self.__storage__.store_records( + failed = await self.__storage__.store_records( dataset=dataset, records=records, record_type=ServiceTokenClassificationRecord, From 715755bd4fc200ffeb1c805887b4617799c36468 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 15:52:35 +0200 Subject: [PATCH 12/31] tests: include more tests --- tests/server/text_classification/test_api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/server/text_classification/test_api.py b/tests/server/text_classification/test_api.py index 1e8a12248b..7db090985b 100644 --- a/tests/server/text_classification/test_api.py +++ b/tests/server/text_classification/test_api.py @@ -119,7 +119,7 @@ def test_create_records_for_text_classification_with_multi_label(mocked_client): assert results.records[0].predicted is None -def test_create_records_for_text_classification(mocked_client): +def test_create_records_for_text_classification(mocked_client, telemetry_track_data): dataset = "test_create_records_for_text_classification" assert mocked_client.delete(f"/api/datasets/{dataset}").status_code == 200 tags = {"env": "test", "class": "text classification"} @@ -178,6 +178,8 @@ def test_create_records_for_text_classification(mocked_client): "words": {"data": 1}, } + telemetry_track_data.assert_called_once() + def test_partial_record_update(mocked_client): name = "test_partial_record_update" From 6d1f3f57da36abb234146da1d1adb4bc1b26900b Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 16:30:51 +0200 Subject: [PATCH 13/31] fix: prevent any import error --- src/rubrix/server/commons/telemetry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index d24acd1cf8..1b538f9876 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -13,6 +13,7 @@ except ModuleNotFoundError: # TODO: show some warning info settings.enable_telemetry = False + Client = None def _configure_analytics() -> Client: From fdbb9d6c248f33f26a9f02880dd22f7ba1d7da0e Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 16:41:40 +0200 Subject: [PATCH 14/31] tests: disable sending telemetry events for tests --- src/rubrix/server/commons/telemetry.py | 10 ++++------ tests/server/conftest.py | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index 1b538f9876..cbbefa2660 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -16,13 +16,13 @@ Client = None -def _configure_analytics() -> Client: +def _configure_analytics(disable_send: bool = True) -> Client: API_KEY = "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" return Client( write_key=API_KEY, gzip=True, - send=False, # TODO: set to False for testing + send=not disable_send, # TODO: set to False for testing ) @@ -32,7 +32,7 @@ class _TelemetryClient: __INSTANCE__: "_TelemetryClient" = None __server_id__: str = dataclasses.field(init=False, default=None) - __client__: Client = dataclasses.field( + _client: Client = dataclasses.field( init=False, default_factory=_configure_analytics ) @@ -62,9 +62,7 @@ def __post_init__(self): } def track_data(self, action: str, data: Dict[str, Any]): - self.__client__.track( - self.__server_id__, action, {**data, **self.__system_info__} - ) + self._client.track(self.__server_id__, action, {**data, **self.__system_info__}) async def track_error(error: RubrixServerError): diff --git a/tests/server/conftest.py b/tests/server/conftest.py index 326a371a2d..5cee31f46d 100644 --- a/tests/server/conftest.py +++ b/tests/server/conftest.py @@ -5,7 +5,10 @@ @pytest.fixture def telemetry_track_data(mocker): + client = telemetry._TelemetryClient.get() + # Disable sending data for tests + client._client = telemetry._configure_analytics(disable_send=False) spy = mocker.spy(client, "track_data") return spy From 3fa7006f7031f89bd04840956a3c44569675ae32 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 16:45:08 +0200 Subject: [PATCH 15/31] chore: adapt event names --- src/rubrix/server/commons/telemetry.py | 4 ++-- tests/server/commons/test_telemetry.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index cbbefa2660..59b1247929 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -68,13 +68,13 @@ def track_data(self, action: str, data: Dict[str, Any]): async def track_error(error: RubrixServerError): client = _TelemetryClient.get() if client: - client.track_data("ServerError", {"code": error.code}) + client.track_data("ErrorRaised", {"code": error.code}) async def track_bulk(task: TaskType, records: int): client = _TelemetryClient.get() if client: - client.track_data("BulkData", {"task": task, "records": records}) + client.track_data("DataLogged", {"task": task, "records": records}) async def track_login(): diff --git a/tests/server/commons/test_telemetry.py b/tests/server/commons/test_telemetry.py index 4d9404f7f8..307b929c89 100644 --- a/tests/server/commons/test_telemetry.py +++ b/tests/server/commons/test_telemetry.py @@ -18,7 +18,7 @@ async def test_track_bulk(telemetry_track_data): await telemetry.track_bulk(task=task, records=records) telemetry_track_data.assert_called_once_with( - "BulkData", {"task": task, "records": records} + "DataLogged", {"task": task, "records": records} ) @@ -28,5 +28,5 @@ async def test_track_error(telemetry_track_data): await telemetry.track_error(error) telemetry_track_data.assert_called_once_with( - "ServerError", {"code": error.get_error_code()} + "ErrorRaised", {"code": error.get_error_code()} ) From 24b1bd8cb2fd106c042501abd6104f93e4e157c5 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 25 Aug 2022 16:50:54 +0200 Subject: [PATCH 16/31] chore: Normalize event naming --- src/rubrix/server/commons/telemetry.py | 8 ++++---- tests/server/commons/test_telemetry.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index 59b1247929..a1482a08b5 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -16,7 +16,7 @@ Client = None -def _configure_analytics(disable_send: bool = True) -> Client: +def _configure_analytics(disable_send: bool = False) -> Client: API_KEY = "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" return Client( @@ -68,16 +68,16 @@ def track_data(self, action: str, data: Dict[str, Any]): async def track_error(error: RubrixServerError): client = _TelemetryClient.get() if client: - client.track_data("ErrorRaised", {"code": error.code}) + client.track_data("ServerErrorFound", {"code": error.code}) async def track_bulk(task: TaskType, records: int): client = _TelemetryClient.get() if client: - client.track_data("DataLogged", {"task": task, "records": records}) + client.track_data("LogRecordsRequested", {"task": task, "records": records}) async def track_login(): client = _TelemetryClient.get() if client: - client.track_data("UserLogged", {}) + client.track_data("UserInfoRequested", {}) diff --git a/tests/server/commons/test_telemetry.py b/tests/server/commons/test_telemetry.py index 307b929c89..292a4349c5 100644 --- a/tests/server/commons/test_telemetry.py +++ b/tests/server/commons/test_telemetry.py @@ -9,7 +9,7 @@ async def test_track_login(telemetry_track_data): await telemetry.track_login() - telemetry_track_data.assert_called_once_with("UserLogged", {}) + telemetry_track_data.assert_called_once_with("UserInfoRequested", {}) @pytest.mark.asyncio @@ -18,7 +18,7 @@ async def test_track_bulk(telemetry_track_data): await telemetry.track_bulk(task=task, records=records) telemetry_track_data.assert_called_once_with( - "DataLogged", {"task": task, "records": records} + "LogRecordsRequested", {"task": task, "records": records} ) @@ -28,5 +28,5 @@ async def test_track_error(telemetry_track_data): await telemetry.track_error(error) telemetry_track_data.assert_called_once_with( - "ErrorRaised", {"code": error.get_error_code()} + "ServerErrorFound", {"code": error.get_error_code()} ) From 69ecc53eeb452d444d2951983ef3395027ceae15 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Fri, 26 Aug 2022 16:06:24 +0200 Subject: [PATCH 17/31] chore: telemetry key as env var --- src/rubrix/server/commons/telemetry.py | 8 ++------ src/rubrix/server/settings.py | 2 ++ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index a1482a08b5..af14139af1 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -17,13 +17,9 @@ def _configure_analytics(disable_send: bool = False) -> Client: - API_KEY = "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" + API_KEY = settings.telemetry_key or "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" - return Client( - write_key=API_KEY, - gzip=True, - send=not disable_send, # TODO: set to False for testing - ) + return Client(write_key=API_KEY, gzip=True, send=not disable_send) @dataclasses.dataclass diff --git a/src/rubrix/server/settings.py b/src/rubrix/server/settings.py index 71644beceb..0fbba3b5b8 100644 --- a/src/rubrix/server/settings.py +++ b/src/rubrix/server/settings.py @@ -84,6 +84,8 @@ class ApiSettings(BaseSettings): enable_telemetry: bool = True + telemetry_key: Optional[str] = None + @validator("disable_es_index_template_creation", always=True) def check_index_template_creation_value(cls, value): From ac28e3ddca7efb4178e403f578c0c24a4ac93b73 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Fri, 26 Aug 2022 16:35:24 +0200 Subject: [PATCH 18/31] docs: include fields and info tracked by the telemetry module. --- docs/community/telemetry.md | 23 +++++++++++++++-------- src/rubrix/server/commons/telemetry.py | 6 +----- src/rubrix/server/settings.py | 1 + 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/docs/community/telemetry.md b/docs/community/telemetry.md index 1447aa9b09..39c8332c57 100644 --- a/docs/community/telemetry.md +++ b/docs/community/telemetry.md @@ -2,21 +2,21 @@ Rubrix uses telemetry to report anonymous usage and error information. As an open-source software, this type of information is important to improve and understand how the product is used. ## How to opt-out -You can opt-out of telemetry reporting using the `ENV` variable `var_name_tbd` before launching the server. Setting this variable to `false` will completely disable telemetry reporting. +You can opt-out of telemetry reporting using the `ENV` variable `RUBRIX_ENABLE_TELEMETRY` before launching the server. Setting this variable to `0` will completely disable telemetry reporting. If you are a Linux/MacOs users you should run: ```bash -bash command to disable telemetry +export RUBRIX_ENABLE_TELEMETRY=0 ``` If you are Windows users you should run: ```bash -bash command to disable telemetry +set RUBRIX_ENABLE_TELEMETRY=0 ``` -To opt-in again, you can set the variable to `true`. +To opt-in again, you can set the variable to `1`. ## Why reporting telemetry Anonymous telemetry information enable us to continously improve the product and detect recurring problems to better serve all users. We collect aggregated information about general usage and errors. We do NOT collect any information of users' data records, datasets, or metadata information. @@ -31,14 +31,21 @@ We do not collect any piece of information related to the source data you store The following usage and error information is reported: -* exhaustive ist of fields/info -* ... +* The code of the raised error +* Task name and number of records for bulk operations +* The rubrix version running the server +* The python version, e.g. `3.8.13` +* The system/OS name, such as `Linux`, `Darwin`, `Windows` +* The system’s release version, e.g. `Darwin Kernel Version 21.5.0: Tue Apr 26 21:08:22 PDT 2022; root:xnu-8020` +* The machine type, e.g. `AMD64` +* The underlying platform spec with as much useful information as possible. (ej. `macOS-10.16-x86_64-i386-64bit`) + This is performed by registering information from the following API methods: * `/api/me` -* `/api/dataset/.../bulk` -* API errors +* `/api/dataset/{name}/{task}:bulk` +* Raised server API errors For transparency, you can inspect the source code where this is performed here (add link to the source). diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index af14139af1..e0602c156d 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -48,11 +48,7 @@ def __post_init__(self): "system": platform.system(), "machine": platform.machine(), "platform": platform.platform(), - "python_version": "{major}.{minor}.{patch}".format( - major=sys.version_info.major, - minor=sys.version_info.minor, - patch=sys.version_info.micro, - ), + "python_version": platform.python_version(), "sys_version": platform.version(), "rubrix_version": __version__, } diff --git a/src/rubrix/server/settings.py b/src/rubrix/server/settings.py index 0fbba3b5b8..886898ac77 100644 --- a/src/rubrix/server/settings.py +++ b/src/rubrix/server/settings.py @@ -121,6 +121,7 @@ def obfuscated_elasticsearch(self) -> str: class Config: # TODO: include a common prefix for all rubrix env vars. + env_prefix = "RUBRIX_" fields = { "elasticsearch_ca_path": { "env": "RUBRIX_ELASTICSEARCH_CA_PATH", From e241141a53c55551dc1886419e40639f563e9a02 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 29 Aug 2022 13:44:32 +0200 Subject: [PATCH 19/31] fix: keep old fashion env var names --- src/rubrix/server/settings.py | 42 ++++++++++++++++------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/src/rubrix/server/settings.py b/src/rubrix/server/settings.py index 886898ac77..2a0e1707f7 100644 --- a/src/rubrix/server/settings.py +++ b/src/rubrix/server/settings.py @@ -75,8 +75,6 @@ class ApiSettings(BaseSettings): es_records_index_shards: int = 1 es_records_index_replicas: int = 0 - # TODO(@frascuchon): remove in v0.12.0 - disable_es_index_template_creation: bool = False metadata_fields_limit: int = Field( default=50, gt=0, le=100, description="Max number of fields in metadata" @@ -86,18 +84,6 @@ class ApiSettings(BaseSettings): telemetry_key: Optional[str] = None - @validator("disable_es_index_template_creation", always=True) - def check_index_template_creation_value(cls, value): - - if value is True: - cls.__LOGGER__.warning( - "The environment variable DISABLE_ES_INDEX_TEMPLATE_CREATION won't be used anymore.\n" - "If you want customize the dataset creation index, please refer documentation " - "https://rubrix.readthedocs.io/en/stable" - "/getting_started/advanced_setup_guides.html#change-elasticsearch-index-analyzers" - ) - return value - @property def dataset_index_name(self) -> str: ns = self.namespace @@ -123,20 +109,30 @@ class Config: # TODO: include a common prefix for all rubrix env vars. env_prefix = "RUBRIX_" fields = { - "elasticsearch_ca_path": { - "env": "RUBRIX_ELASTICSEARCH_CA_PATH", + # TODO(@frascuchon): Remove in 0.20.0 + "elasticsearch": { + "env": ["ELASTICSEARCH", f"{env_prefix}ELASTICSEARCH"], }, "elasticsearch_ssl_verify": { - "env": "RUBRIX_ELASTICSEARCH_SSL_VERIFY", + "env": [ + "ELASTICSEARCH_SSL_VERIFY", + f"{env_prefix}ELASTICSEARCH_SSL_VERIFY", + ] }, - "metadata_fields_limit": {"env": "RUBRIX_METADATA_FIELDS_LIMIT"}, - "namespace": { - "env": "RUBRIX_NAMESPACE", + "cors_origins": {"env": ["CORS_ORIGINS", f"{env_prefix}CORS_ORIGINS"]}, + "docs_enabled": {"env": ["DOCS_ENABLED", f"{env_prefix}DOCS_ENABLED"]}, + "es_records_index_shards": { + "env": [ + "ES_RECORDS_INDEX_SHARDS", + f"{env_prefix}ES_RECORDS_INDEX_SHARDS", + ] }, - "default_es_search_analyzer": { - "env": "RUBRIX_DEFAULT_ES_SEARCH_ANALYZER", + "es_records_index_replicas": { + "env": [ + "ES_RECORDS_INDEX_REPLICAS", + f"{env_prefix}ES_RECORDS_INDEX_SHARDS", + ] }, - "exact_es_search_analyzer": {"env": "RUBRIX_EXACT_ES_SEARCH_ANALYZER"}, } From 1a8684cac13b006c601e82943ab64961bf4a84bd Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 29 Aug 2022 18:31:33 +0200 Subject: [PATCH 20/31] feat: include request info in telemetry events --- src/rubrix/server/apis/v0/handlers/users.py | 10 +++++-- src/rubrix/server/commons/telemetry.py | 32 ++++++++++++++++----- src/rubrix/server/errors/api_errors.py | 2 +- tests/conftest.py | 14 ++++++++- tests/server/commons/test_telemetry.py | 21 ++++++++++---- tests/server/conftest.py | 11 ------- 6 files changed, 61 insertions(+), 29 deletions(-) diff --git a/src/rubrix/server/apis/v0/handlers/users.py b/src/rubrix/server/apis/v0/handlers/users.py index 6aabe1d4d3..f5e2b67b7a 100644 --- a/src/rubrix/server/apis/v0/handlers/users.py +++ b/src/rubrix/server/apis/v0/handlers/users.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from fastapi import APIRouter, Security +from fastapi import APIRouter, Request, Security from rubrix.server.commons import telemetry from rubrix.server.security import auth @@ -25,12 +25,16 @@ @router.get( "/me", response_model=User, response_model_exclude_none=True, operation_id="whoami" ) -async def whoami(current_user: User = Security(auth.get_user, scopes=[])): +async def whoami( + request: Request, current_user: User = Security(auth.get_user, scopes=[]) +): """ User info endpoint Parameters ---------- + request: + The original request current_user: The current request user @@ -40,5 +44,5 @@ async def whoami(current_user: User = Security(auth.get_user, scopes=[])): """ - await telemetry.track_login() + await telemetry.track_login(request, username=current_user.username) return current_user diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index e0602c156d..b784a43adf 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -1,9 +1,10 @@ import dataclasses import platform -import sys import uuid from typing import Any, Dict +from fastapi import Request + from rubrix.server.commons.models import TaskType from rubrix.server.errors.base_errors import RubrixServerError from rubrix.server.settings import settings @@ -53,14 +54,28 @@ def __post_init__(self): "rubrix_version": __version__, } - def track_data(self, action: str, data: Dict[str, Any]): - self._client.track(self.__server_id__, action, {**data, **self.__system_info__}) + def track_data( + self, action: str, data: Dict[str, Any], include_system_info: bool = True + ): + event_data = data.copy() + if include_system_info: + event_data.update(self.__system_info__) + self._client.track(self.__server_id__, action, event_data) + + +def _process_request_info(request: Request): + return { + header: request.headers.get(header) + for header in ["user-agent", "accept-language"] + } -async def track_error(error: RubrixServerError): +async def track_error(error: RubrixServerError, request: Request): client = _TelemetryClient.get() if client: - client.track_data("ServerErrorFound", {"code": error.code}) + client.track_data( + "ServerErrorFound", {"code": error.code, **_process_request_info(request)} + ) async def track_bulk(task: TaskType, records: int): @@ -69,7 +84,10 @@ async def track_bulk(task: TaskType, records: int): client.track_data("LogRecordsRequested", {"task": task, "records": records}) -async def track_login(): +async def track_login(request: Request, username: str): client = _TelemetryClient.get() if client: - client.track_data("UserInfoRequested", {}) + client.track_data( + "UserInfoRequested", + {"is_default_user": username == "rubrix", **_process_request_info(request)}, + ) diff --git a/src/rubrix/server/errors/api_errors.py b/src/rubrix/server/errors/api_errors.py index 4b1128110c..7784d86224 100644 --- a/src/rubrix/server/errors/api_errors.py +++ b/src/rubrix/server/errors/api_errors.py @@ -41,7 +41,7 @@ class APIErrorHandler: async def common_exception_handler(request: Request, error: Exception): """Wraps errors as custom generic error""" rubrix_error = exception_to_rubrix_error(error) - await telemetry.track_error(rubrix_error) + await telemetry.track_error(rubrix_error, request=request) return await http_exception_handler( request, RubrixServerHTTPException(rubrix_error) diff --git a/tests/conftest.py b/tests/conftest.py index 979135acc6..08c5f5212b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,7 @@ from _pytest.logging import LogCaptureFixture from rubrix.client.sdk.users import api as users_api +from rubrix.server.commons import telemetry try: from loguru import logger @@ -17,7 +18,18 @@ @pytest.fixture -def mocked_client(monkeypatch) -> SecuredClient: +def telemetry_track_data(mocker): + + client = telemetry._TelemetryClient.get() + # Disable sending data for tests + client._client = telemetry._configure_analytics(disable_send=True) + spy = mocker.spy(client, "track_data") + + return spy + + +@pytest.fixture +def mocked_client(monkeypatch, telemetry_track_data) -> SecuredClient: with TestClient(app, raise_server_exceptions=False) as _client: client_ = SecuredClient(_client) diff --git a/tests/server/commons/test_telemetry.py b/tests/server/commons/test_telemetry.py index 292a4349c5..6a2d2bcc06 100644 --- a/tests/server/commons/test_telemetry.py +++ b/tests/server/commons/test_telemetry.py @@ -1,15 +1,20 @@ import pytest +from fastapi import Request from rubrix.server.commons import telemetry from rubrix.server.commons.models import TaskType from rubrix.server.errors import RubrixServerError +mock_request = Request(scope={"type": "http", "headers": {}}) + @pytest.mark.asyncio async def test_track_login(telemetry_track_data): - - await telemetry.track_login() - telemetry_track_data.assert_called_once_with("UserInfoRequested", {}) + await telemetry.track_login(request=mock_request, username="rubrix") + telemetry_track_data.assert_called_once_with( + "UserInfoRequested", + {"accept-language": None, "is_default_user": True, "user-agent": None}, + ) @pytest.mark.asyncio @@ -25,8 +30,12 @@ async def test_track_bulk(telemetry_track_data): @pytest.mark.asyncio async def test_track_error(telemetry_track_data): error = RubrixServerError() - - await telemetry.track_error(error) + await telemetry.track_error(error, request=mock_request) telemetry_track_data.assert_called_once_with( - "ServerErrorFound", {"code": error.get_error_code()} + "ServerErrorFound", + { + "accept-language": None, + "code": "rubrix.api.errors::RubrixServerError", + "user-agent": None, + }, ) diff --git a/tests/server/conftest.py b/tests/server/conftest.py index 5cee31f46d..878090c624 100644 --- a/tests/server/conftest.py +++ b/tests/server/conftest.py @@ -1,14 +1,3 @@ import pytest from rubrix.server.commons import telemetry - - -@pytest.fixture -def telemetry_track_data(mocker): - - client = telemetry._TelemetryClient.get() - # Disable sending data for tests - client._client = telemetry._configure_analytics(disable_send=False) - spy = mocker.spy(client, "track_data") - - return spy From 789a49a1d34bac59b9e91aac47aabc59106fbfb3 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 29 Aug 2022 19:46:26 +0200 Subject: [PATCH 21/31] perf: disable telemetry when host is not accesible --- src/rubrix/server/commons/telemetry.py | 31 ++++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index b784a43adf..6f726a7428 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -1,8 +1,10 @@ import dataclasses +import logging import platform import uuid from typing import Any, Dict +import httpx from fastapi import Request from rubrix.server.commons.models import TaskType @@ -19,25 +21,40 @@ def _configure_analytics(disable_send: bool = False) -> Client: API_KEY = settings.telemetry_key or "C6FkcaoCbt78rACAgvyBxGBcMB3dM3nn" + TELEMETRY_HOST = "https://api.segment.io" - return Client(write_key=API_KEY, gzip=True, send=not disable_send) + # Check host connection + httpx.options(TELEMETRY_HOST, timeout=1) + + return Client( + write_key=API_KEY, + gzip=True, + host=TELEMETRY_HOST, + send=not disable_send, + max_retries=5, + ) @dataclasses.dataclass class _TelemetryClient: - __INSTANCE__: "_TelemetryClient" = None + client: Client + __INSTANCE__: "_TelemetryClient" = None __server_id__: str = dataclasses.field(init=False, default=None) - _client: Client = dataclasses.field( - init=False, default_factory=_configure_analytics - ) @classmethod def get(cls): if settings.enable_telemetry: if cls.__INSTANCE__ is None: - cls.__INSTANCE__ = cls() + try: + cls.__INSTANCE__ = cls(client=_configure_analytics()) + except Exception as err: + logging.getLogger(__name__).warning( + f"Cannot initialize telemetry. Error: {err}. Disabling..." + ) + settings.enable_telemetry = False + return None return cls.__INSTANCE__ def __post_init__(self): @@ -60,7 +77,7 @@ def track_data( event_data = data.copy() if include_system_info: event_data.update(self.__system_info__) - self._client.track(self.__server_id__, action, event_data) + self.client.track(self.__server_id__, action, event_data) def _process_request_info(request: Request): From 931b604754ec1c216e979b27c27013818971ef6d Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 29 Aug 2022 23:08:52 +0200 Subject: [PATCH 22/31] docs: include tracked headers --- docs/community/telemetry.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/community/telemetry.md b/docs/community/telemetry.md index 39c8332c57..8b7f3c6107 100644 --- a/docs/community/telemetry.md +++ b/docs/community/telemetry.md @@ -30,8 +30,8 @@ We do not collect any piece of information related to the source data you store ## Information reported The following usage and error information is reported: - * The code of the raised error +* The `user-agent` and `accept-language` http headers * Task name and number of records for bulk operations * The rubrix version running the server * The python version, e.g. `3.8.13` From 0fe656bd33c5fe079210376a9185bda2a33c8796 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 29 Aug 2022 23:09:25 +0200 Subject: [PATCH 23/31] tests: conditional mock for telemetry client --- tests/conftest.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 08c5f5212b..310857b3df 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,11 +21,12 @@ def telemetry_track_data(mocker): client = telemetry._TelemetryClient.get() - # Disable sending data for tests - client._client = telemetry._configure_analytics(disable_send=True) - spy = mocker.spy(client, "track_data") + if client: + # Disable sending data for tests + client._client = telemetry._configure_analytics(disable_send=True) + spy = mocker.spy(client, "track_data") - return spy + return spy @pytest.fixture From 639352c6e7a59817aceb0f40ba3228d2e87c3680 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 1 Sep 2022 12:05:05 +0200 Subject: [PATCH 24/31] fix: disable ssl verify for telemetry backend check --- src/rubrix/server/commons/telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index 6f726a7428..062b628739 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -24,7 +24,7 @@ def _configure_analytics(disable_send: bool = False) -> Client: TELEMETRY_HOST = "https://api.segment.io" # Check host connection - httpx.options(TELEMETRY_HOST, timeout=1) + httpx.options(TELEMETRY_HOST, timeout=1, verify=False) return Client( write_key=API_KEY, From 39408cd483b42cea6124355a5fed584c8da2c00c Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 8 Sep 2022 12:26:23 +0200 Subject: [PATCH 25/31] feat: include user_hash in login events --- docs/community/telemetry.md | 1 + src/rubrix/server/commons/telemetry.py | 19 ++++++++++++++----- tests/server/commons/test_telemetry.py | 12 +++++++++++- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/docs/community/telemetry.md b/docs/community/telemetry.md index 8b7f3c6107..7750ede7ee 100644 --- a/docs/community/telemetry.md +++ b/docs/community/telemetry.md @@ -33,6 +33,7 @@ The following usage and error information is reported: * The code of the raised error * The `user-agent` and `accept-language` http headers * Task name and number of records for bulk operations +* An anonymous generated user uuid * The rubrix version running the server * The python version, e.g. `3.8.13` * The system/OS name, such as `Linux`, `Darwin`, `Windows` diff --git a/src/rubrix/server/commons/telemetry.py b/src/rubrix/server/commons/telemetry.py index 062b628739..0f24048d5e 100644 --- a/src/rubrix/server/commons/telemetry.py +++ b/src/rubrix/server/commons/telemetry.py @@ -2,7 +2,7 @@ import logging import platform import uuid -from typing import Any, Dict +from typing import Any, Dict, Optional import httpx from fastapi import Request @@ -41,7 +41,11 @@ class _TelemetryClient: client: Client __INSTANCE__: "_TelemetryClient" = None - __server_id__: str = dataclasses.field(init=False, default=None) + __server_id__: Optional[uuid.UUID] = dataclasses.field(init=False, default=None) + + @property + def server_id(self) -> uuid.UUID: + return self.__server_id__ @classmethod def get(cls): @@ -61,7 +65,8 @@ def __post_init__(self): from rubrix import __version__ - self.__server_id__ = str(uuid.UUID(int=uuid.getnode())) + self.__server_id__ = uuid.UUID(int=uuid.getnode()) + self.__server_id_str__ = str(self.__server_id__) self.__system_info__ = { "system": platform.system(), "machine": platform.machine(), @@ -77,7 +82,7 @@ def track_data( event_data = data.copy() if include_system_info: event_data.update(self.__system_info__) - self.client.track(self.__server_id__, action, event_data) + self.client.track(self.__server_id_str__, action, event_data) def _process_request_info(request: Request): @@ -106,5 +111,9 @@ async def track_login(request: Request, username: str): if client: client.track_data( "UserInfoRequested", - {"is_default_user": username == "rubrix", **_process_request_info(request)}, + { + "is_default_user": username == "rubrix", + "user_hash": str(uuid.uuid5(namespace=client.server_id, name=username)), + **_process_request_info(request), + }, ) diff --git a/tests/server/commons/test_telemetry.py b/tests/server/commons/test_telemetry.py index 6a2d2bcc06..2ea25f6cc2 100644 --- a/tests/server/commons/test_telemetry.py +++ b/tests/server/commons/test_telemetry.py @@ -1,3 +1,5 @@ +import uuid + import pytest from fastapi import Request @@ -11,9 +13,17 @@ @pytest.mark.asyncio async def test_track_login(telemetry_track_data): await telemetry.track_login(request=mock_request, username="rubrix") + + current_server_id = telemetry._TelemetryClient.get().server_id + expected_event_data = { + "accept-language": None, + "is_default_user": True, + "user-agent": None, + "user_hash": str(uuid.uuid5(current_server_id, name="rubrix")), + } telemetry_track_data.assert_called_once_with( "UserInfoRequested", - {"accept-language": None, "is_default_user": True, "user-agent": None}, + expected_event_data, ) From a690594aff5c7614d6a24e92819e633a021ca5d2 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 8 Sep 2022 14:26:05 +0200 Subject: [PATCH 26/31] tests: try changes --- tests/server/commons/test_telemetry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/server/commons/test_telemetry.py b/tests/server/commons/test_telemetry.py index 2ea25f6cc2..9a02d16650 100644 --- a/tests/server/commons/test_telemetry.py +++ b/tests/server/commons/test_telemetry.py @@ -14,12 +14,12 @@ async def test_track_login(telemetry_track_data): await telemetry.track_login(request=mock_request, username="rubrix") - current_server_id = telemetry._TelemetryClient.get().server_id + # current_server_id = telemetry._TelemetryClient.get().server_id expected_event_data = { "accept-language": None, "is_default_user": True, "user-agent": None, - "user_hash": str(uuid.uuid5(current_server_id, name="rubrix")), + # "user_hash": str(uuid.uuid5(current_server_id, name="rubrix")), } telemetry_track_data.assert_called_once_with( "UserInfoRequested", From 8d46b3187e8c2e964920d919ff841f8636c62caf Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 8 Sep 2022 15:49:05 +0200 Subject: [PATCH 27/31] revert: prior commit --- tests/server/commons/test_telemetry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/server/commons/test_telemetry.py b/tests/server/commons/test_telemetry.py index 9a02d16650..2ea25f6cc2 100644 --- a/tests/server/commons/test_telemetry.py +++ b/tests/server/commons/test_telemetry.py @@ -14,12 +14,12 @@ async def test_track_login(telemetry_track_data): await telemetry.track_login(request=mock_request, username="rubrix") - # current_server_id = telemetry._TelemetryClient.get().server_id + current_server_id = telemetry._TelemetryClient.get().server_id expected_event_data = { "accept-language": None, "is_default_user": True, "user-agent": None, - # "user_hash": str(uuid.uuid5(current_server_id, name="rubrix")), + "user_hash": str(uuid.uuid5(current_server_id, name="rubrix")), } telemetry_track_data.assert_called_once_with( "UserInfoRequested", From 434328a5db9a089ec3ceca45964506fbee8f0d13 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 8 Sep 2022 15:49:24 +0200 Subject: [PATCH 28/31] tests: try to fix telemetry tests --- tests/conftest.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 310857b3df..7b19c9a06a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ from rubrix.client.sdk.users import api as users_api from rubrix.server.commons import telemetry +from rubrix.server.settings import settings try: from loguru import logger @@ -21,12 +22,15 @@ def telemetry_track_data(mocker): client = telemetry._TelemetryClient.get() - if client: - # Disable sending data for tests - client._client = telemetry._configure_analytics(disable_send=True) - spy = mocker.spy(client, "track_data") + if not client: + settings.enable_telemetry = True + client = telemetry._TelemetryClient.get() - return spy + # Disable sending data for tests + client._client = telemetry._configure_analytics(disable_send=True) + spy = mocker.spy(client, "track_data") + + return spy @pytest.fixture From bfeae65599085ec7cbc33225c6cb2c66d1e9f81d Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 8 Sep 2022 16:07:07 +0200 Subject: [PATCH 29/31] ci: force cache hit --- .github/workflows/package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index b80b4c8627..159f535283 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -69,7 +69,7 @@ jobs: key: conda-${{ runner.os }}--${{ runner.arch }}--${{ steps.set-date.outputs.today }}-${{ hashFiles('environment_dev.yml') }}-${{ env.CACHE_NUMBER }} env: # Increase this value to reset cache if etc/example-environment.yml has not changed - CACHE_NUMBER: 0 + CACHE_NUMBER: 1 id: cache - name: Update environment From 31258c2007c9a59f679d928d9714442a4429a96e Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 8 Sep 2022 16:17:10 +0200 Subject: [PATCH 30/31] chore: discard wrong analytics version --- pyproject.toml | 2 +- tests/conftest.py | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8a4b4abbc9..45d743cb9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ server = [ # Info status "psutil ~= 5.8.0", # Telemetry - "segment-analytics-python" + "segment-analytics-python != 2.2.1" ] listeners = [ "schedule ~= 1.1.0", diff --git a/tests/conftest.py b/tests/conftest.py index 7b19c9a06a..a8f1ff5c02 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,15 +22,12 @@ def telemetry_track_data(mocker): client = telemetry._TelemetryClient.get() - if not client: - settings.enable_telemetry = True - client = telemetry._TelemetryClient.get() + if client: + # Disable sending data for tests + client._client = telemetry._configure_analytics(disable_send=True) + spy = mocker.spy(client, "track_data") - # Disable sending data for tests - client._client = telemetry._configure_analytics(disable_send=True) - spy = mocker.spy(client, "track_data") - - return spy + return spy @pytest.fixture From 9fdfc3ebac7ffcebec30639535785ced0693ef12 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 8 Sep 2022 16:17:37 +0200 Subject: [PATCH 31/31] ci: force cache conda hit --- .github/workflows/package.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index 159f535283..fa694bb009 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -55,22 +55,22 @@ jobs: use-mamba: true activate-environment: rubrix - - name: Set date for conda cache + - name: Get date for conda cache if: steps.filter.outputs.python_code == 'true' - id: set-date + id: get-date run: echo "::set-output name=today::$(/bin/date -u '+%Y%m%d')" shell: bash - name: Cache Conda env if: steps.filter.outputs.python_code == 'true' uses: actions/cache@v2 + id: cache with: path: ${{ env.CONDA }}/envs - key: conda-${{ runner.os }}--${{ runner.arch }}--${{ steps.set-date.outputs.today }}-${{ hashFiles('environment_dev.yml') }}-${{ env.CACHE_NUMBER }} + key: conda-${{ runner.os }}--${{ runner.arch }}--${{ steps.get-date.outputs.today }}-${{ hashFiles('environment_dev.yml') }}-${{ env.CACHE_NUMBER }} env: # Increase this value to reset cache if etc/example-environment.yml has not changed - CACHE_NUMBER: 1 - id: cache + CACHE_NUMBER: 2 - name: Update environment if: steps.filter.outputs.python_code == 'true' && steps.cache.outputs.cache-hit != 'true'