diff --git a/.idea/api-to-dataframe.iml b/.idea/api-to-dataframe.iml index 0de63f1..85cf8cd 100644 --- a/.idea/api-to-dataframe.iml +++ b/.idea/api-to-dataframe.iml @@ -5,7 +5,7 @@ - + diff --git a/README.md b/README.md index 3e7b133..2ce9b7d 100644 --- a/README.md +++ b/README.md @@ -45,24 +45,37 @@ To install the package using poetry, use the following command: poetry add api-to-dataframe ``` -## How to use it +## User Guide ``` python ## Importing library from api_to_dataframe import ClientBuilder, RetryStrategies -# Create a client for simple ingest data from API (timeout 5 seconds) +# Create a client to ingest data from an API (default connection timeout: 1 second) client = ClientBuilder(endpoint="https://api.example.com") -# if you can define timeout, use: (default is 5 seconds), with LinearStrategy (In development, actually don't nothing) and set headers: +# To set a connection timeout, a retry strategy (e.g. LinearRetryStrategy) and headers: headers = { "application_name": "api_to_dataframe" } client = ClientBuilder(endpoint="https://api.example.com" - ,retry_strategy=RetryStrategies.LinearStrategy + ,retry_strategy=RetryStrategies.LinearRetryStrategy - ,timeout=10 + ,connection_timeout=2 ,headers=headers) +""" + NOTE: by default the number of retries is 3 and the delay between retries is 1 second, but you can set them manually, like this: + +""" + +client = ClientBuilder(endpoint="https://api.example.com" + ,retry_strategy=RetryStrategies.LinearRetryStrategy + ,connection_timeout=10 + ,headers=headers + ,retries=5 + ,delay=10) + + -### timeout, retry_strategy and headers are opcionals parameters +### connection_timeout, retry_strategy, retries, delay and headers are optional parameters # Get data from the API diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index 8f6a586..34b6960 100644 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -1,98 +1,135 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting 
api-to-dataframe\n", - " Downloading api_to_dataframe-0.0.3-py3-none-any.whl (4.0 kB)\n", - "Collecting coverage<8.0.0,>=7.5.3\n", - " Using cached coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl (204 kB)\n", - "Collecting pytest<9.0.0,>=8.2.2\n", - " Downloading pytest-8.2.2-py3-none-any.whl (339 kB)\n", - "\u001b[K |████████████████████████████████| 339 kB 7.5 MB/s eta 0:00:01\n", - "\u001b[?25hCollecting tomli>=1\n", - " Using cached tomli-2.0.1-py3-none-any.whl (12 kB)\n", - "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /Users/ivsouza/Library/Python/3.9/lib/python/site-packages (from pytest<9.0.0,>=8.2.2->api-to-dataframe) (1.2.0)\n", - "Requirement already satisfied: packaging in /Users/ivsouza/Library/Python/3.9/lib/python/site-packages (from pytest<9.0.0,>=8.2.2->api-to-dataframe) (23.2)\n", - "Collecting iniconfig\n", - " Using cached iniconfig-2.0.0-py3-none-any.whl (5.9 kB)\n", - "Collecting pluggy<2.0,>=1.5\n", - " Using cached pluggy-1.5.0-py3-none-any.whl (20 kB)\n", - "Installing collected packages: tomli, pluggy, iniconfig, pytest, coverage, api-to-dataframe\n", - "Successfully installed api-to-dataframe-0.0.3 coverage-7.5.3 iniconfig-2.0.0 pluggy-1.5.0 pytest-8.2.2 tomli-2.0.1\n", - "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 24.0 is available.\n", - "You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "cell_type": "markdown", "source": [ - "pip install api-to-dataframe" + "## Use case - Using Linear Strategy\n", + "For more examples see the [User Guide in the README](https://github.com/IvanildoBarauna/api-to-dataframe/blob/main/README.md#user-guide)\n", + "\n", + "NOTE: Before running this notebook, ensure that api-to-dataframe is installed as a dependency; see how to do this 
[here](https://github.com/IvanildoBarauna/api-to-dataframe/blob/main/README.md#installation)" ] }, { - "cell_type": "code", - "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "\u001b[31mERROR: You must give at least one requirement to install (see \"pip help install\")\u001b[0m\n", - "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 24.0 is available.\n", - "You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "pip install --upgrade " - ] + "cell_type": "markdown", + "source": "" }, { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'ApiToDataframe' object has no attribute 'name'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[10], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mapi_to_dataframe\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ApiToDataframe\n\u001b[1;32m 3\u001b[0m new \u001b[38;5;241m=\u001b[39m ApiToDataframe\u001b[38;5;241m.\u001b[39mApiToDataframe(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIvanildo\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 5\u001b[0m \u001b[43mnew\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msay_hello\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/api_to_dataframe/run.py:6\u001b[0m, in 
\u001b[0;36mApiToDataframe.say_hello\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msay_hello\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHello \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mAttributeError\u001b[0m: 'ApiToDataframe' object has no attribute 'name'" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2024-06-21T04:14:54.199749Z", + "start_time": "2024-06-21T04:14:54.124358Z" } - ], + }, + "cell_type": "code", "source": [ - "from api_to_dataframe import ApiToDataframe\n", + "from api_to_dataframe import ClientBuilder, RetryStrategies\n", "\n", - "new = ApiToDataframe.ApiToDataframe(\"Ivanildo\")\n", + "client = ClientBuilder(\n", + " endpoint=\"https://brasilapi.com.br/api/banks/v1\",\n", + " retry_strategy=RetryStrategies.LinearRetryStrategy,\n", + " retries=3,\n", + " connection_timeout=1,\n", + " delay=1)\n", "\n", - "new.say_hello()\n", + "## Get response.json\n", + "api_data = client.get_api_data()\n", "\n", + "df = client.api_to_dataframe(api_data)\n", "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "df.head()" + ], + "outputs": [ + { + "data": { + "text/plain": [ + " ispb name code \\\n", + "0 00000000 BCO DO BRASIL S.A. 1.0 \n", + "1 00000208 BRB - BCO DE BRASILIA S.A. 70.0 \n", + "2 00038121 Selic NaN \n", + "3 00038166 Bacen NaN \n", + "4 00122327 SANTINVEST S.A. - CFI 539.0 \n", + "\n", + " fullName \n", + "0 Banco do Brasil S.A. \n", + "1 BRB - BANCO DE BRASILIA S.A. \n", + "2 Banco Central do Brasil - Selic \n", + "3 Banco Central do Brasil \n", + "4 SANTINVEST S.A. - CREDITO, FINANCIAMENTO E INV... " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ispbnamecodefullName
000000000BCO DO BRASIL S.A.1.0Banco do Brasil S.A.
100000208BRB - BCO DE BRASILIA S.A.70.0BRB - BANCO DE BRASILIA S.A.
200038121SelicNaNBanco Central do Brasil - Selic
300038166BacenNaNBanco Central do Brasil
400122327SANTINVEST S.A. - CFI539.0SANTINVEST S.A. - CREDITO, FINANCIAMENTO E INV...
\n", + "
" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 21 } ], "metadata": { diff --git a/pyproject.toml b/pyproject.toml index 2e53844..1d5f958 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,12 @@ [tool.poetry] name = "api-to-dataframe" -version = "1.1.0" +version = "1.2.0" description = "A package to convert API responses to pandas dataframe" authors = ["IvanildoBarauna "] readme = "README.md" license = "MIT" classifiers=[ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", diff --git a/src/api_to_dataframe/__init__.py b/src/api_to_dataframe/__init__.py index 27e9c86..5a5ce94 100644 --- a/src/api_to_dataframe/__init__.py +++ b/src/api_to_dataframe/__init__.py @@ -1,2 +1,2 @@ from .controller.client_builder import ClientBuilder -from .common.utils.retry_strategies import RetryStrategies +from .models.retainer import Strategies as RetryStrategies diff --git a/src/api_to_dataframe/common/utils/retry_strategies.py b/src/api_to_dataframe/common/utils/retry_strategies.py deleted file mode 100644 index 29bdf19..0000000 --- a/src/api_to_dataframe/common/utils/retry_strategies.py +++ /dev/null @@ -1,9 +0,0 @@ -from enum import Enum - - -class RetryStrategies(Enum): - NoRetryStrategy = 0 - LinearStrategy = 1 - ExponentialStrategy = 2 - CustomStrategy = 3 - diff --git a/src/api_to_dataframe/controller/client_builder.py b/src/api_to_dataframe/controller/client_builder.py index f6cd092..3b731a4 100644 --- a/src/api_to_dataframe/controller/client_builder.py +++ b/src/api_to_dataframe/controller/client_builder.py @@ -1,32 +1,54 @@ -from api_to_dataframe.common.utils.retry_strategies import RetryStrategies +from api_to_dataframe.models.retainer import RetryStrategies +from api_to_dataframe.models.retainer import Strategies from api_to_dataframe.models.get_data 
import GetData class ClientBuilder: def __init__(self, endpoint: str, - headers: dict = {}, - retry_strategy: RetryStrategies = RetryStrategies.NoRetryStrategy, - timeout: int = 5): + headers: dict = None, + retry_strategy: Strategies = Strategies.NoRetryStrategy, + retries: int = 3, + delay: int = 1, + connection_timeout: int = 1): + """ - Initializes an instance of ClientBuilder. + Initializes the ClientBuilder object. - Args: - endpoint (str): The API endpoint to be accessed. - retry_strategy (RetryStrategies, optional): The retry strategy for the request. Default is NoRetryStrategy. - timeout (int, optional): The timeout for the request. Default is 5 seconds. + Args: + endpoint (str): The API endpoint to connect to. + headers (dict, optional): The headers to use for the API request. Defaults to None. + retry_strategy (Strategies, optional): The strategy to use for retrying failed requests. Defaults to Strategies.NoRetryStrategy. + retries (int, optional): The number of times to retry a failed request. Defaults to 3. + delay (int, optional): The delay between retries in seconds. Defaults to 1. + connection_timeout (int, optional): The timeout for the connection in seconds. Defaults to 1. - Raises: - ValueError: If the endpoint is empty. + Raises: + ValueError: If endpoint is an empty string. + ValueError: If retries is not a non-negative integer. + ValueError: If delay is not a non-negative integer. + ValueError: If connection_timeout is not a non-negative integer. 
""" + + if headers is None: + headers = {} if endpoint == "": raise ValueError("::: endpoint param is mandatory :::") - else: - self.endpoint = endpoint - self.retry_strategy = retry_strategy - self.timeout = timeout - self.headers = headers + if not isinstance(retries, int) or retries < 0: + raise ValueError("retries must be a non-negative integer") + if not isinstance(delay, int) or delay < 0: + raise ValueError("delay must be a non-negative integer") + if not isinstance(connection_timeout, int) or connection_timeout < 0: + raise ValueError("connection_timeout must be a non-negative integer") + + self.endpoint = endpoint + self.retry_strategy = retry_strategy + self.connection_timeout = connection_timeout + self.headers = headers + self.retries = retries + self.delay = delay + @RetryStrategies def get_api_data(self): """ Retrieves data from the API using the defined endpoint and retry strategy. @@ -34,19 +56,22 @@ def get_api_data(self): Returns: dict: The response from the API. """ - response = GetData.get_response(self.endpoint, self.headers, self.retry_strategy, self.timeout) + response = GetData.get_response( + endpoint=self.endpoint, + headers=self.headers, + connection_timeout=self.connection_timeout + ) + return response.json() + + def _get_raw_api_data(self): + response = GetData.get_response( + endpoint=self.endpoint, + headers=self.headers, + connection_timeout=self.connection_timeout + ) return response @staticmethod def api_to_dataframe(response: dict): - """ - Converts the API response into a DataFrame. - - Args: - response (dict): The response from the API. - - Returns: - DataFrame: The data converted into a DataFrame. 
- """ df = GetData.to_dataframe(response) return df diff --git a/src/api_to_dataframe/models/get_data.py b/src/api_to_dataframe/models/get_data.py index dbf7d51..9e79fe3 100644 --- a/src/api_to_dataframe/models/get_data.py +++ b/src/api_to_dataframe/models/get_data.py @@ -1,32 +1,26 @@ import requests -from requests.exceptions import HTTPError, Timeout, RequestException +from requests.exceptions import HTTPError, Timeout import pandas as pd -from api_to_dataframe.common.utils.retry_strategies import RetryStrategies +# from api_to_dataframe.common.utils.retry_strategies import RetryStrategies +from api_to_dataframe.models.retainer import RetryStrategies class GetData: @staticmethod def get_response(endpoint: str, headers: dict, - retry_strategies: RetryStrategies, - timeout: int): - try: - response = requests.get(endpoint, timeout=timeout, headers=headers) - response.raise_for_status() - except HTTPError as http_err: - print(f'HTTP error occurred: {http_err}') - raise http_err - except Timeout as timeout_err: - print(f'Timeout error occurred: {timeout_err}') - raise timeout_err - else: - return response + connection_timeout: int): + + + response = requests.get(endpoint, timeout=connection_timeout, headers=headers) + response.raise_for_status() + return response @staticmethod def to_dataframe(response): try: - df = pd.DataFrame(response.json()) + df = pd.DataFrame(response) except Exception as err: raise TypeError(f"Invalid response for transform in dataframe: {err}") diff --git a/src/api_to_dataframe/models/retainer.py b/src/api_to_dataframe/models/retainer.py index e197021..9ad7137 100644 --- a/src/api_to_dataframe/models/retainer.py +++ b/src/api_to_dataframe/models/retainer.py @@ -1,15 +1,30 @@ -from api_to_dataframe.common.utils.retry_strategies import RetryStrategies +import time +from enum import Enum -class Retainer: - @staticmethod - def strategy(retry_strategy: RetryStrategies = RetryStrategies.NoRetryStrategy): - if retry_strategy == 
RetryStrategies.NoRetryStrategy: - print("::: NoRetryStrategy :::") - elif retry_strategy == RetryStrategies.LinearStrategy: - print("::: LinearStrategy :::") - elif retry_strategy == RetryStrategies.ExponentialStrategy: - print("::: ExponentialStrategy :::") - elif retry_strategy == RetryStrategies.CustomStrategy: - print("::: CustomStrategy :::") +class Strategies(Enum): + NoRetryStrategy = 0 + LinearRetryStrategy = 1 + ExponentialRetryStrategy = 2 + +def RetryStrategies(func): + def wrapper(*args, **kwargs): + retry_number = 0 + while retry_number < args[0].retries: + try: + return func(*args, **kwargs) + except Exception as e: + retry_number += 1 + + if args[0].retry_strategy == Strategies.NoRetryStrategy: + raise e + elif args[0].retry_strategy == Strategies.LinearRetryStrategy: + time.sleep(args[0].delay) + elif args[0].retry_strategy == Strategies.ExponentialRetryStrategy: + time.sleep(args[0].delay * 2 ** retry_number) + + if retry_number == args[0].retries: + print(f"Failed after {retry_number} retries using {args[0].retry_strategy}") + raise e + return wrapper diff --git a/tests/test_controller_client_builder.py b/tests/test_controller_client_builder.py index 43a3684..db4b24d 100644 --- a/tests/test_controller_client_builder.py +++ b/tests/test_controller_client_builder.py @@ -2,7 +2,7 @@ import pandas as pd import requests -from api_to_dataframe import ClientBuilder, RetryStrategies +from api_to_dataframe import ClientBuilder @pytest.fixture() @@ -17,10 +17,46 @@ def response_setup(): return new_client.get_api_data() -def test_constructor_without_param(): +def test_constructor_raises(): with pytest.raises(ValueError): new_client = ClientBuilder(endpoint="") + with pytest.raises(ValueError): + new_client = ClientBuilder( + endpoint="https://economia.awesomeapi.com.br/last/USD-BRL", + retries=-1 + ) + + with pytest.raises(ValueError): + new_client = ClientBuilder( + endpoint="https://economia.awesomeapi.com.br/last/USD-BRL", + delay=-1 + ) + + with 
pytest.raises(ValueError): + new_client = ClientBuilder( + endpoint="https://economia.awesomeapi.com.br/last/USD-BRL", + connection_timeout=-1 + ) + + with pytest.raises(ValueError): + new_client = ClientBuilder( + endpoint="https://economia.awesomeapi.com.br/last/USD-BRL", + retries="" + ) + + with pytest.raises(ValueError): + new_client = ClientBuilder( + endpoint="https://economia.awesomeapi.com.br/last/USD-BRL", + delay="" + ) + + with pytest.raises(ValueError): + new_client = ClientBuilder( + endpoint="https://economia.awesomeapi.com.br/last/USD-BRL", + connection_timeout="" + ) + def test_constructor_with_param(setup): expected_result = "https://economia.awesomeapi.com.br/last/USD-BRL" @@ -30,7 +66,7 @@ def test_constructor_with_param(setup): def test_response_to_json(setup): new_client = setup - response = new_client.get_api_data() + response = new_client._get_raw_api_data() assert isinstance(response, requests.Response) diff --git a/tests/test_models_get_data.py b/tests/test_models_get_data.py index fb5f5cd..db26e62 100644 --- a/tests/test_models_get_data.py +++ b/tests/test_models_get_data.py @@ -4,11 +4,6 @@ from api_to_dataframe.models.get_data import GetData from api_to_dataframe.controller.client_builder import ClientBuilder -from api_to_dataframe.common.utils.retry_strategies import RetryStrategies - - -def test_get_response(): - assert True def test_to_dataframe(): @@ -42,9 +37,8 @@ def test_http_error(): with ((pytest.raises(requests.exceptions.HTTPError))): GetData.get_response( endpoint=endpoint, - retry_strategies=RetryStrategies.NoRetryStrategy, headers={}, - timeout=10) + connection_timeout=10) @responses.activate @@ -56,9 +50,8 @@ def test_timeout_error(): with pytest.raises(requests.exceptions.Timeout): GetData.get_response( endpoint=endpoint, - retry_strategies=RetryStrategies.NoRetryStrategy, headers={}, - timeout=10) + connection_timeout=10) @responses.activate @@ -73,6 +66,5 @@ def test_request_exception(): with 
pytest.raises(requests.exceptions.RequestException): GetData.get_response( endpoint=endpoint, - retry_strategies=RetryStrategies.NoRetryStrategy, headers={}, - timeout=10) + connection_timeout=10) diff --git a/tests/test_models_retainer.py b/tests/test_models_retainer.py new file mode 100644 index 0000000..94c8997 --- /dev/null +++ b/tests/test_models_retainer.py @@ -0,0 +1,65 @@ +from api_to_dataframe import ClientBuilder, RetryStrategies +import requests +import time +import pytest + + +def test_linear_strategy(): + endpoint = "https://api-to-dataframe/" + max_retries = 2 + client = ClientBuilder( + endpoint=endpoint, + retry_strategy=RetryStrategies.LinearRetryStrategy, + retries=max_retries, + delay=1, + connection_timeout=1 + ) + + retry_number = 0 + + while retry_number < max_retries: + try: + start = time.time() + client.get_api_data() + except requests.exceptions.RequestException as e: + end = time.time() + assert end - start >= client.delay + retry_number += 1 + + assert retry_number == max_retries + + +def test_no_retry_strategy(): + endpoint = "https://api-to-dataframe/" + client = ClientBuilder( + endpoint=endpoint, + retry_strategy=RetryStrategies.NoRetryStrategy, + ) + + with pytest.raises(requests.exceptions.RequestException) as e: + client.get_api_data() + + +def test_exponential_strategy(): + endpoint = "https://api-to-dataframe/" + max_retries = 2 + client = ClientBuilder( + endpoint=endpoint, + retry_strategy=RetryStrategies.ExponentialRetryStrategy, + retries=max_retries, + delay=1, + connection_timeout=1 + ) + + retry_number = 0 + + while retry_number < max_retries: + try: + start = time.time() + client.get_api_data() + except requests.exceptions.RequestException as e: + end = time.time() + assert end - start >= client.delay * 2 ** retry_number + retry_number += 1 + + assert retry_number == max_retries