In [None]:
# | default_exp _helper

In [None]:
# | export

from typing import *

In [None]:
# | exporti

import os
import textwrap
from types import MethodType

import pandas as pd
import requests

from airt._constant import PROD_URL, SERVER_URL

In [None]:
import tempfile
from contextlib import contextmanager

import numpy as np
import pytest

import airt._sanitizer
from airt._constant import (
    CLIENT_NAME,
    SERVICE_PASSWORD,
    SERVICE_TOKEN,
    SERVICE_USERNAME,
)

In [None]:
# | export


def ensure_is_instance(o: Any, cls: Type):
    """Verify that an object is an instance of the expected class.

    Args:
        o: The object whose type is checked.
        cls: The class that `o` is expected to be an instance of.

    Raises:
        TypeError: If `o` is not an instance of `cls`.
    """
    # Happy path first: nothing to report when the type matches.
    if isinstance(o, cls):
        return

    raise TypeError(
        f"The parameter must be a {cls} type, but got `{type(o).__name__}`"
    )

In [None]:
# Tests for ensure_is_instance: an object of the expected type must not raise.

ensure_is_instance("sample", str)

In [None]:
# | exporti


def _get_json(response: requests.Response) -> Any:
    """A function to validate the status of the response object.

    Args:
        response: The response object that encapsulates the server's response.

    Returns:
        A dictionary of the response body.

    Raises:
        ValueError: If the response code is not in range of 200 - 399.
    """
    if response:
        return response.json()
    else:
        if (
            response.headers.get("content-type") == "application/json"
            and "detail" in response.json()
        ):
            raise ValueError(response.json()["detail"])
        else:
            raise ValueError(response.text)

In [None]:
# Tests for _get_json: success codes return the decoded body, error codes raise ValueError.

expected = {"key1": {"key2": "Success"}}

# A bare Response object is filled in by hand so that no network call is needed.
response = requests.models.Response()
# NOTE(review): `error_type` is not read by `_get_json`; presumably a leftover - confirm.
response.error_type = "expired"

# success status code (399 is the highest code this test expects to be treated as success)
for ok_status_code in [200, 202, 205, 399]:
    display(f"{ok_status_code=}")
    response.status_code = ok_status_code
    response._content = b'{ "key1" : {"key2" : "Success"   } }'

    actual = _get_json(response)

    assert actual == expected

# error status code
# No content-type header is set on this response, so presumably the raw-text branch of
# `_get_json` is the one exercised here - confirm.
for error_status_code in [400, 599]:
    display(f"{error_status_code=}")
    response.status_code = error_status_code
    response._content = b'{ "detail" : "Failed response" }'

    with pytest.raises(ValueError):
        _get_json(response)

'ok_status_code=200'

'ok_status_code=202'

'ok_status_code=205'

'ok_status_code=399'

'error_status_code=400'

'error_status_code=599'

In [None]:
def mask(s: str) -> str:
    """Return a string of asterisks that has the same length as `s`.

    Used by the tests to display secrets (tokens, URLs) without revealing them.
    """
    hidden = ["*"] * len(s)
    return "".join(hidden)

In [None]:
# mask must hide every character while preserving the length.
assert mask("davor") == "*****"

In [None]:
# | export


def get_base_url(server: Optional[str]) -> str:
    """Return the base URL for the airt server.

    Args:
        server: An explicit server URL. When it is not `None` it is returned as is.

    Returns:
        `server` when given; otherwise the value of the environment variable named by
        `SERVER_URL`; and if that variable is not set either, `PROD_URL`.
    """
    # An explicitly passed server always wins over the environment.
    if server is not None:
        return server

    return os.environ.get(SERVER_URL, PROD_URL)

In [None]:
# Tests for get_base_url. Server is not None, so it must be returned unchanged.

server = "http://example-service:6006"
actual = get_base_url(server)

display(f"{actual=}")
assert actual == server

"actual='http://example-service:6006'"

In [None]:
# Tests for get_base_url. Server is None and the value should be returned from the env variable.
# NOTE: this cell requires the SERVER_URL environment variable to be set; otherwise
# `expected` is None while `get_base_url` returns PROD_URL.

server = None
actual = get_base_url(server)
expected = os.environ.get(SERVER_URL)

# The value is masked so that the server address does not end up in the notebook output.
display(f"{mask(actual)=}")
assert actual == expected

"mask(actual)='************************'"

In [None]:
# Tests for get_base_url. Server is None and the env variable is also not set. Default value
# should be returned.

# Temporarily remove the env variable, remembering its value (None when it was not set).
_airt_server_url = os.environ.pop(SERVER_URL, None)

try:
    server = None
    actual = get_base_url(server)
    expected = PROD_URL

    display(f"{actual=}")
    assert actual == expected
finally:
    # Restore the env variable even if the assertion fails, so that later cells which
    # read it are not affected by this test.
    if _airt_server_url is not None:
        os.environ[SERVER_URL] = _airt_server_url

"actual='https://api.airt.ai'"

In [None]:
# | export


def post_data(
    url: str,
    token: Optional[str],
    data: Optional[Dict[str, Any]] = None,
    json: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Send a POST request and return the decoded response body.

    Args:
        url: The URL of the server to which the request needs to be sent.
        token: The unique auth token for the client, obtained via calling the `Client.get_token()` method.
            Pass `None` to send the request without an `Authorization` header, as is done
            when the token itself is being requested.
        data: A dictionary to send in the body of the request. It is passed to `requests.post`
            as `data`, i.e. form-encoded.
        json: A dictionary to send in the body of the request. It is passed to `requests.post`
            as `json`, i.e. JSON-encoded.

    Returns:
        A dictionary that encapsulates the response body.

    Raises:
        ConnectionError: If the server is not reachable.
        ValueError: If the response code is not in range of 200 - 399.
    """
    # `headers=None` is what `requests.post` uses when no headers are passed, so the
    # unauthenticated request is the same as one made without the argument.
    auth_headers = None if token is None else {"Authorization": f"Bearer {token}"}
    response = requests.post(url, data=data, json=json, headers=auth_headers)
    return _get_json(response)

In [None]:
# A helper context manager for creating a sample datablob using post_data.


@contextmanager
def create_sample_db():
    """Create a sample S3 datablob on the test server and yield `(datablob uuid, auth token)`.

    The credentials are read from the environment variables named by `SERVICE_USERNAME`
    and `SERVICE_PASSWORD`, the server from the one named by `SERVER_URL`, and the AWS
    keys from `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`.

    Nothing is executed after the `yield`, so the created datablob is not removed when
    the `with` block exits.
    """
    # Getting the token
    username = os.environ[SERVICE_USERNAME]
    password = os.environ[SERVICE_PASSWORD]
    server = os.environ.get(SERVER_URL)

    # The credentials are sent form-encoded (`data=`), not as JSON.
    response = requests.post(
        url=f"{server}/token", data=dict(username=username, password=password)
    )

    token = response.json()["access_token"]

    # Creating an s3 datablob by calling post_data
    uri = "s3://test-airt-service/account_312571_events_csv"
    access_key = os.environ.get("AWS_ACCESS_KEY_ID")
    secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")

    response = post_data(
        url=f"{server}/datablob/from_s3",
        json=dict(uri=uri, access_key=access_key, secret_key=secret_key),
        token=token,
    )
    yield (response["uuid"], token)


# `db_id` and `auth_token` stay defined at module level and are reused by later cells.
with create_sample_db() as (db_id, auth_token):
    display(f"{db_id=}, {mask(auth_token)=}")
    assert len(db_id) == 36  # including hyphens
    assert len(auth_token) >= 127  # NOTE(review): lower bound looks empirical - confirm

"db_id='5d9457a6-3083-4255-a68c-2ae61c394ea6', mask(auth_token)='*******************************************************************************************************************************'"

In [None]:
# Tests for post_data
# Checking negative scenario: a request to the token route that the server rejects.
# NOTE(review): the recorded output shows a "field required" validation error rather than
# an authentication failure - presumably because the credentials are sent as JSON here
# while `create_sample_db` sends them form-encoded. Confirm which failure is intended.
# This cell relies on `auth_token` defined by the `create_sample_db` cell.

username = "random_user"
password = "random password"
server = os.environ.get(SERVER_URL)

with pytest.raises(ValueError) as e:
    response = post_data(
        url=f"{server}/token",
        json=dict(username=username, password=password),
        token=auth_token,
    )

display(f"{e.value=}")

"e.value=ValueError([{'loc': ['body', 'username'], 'msg': 'field required', 'type': 'value_error.missing'}, {'loc': ['body', 'password'], 'msg': 'field required', 'type': 'value_error.missing'}])"

In [None]:
# | export


def get_data(url: str, token: Optional[str]) -> Any:
    """Send a GET request and return the decoded response body.

    Args:
        url: The URL of the server to which the request needs to be sent.
        token: The unique auth token for the client, obtained via calling the `Client.get_token()` method.
            When `None`, the request is sent without an `Authorization` header.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ConnectionError: If the server is not reachable.
        ValueError: If the response code is not in range of 200 - 399.
    """
    # Mirror `post_data`: without a token, omit the header instead of sending the
    # literal string "Bearer None".
    headers = None if token is None else {"Authorization": f"Bearer {token}"}
    response = requests.get(url, headers=headers)
    return _get_json(response)

In [None]:
# Tests for get_data
# Checking negative scenario: requesting a datablob uuid that the server rejects.
# This cell relies on `server` defined by an earlier cell.
TEST_UUID_V4 = "00000000-0000-0000-0000-000000000000"

with create_sample_db() as (db_id, token):
    with pytest.raises(ValueError) as e:
        response = get_data(url=f"{server}/datablob/{TEST_UUID_V4}", token=token)

    display(f"{e.value=}")

"e.value=ValueError('The datablob uuid is incorrect. Please try again.')"

In [None]:
# | export


def delete_data(url: str, token: Optional[str]) -> Dict[str, Any]:
    """Send a DELETE request and return the decoded response body.

    Args:
        url: The URL of the server to which the request needs to be sent.
        token: The unique auth token for the client, obtained via calling the `Client.get_token()` method.
            When `None`, the request is sent without an `Authorization` header.

    Returns:
        A dictionary that encapsulates the response body.

    Raises:
        ConnectionError: If the server is not reachable.
        ValueError: If the response code is not in range of 200 - 399.
    """
    # Mirror `post_data`: without a token, omit the header instead of sending the
    # literal string "Bearer None".
    headers = None if token is None else {"Authorization": f"Bearer {token}"}
    response = requests.delete(url, headers=headers)
    return _get_json(response)

In [None]:
# Tests for delete_data
# Checking positive scenario, followed by deleting the same datablob a second time.
# This cell relies on `server` defined by an earlier cell.

with create_sample_db() as (db_id, token):
    # delete the sample datablob
    response = delete_data(url=f"{server}/datablob/{db_id}", token=token)

    display(f"{response=}")
    assert db_id == response["uuid"]

    # Testing negative scenario: deleting an already deleted datablob
    with pytest.raises(ValueError) as e:
        response = delete_data(url=f"{server}/datablob/{db_id}", token=token)

    display(f"\n{e.value=}")

"response={'type': 's3', 'total_steps': 1, 'source': 's3://test-airt-service/account_312571_events_csv', 'completed_steps': 0, 'folder_size': None, 'error': None, 'cloud_provider': 'aws', 'region': 'eu-west-3', 'disabled': True, 'created': '2023-04-04T00:20:25', 'pulled_on': None, 'uuid': '8f21d3aa-100c-49bd-9dcd-10db1a574fdb', 'user': '3cbd5cf6-7100-4d52-8ad5-9993d2ce17ff', 'datasources': [], 'tags': [{'name': 'latest', 'uuid': 'ac19942e-65dd-4e56-8cd1-210e23d04611', 'created': '2023-04-04T00:18:38'}]}"

"\ne.value=ValueError('The datablob has already been deleted.')"

In [None]:
# Tests for delete_data
# Testing negative scenario: deleting a datablob uuid that the server rejects.
# This cell relies on `server`, `token` and `TEST_UUID_V4` left over from earlier cells.

with pytest.raises(ValueError) as e:
    delete_data(url=f"{server}/datablob/{TEST_UUID_V4}", token=token)

display(f"{e.value=}")

"e.value=ValueError('The datablob uuid is incorrect. Please try again.')"

In [None]:
# | export


def add_ready_column(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the step-counter columns of a DataFrame with a boolean `ready` column.

    The input DataFrame is left untouched; a new DataFrame is returned.

    Args:
        df: A pandas DataFrame with `completed_steps` and `total_steps` columns.

    Returns:
        A new pandas DataFrame without the `completed_steps` and `total_steps` columns
        and with a `ready` column that is `True` where the two were equal.
    """
    ready = df["completed_steps"] == df["total_steps"]

    # Build the result from a dropped copy instead of assigning into `df`, so the
    # caller's frame does not silently gain a `ready` column.
    return df.drop(columns=["completed_steps", "total_steps"]).assign(ready=ready)

In [None]:
# Tests for add_ready_column: the step columns are replaced by a boolean `ready` column.

test_df = pd.DataFrame(
    [
        {"total_steps": 5, "completed_steps": 5, "id": 2},
        {"total_steps": 5, "completed_steps": 3, "id": 2},
    ]
)

result = add_ready_column(test_df)

display(result)
# Two rows and two remaining columns: `id` and `ready`.
assert result.shape == (2, 2), result.shape
assert result.iloc[0]["ready"]
assert not result.iloc[1]["ready"]

Unnamed: 0,id,ready
0,2,True
1,2,False


In [None]:
# | export


def generate_df(
    items: Union[Dict[str, Any], List[Dict[str, Any]]], columns: list
) -> pd.DataFrame:
    """Build a DataFrame restricted to the given columns from an API response.

    Args:
        items: The response from an API endpoint that needs to be converted into a DataFrame.
        columns: A list of column names to be included in the DataFrame.

    Returns:
        A DataFrame built from `items` and restricted to `columns` when `items` is not
        empty, otherwise an empty DataFrame that has only the column names.
    """
    # Nothing to convert: return a frame that still exposes the expected columns.
    if len(items) == 0:
        return pd.DataFrame({name: [] for name in columns})

    full_frame = pd.DataFrame(items)
    return full_frame[columns]

In [None]:
# Tests for generate_df

items = [{"total_steps": 5, "completed_steps": 5, "id": 2}]
columns = ["id", "total_steps"]

# Testing Positive Scenario: only the requested columns are kept
result = generate_df(items, columns)

display(result)
assert result.shape == (len(items), len(columns))

# Testing the empty-input scenario: no rows, but the requested columns are still present

items = []

result = generate_df(items, columns)

display(result)
assert result.shape == (len(items), len(columns))

Unnamed: 0,id,total_steps
0,2,5


Unnamed: 0,id,total_steps


In [None]:
# | export


def get_values_from_item(items: list, value: Optional[str] = None) -> str:
    """Get **values** from items separated by comma.

    Args:
        items: The item list from the response.
        value: The key to extract from each item. When `None`, the items themselves
            are joined.

    Returns:
        The **values** as a string separated by `", "`. If the items list is empty,
        `<none>` will be returned.
    """
    if len(items) == 0:
        return "<none>"

    # Stringify in both branches so that non-string items (e.g. ints or UUID objects)
    # are handled the same way with and without `value`.
    if value is None:
        return ", ".join(str(item) for item in items)

    return ", ".join(str(item[value]) for item in items)

In [None]:
# Tests for get_values_from_item: extract one key from every item.
RANDOM_UUID_FOR_TESTING = "00000000-0000-0000-0000-000000000000"

_tags = [
    {
        "name": "version_1.0",
        "id": RANDOM_UUID_FOR_TESTING,
        "created": "2021-11-15T00:24:02.571Z",
    },
    {
        "name": "version_2.0",
        "id": RANDOM_UUID_FOR_TESTING,
        "created": "2021-11-15T00:24:02.571Z",
    },
]

value = "name"
display(f"{get_values_from_item(_tags, value)=}")
assert get_values_from_item(_tags, value) == "version_1.0, version_2.0"

"get_values_from_item(_tags, value)='version_1.0, version_2.0'"

In [None]:
# Tests for get_values_from_item: an empty list yields the "<none>" placeholder.
_tags = []
value = "name"

display(f"{get_values_from_item(_tags, value)=}")
assert get_values_from_item(_tags, value) == "<none>"

"get_values_from_item(_tags, value)='<none>'"

In [None]:
# Tests for get_values_from_item without `value`: the items themselves are joined.
response = [
    {
        "type": "s3",
        "uuid": RANDOM_UUID_FOR_TESTING,
        "datasources": [
            RANDOM_UUID_FOR_TESTING,
            RANDOM_UUID_FOR_TESTING,
            RANDOM_UUID_FOR_TESTING,
        ],
        "tags": "latest",
    },
    {
        "type": "s3",
        "uuid": RANDOM_UUID_FOR_TESTING,
        "datasources": [],
        "tags": "latest",
    },
]

# NOTE(review): `value` is assigned but not used in this cell - confirm it can be removed.
value = "id"

for r in response:
    r["datasources"] = get_values_from_item(r["datasources"])

assert (
    response[0]["datasources"]
    == f"{RANDOM_UUID_FOR_TESTING}, {RANDOM_UUID_FOR_TESTING}, {RANDOM_UUID_FOR_TESTING}"
)
assert response[1]["datasources"] == "<none>"
response

[{'type': 's3',
  'uuid': '00000000-0000-0000-0000-000000000000',
  'datasources': '00000000-0000-0000-0000-000000000000, 00000000-0000-0000-0000-000000000000, 00000000-0000-0000-0000-000000000000',
  'tags': 'latest'},
 {'type': 's3',
  'uuid': '00000000-0000-0000-0000-000000000000',
  'datasources': '<none>',
  'tags': 'latest'}]

In [None]:
# | export


def get_attributes_from_instances(
    ox: List[object], attributes: List[str]
) -> List[Dict[str, Any]]:
    """Collect the requested **attributes** of every instance.

    Args:
        ox: List of instances.
        attributes: Names of the attributes to read from each instance.

    Returns:
        A list with one dictionary per instance, mapping each attribute name to the
        value of that attribute on the instance.
    """
    records = []
    for instance in ox:
        record = {name: getattr(instance, name) for name in attributes}
        records.append(record)
    return records

In [None]:
# Tests for get_attributes_from_instances: only the requested attributes are extracted.
class Test:
    """Minimal fixture class with three plain attributes."""

    def __init__(self, name, age, sex):
        self.name = name
        self.age = age
        self.sex = sex


test_1 = Test("sample_user_1", 30, "M")
test_2 = Test("sample_user_2", 30, "F")
# `age` is deliberately left out to check that unrequested attributes are not returned.
attributes = ["name", "sex"]

actual = get_attributes_from_instances([test_1, test_2], attributes)
expected = [
    {"name": "sample_user_1", "sex": "M"},
    {"name": "sample_user_2", "sex": "F"},
]
assert actual == expected
actual

[{'name': 'sample_user_1', 'sex': 'M'}, {'name': 'sample_user_2', 'sex': 'F'}]

In [None]:
# | export


def dict_to_df(d: Dict[str, Any]) -> pd.DataFrame:
    """Rebuild a pandas DataFrame from its dictionary representation.

    Args:
        d: A dict with two keys: `data`, itself a dict with the keys `data`, `index`,
            `columns` and `index_names`, and `dtypes`, a mapping from column name to
            the dtype that the column is cast to.

    Returns:
        The pandas DataFrame constructed from the dict, with every column listed in
        `dtypes` cast to its dtype.
    """
    payload = d["data"]

    frame = pd.DataFrame(
        data=payload["data"], index=payload["index"], columns=payload["columns"]
    )
    frame = frame.rename_axis(payload["index_names"])

    # Restore the dtype of each listed column, one column at a time.
    for column, dtype in d["dtypes"].items():
        frame[column] = frame[column].astype(dtype)

    return frame

In [None]:
# Round-trip test for dict_to_df: serialise a frame to its "tight" dict plus dtype names,
# rebuild it and check that the result equals the original.

# Build the fixture with public APIs only; `pd._testing.makeDataFrame` is a private
# helper that is not guaranteed to exist across pandas versions.
df = pd.DataFrame(np.random.randn(30, 4), columns=list("ABCD")).set_index("A")

# Add columns with several non-default dtypes so that the dtype restoration is exercised.
for c in df.columns:
    df[f"{c}_float32"] = df[c].astype("float32")
    df[f"{c}_int32"] = df[c].astype("int32")
    df[f"{c}_bool"] = df[c].astype("bool")
df["ts"] = np.datetime64("now")

df_head = df.head()
d = {
    "data": df_head.to_dict("tight"),
    "dtypes": df_head.dtypes.apply(lambda x: str(x)).to_dict(),
}
actual = dict_to_df(d)

pd.testing.assert_frame_equal(df_head, actual)
actual

Unnamed: 0_level_0,B,C,D,B_float32,B_int32,B_bool,C_float32,C_int32,C_bool,D_float32,D_int32,D_bool,ts
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
-0.203917,-1.416683,-1.484782,-0.423902,-1.416683,-1,True,-1.484782,-1,True,-0.423902,0,True,2023-04-04 00:20:36
-0.491335,0.28559,-0.081368,0.69712,0.28559,0,True,-0.081368,0,True,0.69712,0,True,2023-04-04 00:20:36
-1.822544,-1.704996,0.371523,1.140692,-1.704996,-1,True,0.371523,0,True,1.140692,1,True,2023-04-04 00:20:36
-1.90089,-0.096739,0.843024,-1.839296,-0.096739,0,True,0.843024,0,True,-1.839296,-1,True,2023-04-04 00:20:36
-0.238108,-0.541821,0.457237,-0.901624,-0.541821,0,True,0.457237,0,True,-0.901624,0,True,2023-04-04 00:20:36


In [None]:
# | exporti


def check_and_append_otp_query_param(relative_url: str, otp: Union[str, None]) -> str:
    """Append the `otp` query parameter to the relative url when an otp is given.

    Args:
        relative_url: The relative url for the route.
        otp: The user's otp, or `None` when there is none.

    Returns:
        The relative url with `otp=<otp>` appended if otp is not None, else the original
        relative_url.
    """
    if otp is None:
        return relative_url

    # A url that already has a query string gets another parameter; otherwise the otp
    # starts the query string.
    separator = "&" if "?" in relative_url else "?"
    return f"{relative_url}{separator}otp={otp}"

In [None]:
# The url already has a query string, so the otp is appended with "&".
url = "/search?a=string"
# NOTE(review): an int is passed although the parameter is annotated as a string - confirm.
otp = 123
expected = "/search?a=string&otp=123"
actual = check_and_append_otp_query_param(url, otp)
assert actual == expected
actual

'/search?a=string&otp=123'

In [None]:
# The url has no query string yet, so the otp starts it with "?".
url = "/search"
otp = 123
expected = "/search?otp=123"
actual = check_and_append_otp_query_param(url, otp)

assert actual == expected
actual

'/search?otp=123'

In [None]:
# Without an otp the url must be returned unchanged.
url = "/search"
otp = None
expected = "/search"
actual = check_and_append_otp_query_param(url, otp)

assert actual == expected
actual

'/search'

In [None]:
# | export


def standardize_phone_number(phone_number: str) -> str:
    """Standardize the user's phone number

    Every character that is not a digit is removed, and a leading `00` is then dropped.
    For example, **440123456789, +440123456789, 00440123456789, +44 0123456789, and
    (+44) 012 345 6789** are all converted into 440123456789.

    Args:
        phone_number: The phone number to convert into a standardized format.

    Returns:
        The phone number in a standardized format.
    """
    digits = "".join(ch for ch in phone_number if ch.isdigit())

    # Only a single leading "00" is removed.
    return digits[2:] if digits.startswith("00") else digits

In [None]:
# Tests for standardize_phone_number: every spelling below denotes the same number
# and must be reduced to the same digit string.
phone_numbers = [
    "+441234567890",
    "00441234567890",
    "441234567890",
    "0044–123-456-7890",
    "+44–1234567890",
    "+44 123 456 7890",
    "(+44) 123 456 7890",
    "+44 1234567890",
]

for phone_number in phone_numbers:
    actual = standardize_phone_number(phone_number=phone_number)
    expected = "441234567890"

    display(actual)
    assert actual == expected

'441234567890'

'441234567890'

'441234567890'

'441234567890'

'441234567890'

'441234567890'

'441234567890'

'441234567890'

In [None]:
# | exporti


def add_example_to_docs(o: Any, example: str):
    """Append the given example to the docstring of an object.

    The first docstring line is kept as is, the remainder of the docstring and the
    example are each dedented, and the parts are joined with newlines. The result is
    written back to `o.__doc__`.

    Args:
        o: An object, typically a function or a class, for which the example needs to be added.
        example: The example string to add.
    """
    original_doc = o.__doc__

    # An object without a docstring gets the example as its whole docstring.
    if not original_doc:
        o.__doc__ = textwrap.dedent(example)
        return

    # `partition` also copes with single-line docstrings: the remainder is then empty
    # instead of the unpacking of a one-element `split` result failing.
    first_line, _, everything_else = original_doc.partition("\n")

    o.__doc__ = "\n".join(
        [first_line, textwrap.dedent(everything_else), textwrap.dedent(example)]
    )

In [None]:
# Tests for add_example_to_docs. The docstring of `_test` below is the fixture that the
# example is appended to, so its exact text matters for the assertion.
def _test(s: str = "hello"):
    """This is a test function

    Args:
        s: sample text

    Returns:
        None
    """
    pass


# The example that is appended to the docstring.
example = """
Example:
    ```python
    print("hello world")
    ```
"""
# The expected docstring: the original text, dedented, followed by the example.
expected = textwrap.dedent(
    """This is a test function
    
Args:
    s: sample text

Returns:
    None


Example:
    ```python
    print("hello world")
    ```
"""
)

add_example_to_docs(_test, example)
actual = textwrap.dedent(_test.__doc__)
np.testing.assert_array_equal(actual, expected)

actual

'This is a test function\n\nArgs:\n    s: sample text\n\nReturns:\n    None\n\n\nExample:\n    ```python\n    print("hello world")\n    ```\n'