# Test of APIs

### High Level API

In [1]:
from __future__ import annotations

import pandas as pd

from tab_err import error_type
from tab_err.api import high_level
from tab_err.error_mechanism import ECAR
from tab_err.error_type import ErrorTypeConfig

# Toy table used by the cells below: six rows, two typists with three books each,
# and one float rating per book.
df_typist_book_title = pd.DataFrame(
    dict(
        typist=["Alice", "Alice", "Alice", "Bob", "Bob", "Bob"],
        book_title=[
            "To Kill a Mockingbird",
            "1984",
            "Pride and Prejudice",
            "The Great Gatsby",
            "Moby-Dick",
            "The Catcher in the Rye",
        ],
        rating=[1.0, 3.0, 3.0, 4.0, 2.0, 1.0],
    )
)

# Perturb the toy table through the high-level API. The call's result is unpacked
# into the corrupted frame and the accompanying error mask.
# NOTE(review): no seed is passed here, so the outcome presumably varies between
# runs — confirm whether the API offers a seed parameter.
corrupted_data, error_mask = high_level.create_errors(
    df_typist_book_title,
    overall_max_error=0.75,
)

MidLevelConfig(columns={'typist': [ErrorModel(error_mechanism=<tab_err.error_mechanism._ecar.ECAR object at 0x7f6c072c3640>, error_type=<tab_err.error_type._missing.MissingValue object at 0x7f6c072c3430>, error_rate=0.4782132229199815), ErrorModel(error_mechanism=<tab_err.error_mechanism._enar.ENAR object at 0x7f6c072c35e0>, error_type=<tab_err.error_type._missing.MissingValue object at 0x7f6c072c3430>, error_rate=0.2717867770800185)], 'book_title': [ErrorModel(error_mechanism=<tab_err.error_mechanism._ecar.ECAR object at 0x7f6c072c3640>, error_type=<tab_err.error_type._missing.MissingValue object at 0x7f6c072c33a0>, error_rate=0.75)], 'rating': [ErrorModel(error_mechanism=<tab_err.error_mechanism._ear.EAR object at 0x7f6c072c34c0>, error_type=<tab_err.error_type._missing.MissingValue object at 0x7f6c072c3310>, error_rate=0.75)]})
{'typist': [ErrorModel(error_mechanism=<tab_err.error_mechanism._ecar.ECAR object at 0x7f6c072c3640>, error_type=<tab_err.error_type._missing.MissingValue ob

In [2]:
def show_result(original_df: pd.DataFrame, perturbed_df: pd.DataFrame, error_mask: pd.DataFrame | None = None) -> pd.DataFrame:
    """Simple helper function to show DataFrames after perturbing them."""
    return (
        pd.concat([original_df, perturbed_df], keys=["original", "perturbed"], axis=1)
        if error_mask is None
        else pd.concat([original_df, perturbed_df, error_mask], keys=["original", "perturbed", "error_mask"], axis=1)
    )

In [3]:
# Display original, perturbed and mask columns next to each other.
show_result(original_df=df_typist_book_title, perturbed_df=corrupted_data, error_mask=error_mask)

Unnamed: 0_level_0,original,original,original,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask
Unnamed: 0_level_1,typist,book_title,rating,typist,book_title,rating,typist,book_title,rating
0,Alice,To Kill a Mockingbird,1.0,Alice,,1.0,False,True,False
1,Alice,1984,3.0,,1984,,True,False,True
2,Alice,Pride and Prejudice,3.0,Alice,,,False,True,True
3,Bob,The Great Gatsby,4.0,Bob,,,False,True,True
4,Bob,Moby-Dick,2.0,,Moby-Dick,,True,False,True
5,Bob,The Catcher in the Rye,1.0,,,1.0,True,True,False


### High-level configuration from YAML

In [None]:
# Perturb the same toy table, this time passing the path of a YAML config file.
# NOTE(review): the recorded output of this cell ends in
# "TypeError: Column rating does not contain scalars. Cannot apply a wrong unit.".
# When that happens the assignment below never completes, so `corrupted_data` and
# `error_mask` keep whatever they held before. Check the YAML config against the
# installed library version and re-run from a fresh kernel.
corrupted_data, error_mask = high_level.create_errors_from_config(
    df_typist_book_title,
    "./../tab_err/hla_test_conf.yaml",
)


True
Mech:  EAR conditions:  {'conditioning-column': 'rating'}
Mech:  ENAR conditions:  None
None
{'extraneous_value_template': '.{value}'}
Mech:  EAR conditions:  {'conditioning-column': 'book_title'}
None
{'wrong_unit_scaling': 'lambda x: x * 10'}
Mechanisms:  {'typist': [<tab_err.error_mechanism._ear.EAR object at 0x7f6c07063580>, <tab_err.error_mechanism._enar.ENAR object at 0x7f6c070625f0>], 'rating': [<tab_err.error_mechanism._ear.EAR object at 0x7f6c06e63430>]} 
Types:  {'typist': [<tab_err.error_type._missing.MissingValue object at 0x7f6c06e60730>, <tab_err.error_type._extraneous.Extraneous object at 0x7f6c06e63e80>], 'rating': [<tab_err.error_type._missing.MissingValue object at 0x7f6c06e605b0>, <tab_err.error_type._wrong_unit.WrongUnit object at 0x7f6c072df5e0>]} 
Error Rates:  {'typist': [0.059011346407974834, 0.2622840443718461, 0.17870460922017906], 'rating': [0.10714295229704886, 0.19736990129355714, 0.19548714640939402]} 
Num Models:  {'typist': 3, 'rating': 3} 
Columns:

TypeError: Column rating does not contain scalars. Cannot apply a wrong unit.

In [5]:
# Display original, perturbed and mask columns next to each other.
# NOTE(review): the recorded output of the preceding cell ends in a TypeError, so
# the frames shown here may stem from an earlier successful run — confirm by
# re-running the notebook from a fresh kernel.
show_result(original_df=df_typist_book_title, perturbed_df=corrupted_data, error_mask=error_mask)

Unnamed: 0_level_0,original,original,original,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask
Unnamed: 0_level_1,typist,book_title,rating,typist,book_title,rating,typist,book_title,rating
0,Alice,To Kill a Mockingbird,1.0,Alice,To Kill a Mockingbird,1.0,False,False,False
1,Alice,1984,3.0,Alice,1984,3.0,False,False,False
2,Alice,Pride and Prejudice,3.0,,Pride and Prejudice,3.0,True,False,False
3,Bob,The Great Gatsby,4.0,.Bob,The Great Gatsby,4.0,True,False,False
4,Bob,Moby-Dick,2.0,Bob,Moby-Dick,2.0,False,False,False
5,Bob,The Catcher in the Rye,1.0,Bob,The Catcher in the Rye,,False,False,True
