# Test of APIs

### High Level API

In [1]:
from __future__ import annotations

import pandas as pd

from tab_err import ErrorModel, error_mechanism, error_type
from tab_err.api import MidLevelConfig, high_level, mid_level

# Toy dataset used by every cell below: six rows with a typist name,
# the book title that was entered, and a float rating.
df_typist_book_title = pd.DataFrame(
    data=dict(
        typist=["Alice"] * 3 + ["Bob"] * 3,
        book_title=[
            "To Kill a Mockingbird",
            "1984",
            "Pride and Prejudice",
            "The Great Gatsby",
            "Moby-Dick",
            "The Catcher in the Rye",
        ],
        rating=[1.0, 3.0, 3.0, 4.0, 2.0, 1.0],
    ),
)

# High-level API: only the frame and an overall error budget are supplied.
# The call yields the perturbed data together with a mask of the same layout.
corrupted_data, error_mask = high_level.create_errors(
    df_typist_book_title,
    overall_max_error=0.75,
)

MidLevelConfig(columns={'rating': [ErrorModel(error_mechanism=<tab_err.error_mechanism._enar.ENAR object at 0x7f3cb34943d0>, error_type=<tab_err.error_type._missing.MissingValue object at 0x7f3cb34946a0>, error_rate=0.75)]})
{'rating': [ErrorModel(error_mechanism=<tab_err.error_mechanism._enar.ENAR object at 0x7f3cb34943d0>, error_type=<tab_err.error_type._missing.MissingValue object at 0x7f3cb34946a0>, error_rate=0.75)]}


In [2]:
def show_result(original_df: pd.DataFrame, perturbed_df: pd.DataFrame, error_mask: pd.DataFrame | None = None) -> pd.DataFrame:
    """Place the original and perturbed frames side by side for inspection.

    Args:
        original_df: The frame before errors were introduced.
        perturbed_df: The frame after errors were introduced.
        error_mask: Optional mask frame; when given it is appended as a third group.

    Returns:
        A column-wise concatenation whose top column level is "original",
        "perturbed" and, if a mask was passed, "error_mask".
    """
    frames = [original_df, perturbed_df]
    group_labels = ["original", "perturbed"]

    # The mask is optional, so only add its column group when it was supplied.
    if error_mask is not None:
        frames.append(error_mask)
        group_labels.append("error_mask")

    return pd.concat(frames, keys=group_labels, axis=1)

In [3]:
# Side-by-side view: original frame, perturbed frame and the error mask.
show_result(
    original_df=df_typist_book_title,
    perturbed_df=corrupted_data,
    error_mask=error_mask,
)

Unnamed: 0_level_0,original,original,original,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask
Unnamed: 0_level_1,typist,book_title,rating,typist,book_title,rating,typist,book_title,rating
0,Alice,To Kill a Mockingbird,1.0,Alice,,,False,True,True
1,Alice,1984,3.0,Alice,1984,,False,False,True
2,Alice,Pride and Prejudice,3.0,Alice,Pride and Prejudice,3.0,False,False,False
3,Bob,The Great Gatsby,4.0,Bob,,,False,True,True
4,Bob,Moby-Dick,2.0,Bob,,,False,True,True
5,Bob,The Catcher in the Rye,1.0,Bob,,1.0,False,True,False


### High Level API: configuration loaded from a YAML file

In [11]:
# Same frame as before, but the error setup is read from a YAML file
# (path is relative to the notebook's working directory).
corrupted_data, error_mask = high_level.create_errors_from_config(
    df_typist_book_title,
    "./../tab_err/hla_test_conf.yaml",
)


Mech:  EAR conditions:  {'conditioning-column': 'rating'}
Mech:  ENAR conditions:  None
None
{'extraneous_value_template': '.{value}'}
Mech:  EAR conditions:  {'conditioning-column': 'book_title'}
None
{'add_delta_value': 1.0}
Mechanisms:  {'typist': [<tab_err.error_mechanism._ear.EAR object at 0x7f3ce8264d60>, <tab_err.error_mechanism._enar.ENAR object at 0x7f3ce8264d30>], 'rating': [<tab_err.error_mechanism._ear.EAR object at 0x7f3ce8264dc0>]} 
Types:  {'typist': [<tab_err.error_type._missing.MissingValue object at 0x7f3ce8265420>, <tab_err.error_type._extraneous.Extraneous object at 0x7f3ce8264be0>], 'rating': [<tab_err.error_type._missing.MissingValue object at 0x7f3ce82654e0>, <tab_err.error_type._add_delta.AddDelta object at 0x7f3cb2e0c3a0>]} 
Error Rates:  {'typist': [0.23291952890998147, 0.17383622383701464, 0.0932442472530039], 'rating': [0.4959253432423444, 0.40407465675765564]} 
Num Models:  {'typist': 3, 'rating': 2} 
Columns:  {'typist', 'rating'}
Mid Level Config:  MidLev

In [15]:
# Show the current corrupted_data / error_mask next to the original frame.
show_result(
    original_df=df_typist_book_title,
    perturbed_df=corrupted_data,
    error_mask=error_mask,
)

Unnamed: 0_level_0,original,original,original,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask
Unnamed: 0_level_1,typist,book_title,rating,typist,book_title,rating,typist,book_title,rating
0,Alice,To Kill a Mockingbird,1.0,.Alice,To Kill a Mockingbird,1.0,True,False,False
1,Alice,1984,3.0,Alice,1984,4.0,False,False,True
2,Alice,Pride and Prejudice,3.0,,Pride and Prejudice,3.0,True,False,False
3,Bob,The Great Gatsby,4.0,Bob,The Great Gatsby,5.0,False,False,True
4,Bob,Moby-Dick,2.0,Bob,Moby-Dick,3.0,False,False,True
5,Bob,The Catcher in the Rye,1.0,Bob,The Catcher in the Rye,2.0,False,False,True


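### Mid Level API

Ground-truth test of the mid-level API with an explicitly constructed `MidLevelConfig`. The recorded run of this cell fails with an indexer error (`ValueError: Incompatible indexer with DataFrame`).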

In [10]:

# Explicit mid-level configuration: one error model for the "rating" column
# that combines the ECAR mechanism with a WrongUnit error at a 0.5 error rate.
conf = MidLevelConfig(
    {
        "rating": [
            ErrorModel(
                error_mechanism=error_mechanism.ECAR(),
                # Pass the scaling function itself. Writing `{lambda x: x*10}`
                # is a set literal holding the function, i.e. a non-callable
                # `set`, not the callable that "wrong_unit_scaling" names.
                error_type=error_type.WrongUnit({"wrong_unit_scaling": lambda x: x * 10}),
                error_rate=0.5,
            )
        ]
    }
)
corrupted_data, error_mask = mid_level.create_errors(df_typist_book_title, conf)
show_result(df_typist_book_title, corrupted_data, error_mask)


ValueError: Incompatible indexer with DataFrame