# Test of the High-Level API

This notebook is intended for development purposes.

In [1]:
from __future__ import annotations

import pandas as pd

from tab_err.api import high_level

In [2]:
def show_result(original_df: pd.DataFrame, perturbed_df: pd.DataFrame, error_mask: pd.DataFrame | None = None) -> pd.DataFrame:
    """Lay out the original and perturbed frames side by side for inspection.

    Args:
        original_df: The frame before perturbation.
        perturbed_df: The frame after perturbation.
        error_mask: Optional frame to append as a third column group.

    Returns:
        A frame whose columns are grouped under the top-level keys
        "original" and "perturbed", plus "error_mask" when a mask is given.
    """
    frames = [original_df, perturbed_df]
    group_labels = ["original", "perturbed"]

    # The mask group is appended only when the caller supplied one.
    if error_mask is not None:
        frames.append(error_mask)
        group_labels.append("error_mask")

    return pd.concat(frames, keys=group_labels, axis=1)

In [3]:
# Small fixture: six rows, two text columns and one float column.
# Both text columns are cast to the pandas "string" dtype in a single astype call.
df_typist_book_title = pd.DataFrame(
    {
        "typist": ["Alice", "Alice", "Alice", "Bob", "Bob", "Bob"],
        "book_title": [
            "To Kill a Mockingbird",
            "1984",
            "Pride and Prejudice",
            "The Great Gatsby",
            "Moby-Dick",
            "The Catcher in the Rye",
        ],
        "rating": [1.0, 3.0, 3.0, 4.0, 2.0, 1.0],
    }
).astype({"typist": "string", "book_title": "string"})

# Confirm the resulting column dtypes.
print(df_typist_book_title.dtypes)

typist        string[python]
book_title    string[python]
rating               float64
dtype: object


### Test: `high_level.create_errors`

In [4]:
# Run the high-level API on the fixture frame. The call returns a pair, unpacked here
# into the perturbed frame and its error mask.
# The second positional argument (0.5) is presumably the target error rate — TODO confirm
# against the signature of `high_level.create_errors`.
df_corrupted, error_mask = high_level.create_errors(df_typist_book_title, 0.5)
# NOTE(review): the output recorded for this cell shows an all-False error mask and a
# perturbed frame identical to the original — verify whether that is expected.
# Show original, perturbed, and mask side by side as the cell's rich output.
show_result(df_typist_book_title, df_corrupted, error_mask)

Column-type dict:  {'typist': [<tab_err.error_type._extraneous.Extraneous object at 0x7fe36eac0d90>, <tab_err.error_type._mojibake.Mojibake object at 0x7fe36eac3e20>, <tab_err.error_type._replace.Replace object at 0x7fe36eac3940>, <tab_err.error_type._typo.Typo object at 0x7fe36eac1ba0>], 'book_title': [<tab_err.error_type._extraneous.Extraneous object at 0x7fe36eac0d90>, <tab_err.error_type._mojibake.Mojibake object at 0x7fe36eac3e20>, <tab_err.error_type._replace.Replace object at 0x7fe36eac3940>, <tab_err.error_type._typo.Typo object at 0x7fe36eac1ba0>], 'rating': [<tab_err.error_type._wrong_unit.WrongUnit object at 0x7fe36eac0ca0>]}
Column-mech dict:  {'typist': [<tab_err.error_mechanism._enar.ENAR object at 0x7fe36eac3ca0>, <tab_err.error_mechanism._ecar.ECAR object at 0x7fe36eac3be0>, <tab_err.error_mechanism._ear.EAR object at 0x7fe36eac3b80>, <tab_err.error_mechanism._ear.EAR object at 0x7fe36eac0f10>], 'book_title': [<tab_err.error_mechanism._enar.ENAR object at 0x7fe36eac3ca0

Unnamed: 0_level_0,original,original,original,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask
Unnamed: 0_level_1,typist,book_title,rating,typist,book_title,rating,typist,book_title,rating
0,Alice,To Kill a Mockingbird,1.0,Alice,To Kill a Mockingbird,1.0,False,False,False
1,Alice,1984,3.0,Alice,1984,3.0,False,False,False
2,Alice,Pride and Prejudice,3.0,Alice,Pride and Prejudice,3.0,False,False,False
3,Bob,The Great Gatsby,4.0,Bob,The Great Gatsby,4.0,False,False,False
4,Bob,Moby-Dick,2.0,Bob,Moby-Dick,2.0,False,False,False
5,Bob,The Catcher in the Rye,1.0,Bob,The Catcher in the Rye,1.0,False,False,False
