# Test of the High-Level API

For development purposes

In [53]:
%load_ext autoreload
%autoreload 2
from __future__ import annotations

import pandas as pd

from tab_err import error_type, error_mechanism
from tab_err.api import high_level


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [54]:
def show_result(original_df: pd.DataFrame, perturbed_df: pd.DataFrame, error_mask: pd.DataFrame | None = None) -> pd.DataFrame:
    """Lay the original and perturbed frames side by side for inspection.

    Args:
        original_df: The frame before perturbation.
        perturbed_df: The frame after perturbation.
        error_mask: Optional frame appended as a third group when given.

    Returns:
        A column-wise concatenation whose top column level is "original",
        "perturbed" and, if a mask was passed, "error_mask".
    """
    frames = [original_df, perturbed_df]
    group_labels = ["original", "perturbed"]

    # The mask group is only added when the caller actually supplied one.
    if error_mask is not None:
        frames.append(error_mask)
        group_labels.append("error_mask")

    return pd.concat(frames, keys=group_labels, axis=1)

In [55]:
# Small toy frame: two typists with three rated book titles each.
df_typist_book_title = pd.DataFrame(
    {
        "typist": ["Alice"] * 3 + ["Bob"] * 3,
        "book_title": [
            "To Kill a Mockingbird",
            "1984",
            "Pride and Prejudice",
            "The Great Gatsby",
            "Moby-Dick",
            "The Catcher in the Rye",
        ],
        "rating": [1.0, 3.0, 3.0, 4.0, 2.0, 1.0],
    }
)

# Show the column dtypes of the toy frame.
print(df_typist_book_title.dtypes)

typist         object
book_title     object
rating        float64
dtype: object


### Test on a small toy DataFrame

In [56]:
# Corrupt the toy frame; 0.5 is passed positionally as the second argument.
# NOTE(review): presumably this is the `max_error_rate` that later cells pass by keyword -- confirm.
# NOTE(review): the saved output of this cell shows an all-False error mask -- confirm that is expected.
df_corrupted, error_mask = high_level.create_errors(
    df_typist_book_title,
    0.5,
)
show_result(
    original_df=df_typist_book_title,
    perturbed_df=df_corrupted,
    error_mask=error_mask,
)

Column-type dict:  {'typist': [<tab_err.error_type._extraneous.Extraneous object at 0x7f99299ca4a0>, <tab_err.error_type._mojibake.Mojibake object at 0x7f99299cae60>, <tab_err.error_type._replace.Replace object at 0x7f99299ca200>, <tab_err.error_type._typo.Typo object at 0x7f99299cada0>, <tab_err.error_type._missing.MissingValue object at 0x7f99299cad40>], 'book_title': [<tab_err.error_type._extraneous.Extraneous object at 0x7f99299ca4a0>, <tab_err.error_type._mojibake.Mojibake object at 0x7f99299cae60>, <tab_err.error_type._replace.Replace object at 0x7f99299ca200>, <tab_err.error_type._typo.Typo object at 0x7f99299cada0>, <tab_err.error_type._missing.MissingValue object at 0x7f99299cad40>], 'rating': [<tab_err.error_type._add_delta.AddDelta object at 0x7f99299ca350>, <tab_err.error_type._outlier.Outlier object at 0x7f99299cb970>, <tab_err.error_type._wrong_unit.WrongUnit object at 0x7f99299cbac0>, <tab_err.error_type._missing.MissingValue object at 0x7f99299cad40>]}
Column-mech dict:

  # Build MidLevel Config
  # Build MidLevel Config
  # Build MidLevel Config
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)


Unnamed: 0_level_0,original,original,original,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask
Unnamed: 0_level_1,typist,book_title,rating,typist,book_title,rating,typist,book_title,rating
0,Alice,To Kill a Mockingbird,1.0,Alice,To Kill a Mockingbird,1.0,False,False,False
1,Alice,1984,3.0,Alice,1984,3.0,False,False,False
2,Alice,Pride and Prejudice,3.0,Alice,Pride and Prejudice,3.0,False,False,False
3,Bob,The Great Gatsby,4.0,Bob,The Great Gatsby,4.0,False,False,False
4,Bob,Moby-Dick,2.0,Bob,Moby-Dick,2.0,False,False,False
5,Bob,The Catcher in the Rye,1.0,Bob,The Catcher in the Rye,1.0,False,False,False


# Testing of High Level API with a large dataset

In [57]:
# Load the beers dataset from the CSV file next to the notebook and preview the first rows.
df_clean = pd.read_csv(filepath_or_buffer="./clean_beers.csv")
df_clean.head(n=5)

Unnamed: 0,index,id,beer_name,style,ounces,abv,ibu,brewery_id,brewery_name,city,state
0,1,1436,Pub Beer,American Pale Lager,12.0,0.05,,408,10 Barrel Brewing Company,Bend,OR
1,2,2265,Devil's Cup,American Pale Ale (APA),12.0,0.066,,177,18th Street Brewery,Gary,IN
2,3,2264,Rise of the Phoenix,American IPA,12.0,0.071,,177,18th Street Brewery,Gary,IN
3,4,2263,Sinister,American Double / Imperial IPA,12.0,0.09,,177,18th Street Brewery,Gary,IN
4,5,2262,Sex and Candy,American IPA,12.0,0.075,,177,18th Street Brewery,Gary,IN


In [58]:
# Remove the columns that are not needed for the experiments below.
# errors="ignore" keeps this cell re-runnable: because the result is bound back
# to `df_clean`, a second execution would otherwise raise KeyError for the
# columns that the first execution already removed.
df_clean = df_clean.drop(
    columns=["index", "id", "ounces", "ibu", "brewery_id", "city"],
    errors="ignore",
)
df_clean.head()

Unnamed: 0,beer_name,style,abv,brewery_name,state
0,Pub Beer,American Pale Lager,0.05,10 Barrel Brewing Company,OR
1,Devil's Cup,American Pale Ale (APA),0.066,18th Street Brewery,IN
2,Rise of the Phoenix,American IPA,0.071,18th Street Brewery,IN
3,Sinister,American Double / Imperial IPA,0.09,18th Street Brewery,IN
4,Sex and Candy,American IPA,0.075,18th Street Brewery,IN


In [59]:
# Attach a datetime column that holds the same timestamp in every row.
constant_timestamp = pd.to_datetime("2025-02-20 14:30:00")
df_clean["arbitrary_date"] = constant_timestamp
df_clean.head()

Unnamed: 0,beer_name,style,abv,brewery_name,state,arbitrary_date
0,Pub Beer,American Pale Lager,0.05,10 Barrel Brewing Company,OR,2025-02-20 14:30:00
1,Devil's Cup,American Pale Ale (APA),0.066,18th Street Brewery,IN,2025-02-20 14:30:00
2,Rise of the Phoenix,American IPA,0.071,18th Street Brewery,IN,2025-02-20 14:30:00
3,Sinister,American Double / Imperial IPA,0.09,18th Street Brewery,IN,2025-02-20 14:30:00
4,Sex and Candy,American IPA,0.075,18th Street Brewery,IN,2025-02-20 14:30:00


In [60]:
# Cast each text column to `str`, one column at a time, then show the resulting dtypes.
for text_column in ("beer_name", "style", "brewery_name", "state"):
    df_clean[text_column] = df_clean[text_column].astype(str)
df_clean.dtypes

beer_name                 object
style                     object
abv                      float64
brewery_name              object
state                     object
arbitrary_date    datetime64[ns]
dtype: object

In [61]:
# Baseline run: neither an include list nor an exclude list is given for error types.
df_dirty, error_mask = high_level.create_errors(
    df_clean,
    max_error_rate=0.75,
)
show_result(original_df=df_clean, perturbed_df=df_dirty, error_mask=error_mask)

Column-type dict:  {'beer_name': [<tab_err.error_type._extraneous.Extraneous object at 0x7f9928f8ef50>, <tab_err.error_type._mojibake.Mojibake object at 0x7f9928f8fd90>, <tab_err.error_type._replace.Replace object at 0x7f9928f8fc70>, <tab_err.error_type._typo.Typo object at 0x7f9928f8f280>, <tab_err.error_type._missing.MissingValue object at 0x7f9928f8f790>], 'style': [<tab_err.error_type._extraneous.Extraneous object at 0x7f9928f8ef50>, <tab_err.error_type._mojibake.Mojibake object at 0x7f9928f8fd90>, <tab_err.error_type._replace.Replace object at 0x7f9928f8fc70>, <tab_err.error_type._typo.Typo object at 0x7f9928f8f280>, <tab_err.error_type._missing.MissingValue object at 0x7f9928f8f790>], 'abv': [<tab_err.error_type._add_delta.AddDelta object at 0x7f9928f8d180>, <tab_err.error_type._outlier.Outlier object at 0x7f9928f8e230>, <tab_err.error_type._wrong_unit.WrongUnit object at 0x7f9928f8ef80>, <tab_err.error_type._missing.MissingValue object at 0x7f9928f8f790>], 'brewery_name': [<tab_

  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)


Unnamed: 0_level_0,original,original,original,original,original,original,perturbed,perturbed,perturbed,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask,error_mask,error_mask,error_mask
Unnamed: 0_level_1,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date
0,Pub Beer,American Pale Lager,0.050,10 Barrel Brewing Company,OR,2025-02-20 14:30:00,Pub Beer,'/American Pale Lager,0.050000,10 Barrel Brewing Company,OR,NaT,False,True,False,False,False,True
1,Devil's Cup,American Pale Ale (APA),0.066,18th Street Brewery,IN,2025-02-20 14:30:00,,American Pale Ale (APA),0.066000,18th Street Brewery,IN,2072-02-20 07:00:13.346685952,True,False,False,True,False,True
2,Rise of the Phoenix,American IPA,0.071,18th Street Brewery,IN,2025-02-20 14:30:00,Rise of the Phoenix,American IPA,-0.503035,18th Street Brewery,ON,2025-02-20 14:30:00.000000000,True,True,True,True,True,True
3,Sinister,American Double / Imperial IPA,0.090,18th Street Brewery,IN,2025-02-20 14:30:00,,American Doube / Imperia IPA,0.090000,18th Street Bre3ery,IN,2025-02-20 14:30:00.000000000,True,True,False,True,True,True
4,Sex and Candy,American IPA,0.075,18th Street Brewery,IN,2025-02-20 14:30:00,,American IPA,0.075000,,IN,2025-02-20 14:30:00.000000000,True,True,False,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2405,Belgorado,Belgian IPA,0.067,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Belgorado,Belgian IPA,-0.507035,'/Wynkoop Brewing Company,CL,2049-06-29 04:26:07.944019968,False,False,True,True,True,True
2406,Rail Yard Ale,American Amber / Red Ale,0.052,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Rai Yard Ae,American Amber / Red Ale,-0.522035,'/Wynkoop Brewing Company,XO,NaT,True,False,True,True,True,True
2407,B3K Black Lager,Schwarzbier,0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,B3K Black Lager,Schwarzbier,0.550000,'/Wynkoop Brewing Company,C9,2025-02-20 14:30:00.000000000,False,True,True,True,True,False
2408,Silverback Pale Ale,American Pale Ale (APA),0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Silverback Pale Ale,American Pale Ale (APA),0.550000,Wynkoop Brewing Compqny,CO,2061-01-07 20:47:59.946644480,False,False,True,True,True,True


### Test of include/exclude error types

In [62]:
# Both include and exclude error-type lists given at once -- should raise an error (checked manually: it does)
#df_dirty, error_mask = high_level.create_errors(df_clean, max_error_rate=0.75, error_types_to_exclude=[error_type.AddDelta()], error_types_to_include=[error_type.AddDelta()])
#show_result(df_clean, df_dirty, error_mask)

In [63]:
# Well-formed exclude list: MissingValue is passed as the only error type to exclude.
df_dirty, error_mask = high_level.create_errors(
    df_clean,
    max_error_rate=0.75,
    error_types_to_exclude=[error_type.MissingValue()],
)
show_result(original_df=df_clean, perturbed_df=df_dirty, error_mask=error_mask)

Column-type dict:  {'beer_name': [<tab_err.error_type._extraneous.Extraneous object at 0x7f99294ac9a0>, <tab_err.error_type._mojibake.Mojibake object at 0x7f99294addb0>, <tab_err.error_type._replace.Replace object at 0x7f99294ad0c0>, <tab_err.error_type._typo.Typo object at 0x7f99294ac2b0>], 'style': [<tab_err.error_type._extraneous.Extraneous object at 0x7f99294ac9a0>, <tab_err.error_type._mojibake.Mojibake object at 0x7f99294addb0>, <tab_err.error_type._replace.Replace object at 0x7f99294ad0c0>, <tab_err.error_type._typo.Typo object at 0x7f99294ac2b0>], 'abv': [<tab_err.error_type._add_delta.AddDelta object at 0x7f99294ac5b0>, <tab_err.error_type._outlier.Outlier object at 0x7f99294af850>, <tab_err.error_type._wrong_unit.WrongUnit object at 0x7f99294aef80>], 'brewery_name': [<tab_err.error_type._extraneous.Extraneous object at 0x7f99294ac9a0>, <tab_err.error_type._mojibake.Mojibake object at 0x7f99294addb0>, <tab_err.error_type._replace.Replace object at 0x7f99294ad0c0>, <tab_err.err

  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)


Unnamed: 0_level_0,original,original,original,original,original,original,perturbed,perturbed,perturbed,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask,error_mask,error_mask,error_mask
Unnamed: 0_level_1,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date
0,Pub Beer,American Pale Lager,0.050,10 Barrel Brewing Company,OR,2025-02-20 14:30:00,Pub Beer,American Pale Lagrr,-0.228651,10 Barrel Brewing Company,OR,2025-02-20 14:30:00,False,True,True,True,False,False
1,Devil's Cup,American Pale Ale (APA),0.066,18th Street Brewery,IN,2025-02-20 14:30:00,Devil's C7p,American Pale Ale (APA),-0.212651,18th Street Breweey,IJ,2025-02-20 14:30:00,True,False,True,True,True,False
2,Rise of the Phoenix,American IPA,0.071,18th Street Brewery,IN,2025-02-20 14:30:00,Rise of the Phoenix,American IPA,0.105780,18th Street Brewer6,IJ,2025-02-20 14:30:00,False,False,True,True,True,True
3,Sinister,American Double / Imperial IPA,0.090,18th Street Brewery,IN,2025-02-20 14:30:00,Sinister,AmericanDouble/ImperialIPA,0.900000,18th Street Brewery,ON,2025-02-20 14:30:00,False,True,True,False,True,True
4,Sex and Candy,American IPA,0.075,18th Street Brewery,IN,2025-02-20 14:30:00,Sex and Candy,American IPA,0.750000,18th Street Brewery,ON,2025-02-20 14:30:00,False,False,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2405,Belgorado,Belgian IPA,0.067,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Belgorado,-Belgian IPA,0.067000,Wynkoop Brewing Company,-CO,2025-02-20 14:30:00,True,True,False,False,True,True
2406,Rail Yard Ale,American Amber / Red Ale,0.052,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Rail Yard Ale,American Amber / Red Ale,0.052000,Wynkoop Brewing Comlany,CO,2025-02-20 14:30:00,False,False,False,True,False,True
2407,B3K Black Lager,Schwarzbier,0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,B3K Blqck Lager,-Schwarzbier,-0.223651,Wynkoop Brrwing Company,CO,2025-02-20 14:30:00,True,True,True,True,False,True
2408,Silverback Pale Ale,American Pale Ale (APA),0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Silverback Pale Ale,AmericanPaleAle(APA),0.055000,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,False,True,False,True,False,False


In [64]:
# Well-formed include list: AddDelta is passed as the only error type to include.
df_dirty, error_mask = high_level.create_errors(
    df_clean,
    max_error_rate=0.75,
    error_types_to_include=[error_type.AddDelta()],
)
show_result(original_df=df_clean, perturbed_df=df_dirty, error_mask=error_mask)

Column-type dict:  {'beer_name': [], 'style': [], 'abv': [<tab_err.error_type._add_delta.AddDelta object at 0x7f99294722c0>], 'brewery_name': [], 'state': [], 'arbitrary_date': [<tab_err.error_type._add_delta.AddDelta object at 0x7f99294722c0>]}
Column-mech dict:  {'beer_name': [<tab_err.error_mechanism._enar.ENAR object at 0x7f99299ca560>, <tab_err.error_mechanism._ecar.ECAR object at 0x7f99299cbc10>, <tab_err.error_mechanism._ear.EAR object at 0x7f99299ca4d0>, <tab_err.error_mechanism._ear.EAR object at 0x7f99299cb2b0>, <tab_err.error_mechanism._ear.EAR object at 0x7f99299cb100>, <tab_err.error_mechanism._ear.EAR object at 0x7f99299c9630>, <tab_err.error_mechanism._ear.EAR object at 0x7f99299c9ae0>], 'style': [<tab_err.error_mechanism._enar.ENAR object at 0x7f99299ca560>, <tab_err.error_mechanism._ecar.ECAR object at 0x7f99299cbc10>, <tab_err.error_mechanism._ear.EAR object at 0x7f99299cbd00>, <tab_err.error_mechanism._ear.EAR object at 0x7f99299cacb0>, <tab_err.error_mechanism._ear.

  # Build MidLevel Config
  # Build MidLevel Config
  # Build MidLevel Config
  # Build MidLevel Config
  return self._apply(data, error_mask, column)


Unnamed: 0_level_0,original,original,original,original,original,original,perturbed,perturbed,perturbed,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask,error_mask,error_mask,error_mask
Unnamed: 0_level_1,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date
0,Pub Beer,American Pale Lager,0.050,10 Barrel Brewing Company,OR,2025-02-20 14:30:00,Pub Beer,American Pale Lager,0.050000,10 Barrel Brewing Company,OR,2025-02-20 14:30:00,False,False,False,False,False,True
1,Devil's Cup,American Pale Ale (APA),0.066,18th Street Brewery,IN,2025-02-20 14:30:00,Devil's Cup,American Pale Ale (APA),-0.655726,18th Street Brewery,IN,2025-02-20 14:30:00,False,False,True,False,False,False
2,Rise of the Phoenix,American IPA,0.071,18th Street Brewery,IN,2025-02-20 14:30:00,Rise of the Phoenix,American IPA,-0.650726,18th Street Brewery,IN,2025-02-20 14:30:00,False,False,True,False,False,False
3,Sinister,American Double / Imperial IPA,0.090,18th Street Brewery,IN,2025-02-20 14:30:00,Sinister,American Double / Imperial IPA,-0.631726,18th Street Brewery,IN,2025-02-20 14:30:00,False,False,True,False,False,True
4,Sex and Candy,American IPA,0.075,18th Street Brewery,IN,2025-02-20 14:30:00,Sex and Candy,American IPA,-0.646726,18th Street Brewery,IN,2025-02-20 14:30:00,False,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2405,Belgorado,Belgian IPA,0.067,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Belgorado,Belgian IPA,0.067000,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,False,False,False,False,False,False
2406,Rail Yard Ale,American Amber / Red Ale,0.052,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Rail Yard Ale,American Amber / Red Ale,0.052000,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,False,False,False,False,False,True
2407,B3K Black Lager,Schwarzbier,0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,B3K Black Lager,Schwarzbier,-0.666726,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,False,False,True,False,False,True
2408,Silverback Pale Ale,American Pale Ale (APA),0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Silverback Pale Ale,American Pale Ale (APA),-0.666726,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,False,False,True,False,False,True


In [65]:
# Error types to include - malformed (empty list) -- should raise an error (checked manually: it does)
# df_dirty, error_mask = high_level.create_errors(df_clean, max_error_rate=0.75, error_types_to_include=[])
# show_result(df_clean, df_dirty, error_mask)

In [66]:
# Overall error rate = fraction of True cells in the whole mask.
# Reducing the underlying array gives a scalar on every pandas version, whereas
# DataFrame.mean(axis=None) only reduces over both axes from pandas 2.0 on.
# NOTE: `error_mask` is whatever the most recently executed create_errors cell produced.
overall_error_rate = error_mask.to_numpy().mean()
print("Overall Error Rate", overall_error_rate, "\n\nColumn-Wise Error Rates:")

# Per-column error rates (fraction of True cells in each column).
error_mask.mean(axis=0)

Overall Error Rate 0.24979253112033195 

Column-Wise Error Rates:


beer_name         0.000000
style             0.000000
abv               0.749378
brewery_name      0.000000
state             0.000000
arbitrary_date    0.749378
dtype: float64

### Test of the include/exclude error mechanisms

In [67]:
# Exclude list for error mechanisms: EAR is passed as the only mechanism to exclude.
df_dirty, error_mask = high_level.create_errors(
    df_clean,
    max_error_rate=0.75,
    error_mechs_to_exclude=[error_mechanism.EAR()],
)
show_result(original_df=df_clean, perturbed_df=df_dirty, error_mask=error_mask)

Column-type dict:  {'beer_name': [<tab_err.error_type._extraneous.Extraneous object at 0x7f9928f8f8e0>, <tab_err.error_type._mojibake.Mojibake object at 0x7f9928f8c610>, <tab_err.error_type._replace.Replace object at 0x7f9928f8d0c0>, <tab_err.error_type._typo.Typo object at 0x7f9928f8df60>, <tab_err.error_type._missing.MissingValue object at 0x7f9928f8ed10>], 'style': [<tab_err.error_type._extraneous.Extraneous object at 0x7f9928f8f8e0>, <tab_err.error_type._mojibake.Mojibake object at 0x7f9928f8c610>, <tab_err.error_type._replace.Replace object at 0x7f9928f8d0c0>, <tab_err.error_type._typo.Typo object at 0x7f9928f8df60>, <tab_err.error_type._missing.MissingValue object at 0x7f9928f8ed10>], 'abv': [<tab_err.error_type._add_delta.AddDelta object at 0x7f9928f8fac0>, <tab_err.error_type._outlier.Outlier object at 0x7f9928f8d8a0>, <tab_err.error_type._wrong_unit.WrongUnit object at 0x7f9928f8ece0>, <tab_err.error_type._missing.MissingValue object at 0x7f9928f8ed10>], 'brewery_name': [<tab_

  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)


Unnamed: 0_level_0,original,original,original,original,original,original,perturbed,perturbed,perturbed,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask,error_mask,error_mask,error_mask
Unnamed: 0_level_1,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date
0,Pub Beer,American Pale Lager,0.050,10 Barrel Brewing Company,OR,2025-02-20 14:30:00,Pub Beer,American Pale Lager,1.174418,10 arrel rewing Company,"/,OR",2025-02-20 14:30:00.000000000,False,False,True,True,True,False
1,Devil's Cup,American Pale Ale (APA),0.066,18th Street Brewery,IN,2025-02-20 14:30:00,"/,Devil's Cup","/,American Pale Ale (APA)",-0.789953,,IN,2025-02-20 14:30:00.000000000,True,True,True,True,True,False
2,Rise of the Phoenix,American IPA,0.071,18th Street Brewery,IN,2025-02-20 14:30:00,,American IPA,1.195418,18th Street rewery,IN,2025-02-20 14:30:00.000000000,True,False,True,True,True,False
3,Sinister,American Double / Imperial IPA,0.090,18th Street Brewery,IN,2025-02-20 14:30:00,Sinister,American Double / Imperial IPA,-0.770213,18th Street Brewery,IN,NaT,True,True,True,False,True,True
4,Sex and Candy,American IPA,0.075,18th Street Brewery,IN,2025-02-20 14:30:00,Sex and Candy,American IPA,1.199418,"/,18th Street Brewery",IN,NaT,True,False,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2405,Belgorado,Belgian IPA,0.067,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,"/,Belgorado",elgian IPA,1.191418,Wynkoop Brewing Com[any,,2025-02-20 14:30:00.000000000,True,True,True,True,True,True
2406,Rail Yard Ale,American Amber / Red Ale,0.052,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Rail Yard Ale,American Amber / Red Ale,1.176418,Wynkoop Brewing Company,,2025-02-20 14:30:00.000000000,False,True,True,True,True,True
2407,B3K Black Lager,Schwarzbier,0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,B3K Black Layer,Schwsrzbier,,Wynkoop rewing Company,,2025-02-20 14:30:00.000000000,True,True,True,True,True,True
2408,Silverback Pale Ale,American Pale Ale (APA),0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Silverback Pale Ale,"/,American Pale Ale (APA)",0.055000,Wynkoop Brewing Company,FO,2068-07-12 21:22:23.939123200,True,True,False,False,True,True


### Test of random seed

In [80]:
# Run the corruption twice with the same seed. A single run cannot show that
# the seed has any effect, so the second run is compared against the first.
df_dirty, error_mask = high_level.create_errors(df_clean, max_error_rate=0.75, seed=10)
df_dirty_repeat, error_mask_repeat = high_level.create_errors(df_clean, max_error_rate=0.75, seed=10)

# assert_frame_equal raises AssertionError if the two runs differ, so this cell
# fails loudly when seeding does not make the result reproducible.
pd.testing.assert_frame_equal(df_dirty, df_dirty_repeat)
pd.testing.assert_frame_equal(error_mask, error_mask_repeat)

show_result(df_clean, df_dirty, error_mask)

Column-type dict:  {'beer_name': [<tab_err.error_type._extraneous.Extraneous object at 0x7f9928f8d690>, <tab_err.error_type._mojibake.Mojibake object at 0x7f9928f8e7a0>, <tab_err.error_type._replace.Replace object at 0x7f9928f8faf0>, <tab_err.error_type._typo.Typo object at 0x7f9928a906a0>, <tab_err.error_type._missing.MissingValue object at 0x7f9928a93220>], 'style': [<tab_err.error_type._extraneous.Extraneous object at 0x7f9928f8d690>, <tab_err.error_type._mojibake.Mojibake object at 0x7f9928f8e7a0>, <tab_err.error_type._replace.Replace object at 0x7f9928f8faf0>, <tab_err.error_type._typo.Typo object at 0x7f9928a906a0>, <tab_err.error_type._missing.MissingValue object at 0x7f9928a93220>], 'abv': [<tab_err.error_type._add_delta.AddDelta object at 0x7f9928f8c340>, <tab_err.error_type._outlier.Outlier object at 0x7f9928f8e6e0>, <tab_err.error_type._wrong_unit.WrongUnit object at 0x7f9928a900d0>, <tab_err.error_type._missing.MissingValue object at 0x7f9928a93220>], 'brewery_name': [<tab_

  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)
  return self._apply(data, error_mask, column)


Unnamed: 0_level_0,original,original,original,original,original,original,perturbed,perturbed,perturbed,perturbed,perturbed,perturbed,error_mask,error_mask,error_mask,error_mask,error_mask,error_mask
Unnamed: 0_level_1,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date,beer_name,style,abv,brewery_name,state,arbitrary_date
0,Pub Beer,American Pale Lager,0.050,10 Barrel Brewing Company,OR,2025-02-20 14:30:00,Pib Beer,Amrican Pal Lagr,0.500000,10 Barrl Brwing Company,OR,2049-06-29 04:26:07.944019968,True,True,True,True,True,True
1,Devil's Cup,American Pale Ale (APA),0.066,18th Street Brewery,IN,2025-02-20 14:30:00,Devol's Cup,Americzn Pale Ale (APA),0.660000,18th Strt Brwry,IM,2072-02-20 07:00:13.346685952,True,True,True,True,True,True
2,Rise of the Phoenix,American IPA,0.071,18th Street Brewery,IN,2025-02-20 14:30:00,Ris of th Phonix,American IPA,-0.004631,18th Street Brewery,IN,2049-06-29 04:26:07.944019968,True,True,True,True,True,True
3,Sinister,American Double / Imperial IPA,0.090,18th Street Brewery,IN,2025-02-20 14:30:00,Sinister,American Double / Imperial OPA,0.107239,18th Street Nrewery,IM,NaT,False,True,True,True,True,True
4,Sex and Candy,American IPA,0.075,18th Street Brewery,IN,2025-02-20 14:30:00,Sex and Fandy,Amrican IPA,0.017886,18th Strt Brwry,IN,2049-06-29 04:26:07.944019968,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2405,Belgorado,Belgian IPA,0.067,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Be;gorado,Belgiwn IPA,0.670000,Wynkoop Brwing Company,VO,2025-02-20 14:30:00.000000000,True,True,True,True,True,True
2406,Rail Yard Ale,American Amber / Red Ale,0.052,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Rail Yarc Ale,Amrican Ambr / Rd Al,0.520000,Wynkoop Brewing Company,CO,2049-06-29 04:26:07.944019968,True,True,True,True,True,True
2407,B3K Black Lager,Schwarzbier,0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,B3K Black Lagr,Schwarzbier,0.055000,Wynkoop Brewing Company,CO,NaT,True,False,False,False,False,True
2408,Silverback Pale Ale,American Pale Ale (APA),0.055,Wynkoop Brewing Company,CO,2025-02-20 14:30:00,Silverback Pale Ale,American Pale Ale (APZ),-0.016541,Wynkoop Brewing Dompany,VO,2025-02-20 14:30:00.000000000,False,True,True,True,True,True
