In [7]:
import pandas as pd
import pandera as pa

from pandera import DataFrameSchema, Column, Check, check_input


# Sample input: one integer column and one float column, five rows each.
df = pd.DataFrame(
    data={
        "column1": [1, 4, 0, 10, 9],
        "column2": [-1.3, -1.4, -2.9, -10.1, -20.4],
    }
)

# Input contract: "column1" holds integers in the closed range [0, 10]
# (tested one value at a time) and "column2" holds floats below -1.2.
in_schema = DataFrameSchema(
    {
        "column1": Column(
            pa.Int,
            checks=Check(lambda value: 0 <= value <= 10, element_wise=True),
        ),
        "column2": Column(pa.Float, checks=Check(lambda values: values < -1.2)),
    }
)

# By default, check_input assumes that the first positional argument is the
# dataframe/series to validate.
@check_input(in_schema)
def preprocessor(dataframe):
    """Return ``dataframe`` unchanged after it has been checked by ``in_schema``.

    The ``check_input`` decorator validates the first argument against
    ``in_schema`` when the function is called, so the body only runs for
    frames that satisfy the schema.

    The body previously evaluated ``dataframe["column1"] < 5`` and discarded
    the result; that expression had no effect and has been removed.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame expected to satisfy ``in_schema``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    return dataframe

# Calling the decorated function runs the in_schema validation on `df`
# before the function body; a frame violating the schema raises instead of
# being returned.
preprocessed_df = preprocessor(df)
print(preprocessed_df)

   column1  column2
0        1     -1.3
1        4     -1.4
2        0     -2.9
3       10    -10.1
4        9    -20.4


In [8]:
# Display the frame that was passed to preprocessor in the previous cell.
df

Unnamed: 0,column1,column2
0,1,-1.3
1,4,-1.4
2,0,-2.9
3,10,-10.1
4,9,-20.4


In [9]:
# Sample input for the group-wise check: heights labelled with group A or B.
df = pd.DataFrame(
    data={
        "height": [5.6, 6.4, 4.0, 7.1],
        "group": ["A", "B", "A", "B"],
    }
)

# "height" is validated per group: because of groupby="group" the check
# callable is indexed by group label, and it requires the mean height of
# group "A" to be below the mean height of group "B".
schema = pa.DataFrameSchema(
    {
        "height": pa.Column(
            pa.Float,
            checks=pa.Check(
                lambda groups: groups["A"].mean() < groups["B"].mean(),
                groupby="group",
            ),
        ),
        "group": pa.Column(pa.String),
    }
)

schema.validate(df)

Unnamed: 0,height,group
0,5.6,A
1,6.4,B
2,4.0,A
3,7.1,B


In [15]:
# Column "a" carries three checks that differ in what the callable receives
# and what it returns.
schema = pa.DataFrameSchema(
    {
        "a": pa.Column(
            pa.Int,
            checks=[
                # Vectorized: sees the whole series, returns a single bool.
                pa.Check(lambda series: series.mean() > 5, element_wise=False),
                # Vectorized: sees the whole series, returns a boolean series.
                pa.Check(lambda series: series > 0, element_wise=False),
                # Element-wise: sees one value at a time, returns a bool.
                pa.Check(lambda value: value > 0, element_wise=True),
            ],
        ),
    }
)
df = pd.DataFrame({"a": [4, 4, 5, 6, 6, 7, -8, 9]})
try:
    # lazy=True asks pandera to report failures through a single SchemaErrors
    # exception rather than raising on the first failing check.
    schema.validate(df, lazy=True)
except pa.errors.SchemaErrors as err:
    print("Schema errors and failure cases:")
    # Newer pandera releases expose the failure-case table as
    # `failure_cases`; fall back to `schema_errors`, which is the attribute
    # this cell originally read the table from.
    failure_table = getattr(err, "failure_cases", None)
    if failure_table is None:
        failure_table = err.schema_errors
    print(failure_table.head())

Schema errors and failure cases:
  schema_context column             check  check_number  failure_case index
0         Column      a  <Check <lambda>>             0         False  None
