In [None]:
import numpy as np
import pandas as pd
import blindat as bd

Create a `pandas.DataFrame()` with four columns of random data:


In [None]:
# data params
COLUMNS = ["A", "B", "C", "D"]  # column labels of the example DataFrame
NUM_ROWS = int(1e7)  # ten million rows
DATA_SEED = 19421127  # fixed seed so the random data is the same on every run

# generate data
# Seed NumPy's global random state before drawing; the draw below depends on it.
np.random.seed(DATA_SEED)
# One random value per (row, column); np.random.rand samples uniformly from [0, 1).
data = np.random.rand(NUM_ROWS, len(COLUMNS))
df = pd.DataFrame(data, columns=COLUMNS)

# Preview the first rows.
df.head()

### `@obfuscate`

If an experiment generates many different data files, it might be convenient to develop a custom class with methods for accessing each component. The decorator `@obfuscate` adds blinding to functions or methods that return a pandas DataFrame as the first or only result. The decorated function or method must accept the keyword argument `rules` (or `**kwargs`).

In [None]:
from blindat import obfuscate


class Measurement:
    """Example data-access class backed by simulated random data.

    `load_dataframe()` returns the data as-is; `blind_data()` returns the
    same DataFrame through the `@obfuscate` decorator.
    """

    def __init__(self, path=None):
        """Store `path` (path to data directory) and simulate the data."""
        self.path = path
        self._sim()

    def _sim(self):
        """Populate `_columns` and `_data` with seeded random values.

        Note that this reseeds NumPy's global random state with `DATA_SEED`.
        """
        self._columns = COLUMNS
        shape = (NUM_ROWS, len(self._columns))
        np.random.seed(DATA_SEED)
        self._data = np.random.rand(*shape)

    def load_dataframe(self):
        """Return the simulated data as a DataFrame labelled with `_columns`."""
        frame = pd.DataFrame(self._data, columns=self._columns)
        return frame

    @obfuscate
    def blind_data(self, rules=None):
        """Return `load_dataframe()` wrapped by `@obfuscate`.

        `rules` is not used in the body; it is part of the signature because
        a function decorated with `@obfuscate` must accept it.
        """
        frame = self.load_dataframe()
        return frame


# initialize: no data directory is given, and the constructor simulates the data
measurement = Measurement()

In [None]:
# load blind data
# Generate rules for column "A" using a fixed random seed.
# NOTE(review): offset=(10.0, 20.0) is presumably the range the offset is
# drawn from -- confirm against the blindat documentation.
rules = bd.generate_rules("A", offset=(10.0, 20.0), random_seed=42)
# The rules are passed explicitly to the @obfuscate-decorated method.
measurement.blind_data(rules=rules).head()

In [None]:
# original data: load_dataframe() is not decorated with @obfuscate
measurement.load_dataframe().head()

In [None]:
# or call the decorated method with rules=None to get the original data
measurement.blind_data(rules=None).head()

This example requires the user to explicitly opt in to blinding their data (Zen of Python #2: "Explicit is better than implicit").

For consistency, and to save the user a little effort, you could include a function that returns default rules (such as `my_rules()` below) in your data-access module. This might be appropriate if columns with certain names always have similar values and should always be blinded.

In [None]:
# in your data access module
# Blinding parameters for column "A"; kept separate so each column's spec is
# easy to read and extend.
_COLUMN_A_SPEC = {"offset": (10.0, 20.0), "scale": 1.0}

# Maps a column name to the keyword parameters used to generate its rule.
DEFAULT_SPECIFICATION = {"A": _COLUMN_A_SPEC}


def my_rules(random_seed=None):
    """Generate blinding rules from `DEFAULT_SPECIFICATION`.

    `random_seed` is forwarded unchanged to `bd.generate_rules`.
    """
    generated = bd.generate_rules(DEFAULT_SPECIFICATION, random_seed=random_seed)
    return generated


# in your analysis notebook
# Blind using the shared default specification; 42 is the random seed.
measurement.blind_data(rules=my_rules(42)).head()

Alternatively, hard-code the rules into a data-access class.  

Forgetting that the data are blinded by default could be problematic, so consider using an unambiguously named subclass and/or warnings.

In [None]:
import warnings

# Rules are generated once, here, with a fixed seed and reused as the class default.
DEFAULT_RULES = my_rules(42)


class BlindMeasurement(Measurement):
    """`Measurement` whose `load_dataframe()` is blinded by default.

    `@obfuscate` is configured with `DEFAULT_RULES`, so callers get blinded
    data unless they pass their own `rules`; this notebook passes
    `rules=None` to request the original data.

    Construction is inherited unchanged from `Measurement`, so no
    `__init__` override is needed.

    NOTE(review): the inherited `blind_data()` calls `self.load_dataframe()`,
    which resolves to this decorated override. Rules passed to `blind_data()`
    on this class would therefore presumably be applied on top of the
    defaults -- confirm that this is intended before using it.
    """

    @obfuscate(default_rules=DEFAULT_RULES)
    def load_dataframe(self, rules=None):
        """Return the measurement data, warning that it may be blinded.

        Parameters
        ----------
        rules : optional
            Not used in the body. It is accepted because a function or
            method decorated with `@obfuscate` must accept the `rules`
            keyword (or `**kwargs`); without it, a call such as
            `load_dataframe(rules=None)` depends on the decorator removing
            the keyword before calling this method.

        Returns
        -------
        The DataFrame produced by `Measurement.load_dataframe()`.
        """
        # Emitted on every call, including when the caller opts out of
        # blinding, hence the wording "may be altered".
        warnings.warn("data may be altered to mitigate experimenter bias.")
        return super().load_dataframe()


blind_measurement = BlindMeasurement()

# blind by default: no rules are passed, so the decorator's default_rules
# (DEFAULT_RULES) apply; the method also emits a warning
blind_measurement.load_dataframe().head()

In [None]:
# original data: passing rules=None overrides default_rules
blind_measurement.load_dataframe(rules=None).head()

In [None]:
?pd.DataFrame.copy