In [1]:
import numpy as np
import pandas as pd
import blindat as bd

In [2]:
# --- configuration of the synthetic data set ---
COLUMNS = ["A", "B", "C", "D"]
NUM_ROWS = 10_000_000
DATA_SEED = 19421127

# --- draw reproducible uniform samples, one column per entry in COLUMNS ---
np.random.seed(DATA_SEED)
data = np.random.rand(NUM_ROWS, len(COLUMNS))
df = pd.DataFrame(data=data, columns=COLUMNS)

# preview the first rows
df.head()

Unnamed: 0,A,B,C,D
0,0.519411,0.030766,0.064909,0.930325
1,0.269587,0.562393,0.227109,0.202936
2,0.369254,0.579577,0.01545,0.53417
3,0.67191,0.868601,0.142738,0.573955
4,0.903384,0.921365,0.019821,0.263312


In [3]:
# Build blinding rules for column "A" only, passing an offset range of
# (10.0, 20.0) and no scale range (the inspect output below reports scale 1.0).
# random_seed=42 presumably makes the generated values reproducible -- confirm
# against the blindat documentation.
rules = bd.generate_rules("A", offset=(10.0, 20.0), random_seed=42)

In [4]:
# show the concrete offset and scale values held in the rules for each column
bd.inspect(rules)

{'A': {'offset': np.float64(13.745401188473625), 'scale': np.float64(1.0)}}

In [5]:
# Blind the DataFrame with the rules; in the preview only column "A" differs
# from the original, columns B-D are unchanged.
df1 = bd.blind(df, rules)
df1.head()

Unnamed: 0,A,B,C,D
0,14.264812,0.030766,0.064909,0.930325
1,14.014989,0.562393,0.227109,0.202936
2,14.114655,0.579577,0.01545,0.53417
3,14.417311,0.868601,0.142738,0.573955
4,14.648785,0.921365,0.019821,0.263312


In [6]:
from blindat import obfuscate


class MeasurementData:
    """Simulated measurement data exposed as a pandas DataFrame.

    The data is generated in memory from ``DATA_SEED``; ``path`` is stored but
    not read by anything in this class.
    """

    def __init__(self, path=None):
        """Remember the data location and build the simulated data.

        Args:
            path: Path to the data directory. Only stored on the instance.
        """
        self.path = path  # path to data directory
        self._sim()

    def _sim(self):
        """Fill ``_columns`` and ``_data`` with reproducible random samples."""
        # A private RandomState seeded with DATA_SEED produces the same stream
        # as np.random.seed(DATA_SEED) followed by np.random.rand(...), but it
        # does not reset the global NumPy RNG that other code may rely on.
        rng = np.random.RandomState(DATA_SEED)
        # Copy so later mutation of the module-level list cannot change the
        # column labels of an existing instance.
        self._columns = list(COLUMNS)
        self._data = rng.rand(NUM_ROWS, len(self._columns))

    @obfuscate
    def load_dataframe(self, transform=None):
        """Return the simulated data as a DataFrame labelled with ``_columns``.

        Args:
            transform: Not used by this method body. In this notebook, passing
                blinding rules here yields a blinded column "A" and omitting
                it yields the original values, so it is presumably consumed by
                the ``@obfuscate`` decorator -- confirm against blindat.

        Returns:
            pandas.DataFrame built from ``self._data`` (before whatever the
            decorator does with it).
        """
        return pd.DataFrame(self._data, columns=self._columns)


# Create the instance used by the cells below; no path is given, so `path`
# stays None and the data comes from the in-memory simulation.
measurement = MeasurementData()

In [7]:
# load the DataFrame with the blinding rules supplied through `transform`
measurement.load_dataframe(transform=rules).head()

Unnamed: 0,A,B,C,D
0,14.264812,0.030766,0.064909,0.930325
1,14.014989,0.562393,0.227109,0.202936
2,14.114655,0.579577,0.01545,0.53417
3,14.417311,0.868601,0.142738,0.573955
4,14.648785,0.921365,0.019821,0.263312


In [8]:
# without `transform` the original, unblinded values are returned
measurement.load_dataframe().head()

Unnamed: 0,A,B,C,D
0,0.519411,0.030766,0.064909,0.930325
1,0.269587,0.562393,0.227109,0.202936
2,0.369254,0.579577,0.01545,0.53417
3,0.67191,0.868601,0.142738,0.573955
4,0.903384,0.921365,0.019821,0.263312


In [9]:
# in your data access module
# Per-column blinding specification: column "A" gets an offset range and a
# scale range. It is handed to bd.generate_rules by default_rules() below.
DEFAULT_SPECIFICATION = {
    "A": {"offset": (10.0, 20.0), "scale": (0.9, 1.1)},
}


def default_rules(random_seed=None):
    """Generate blinding rules from ``DEFAULT_SPECIFICATION``.

    Args:
        random_seed: Forwarded unchanged to ``bd.generate_rules``.

    Returns:
        Whatever ``bd.generate_rules`` returns for the default specification.
    """
    generated = bd.generate_rules(DEFAULT_SPECIFICATION, random_seed=random_seed)
    return generated


# in your analysis notebook
# load the data blinded with the default specification, using seed 42
measurement.load_dataframe(transform=default_rules(42)).head()

Unnamed: 0,A,B,C,D
0,14.311634,0.030766,0.064909,0.930325
1,14.03929,0.562393,0.227109,0.202936
2,14.147941,0.579577,0.01545,0.53417
3,14.477879,0.868601,0.142738,0.573955
4,14.730219,0.921365,0.019821,0.263312


In [10]:
import warnings
from blindat import blind


class BlindData(MeasurementData):
    """Measurement data whose public loader returns blinded values.

    The blinding rules are generated once at construction and reused for
    every ``load_dataframe`` call on the instance.
    """

    def __init__(self, *args, random_seed=None, **kwargs):
        """Initialise the parent class and generate the blinding rules.

        Args:
            *args: Forwarded to ``MeasurementData.__init__``.
            random_seed: Passed to ``default_rules`` when generating the rules.
            **kwargs: Forwarded to ``MeasurementData.__init__``.
        """
        super().__init__(*args, **kwargs)
        self._rules = default_rules(random_seed)

    def _secret_data(self):
        """Return the parent's ``load_dataframe()`` result, with no transform."""
        return super().load_dataframe()

    def load_dataframe(self):
        """Return the data with the instance's blinding rules applied.

        Unlike the parent method, this override takes no ``transform``
        argument; the rules created in ``__init__`` are always used.

        Returns:
            The result of ``blind`` applied to the unblinded DataFrame.

        Warns:
            UserWarning: on every call, as a reminder that values may differ
                from the original data.
        """
        # stacklevel=2 attributes the warning to the caller's line rather than
        # to this method, so each call site is reported instead of one shared
        # location inside the class.
        warnings.warn(
            "data may be altered to mitigate experimenter bias.",
            stacklevel=2,
        )
        return blind(self._secret_data(), rules=self._rules)


# the blinding rules are generated once here, at construction, with seed 42
blind_data = BlindData(random_seed=42)

# blind by default
blind_data.load_dataframe().head()



Unnamed: 0,A,B,C,D
0,14.311634,0.030766,0.064909,0.930325
1,14.03929,0.562393,0.227109,0.202936
2,14.147941,0.579577,0.01545,0.53417
3,14.477879,0.868601,0.142738,0.573955
4,14.730219,0.921365,0.019821,0.263312
