# Loggers

In [170]:
import inspect
import json

## DiffPrivLib Logger

In [171]:
import diffprivlib
from sklearn.pipeline import Pipeline

In [172]:
DPL = "diffprivlib"
DPL_INSTANCE = "_dpl_instance:"
DPL_TYPE = "_dpl_type:"

In [187]:
class DiffprivlibEncoder(json.JSONEncoder):
    """Overwrites JSON Encoder class to serialise DiffPrivLib
    pipelines with a specific format.
    """

    def default(self, o: dict):
        """Extends JSON encoder to DiffPrivLib class members"""
        types = [
            v[1] for v in inspect.getmembers(diffprivlib, inspect.isclass)
        ]
        if type(o) in types:
            return DPL_INSTANCE + o.__class__.__name__
        return super().default(o)  # regular json encoding

    def encode(self, o: dict) -> str:
        """Define JSON string representation of a DiffPrivLib pipeline"""

        def hint_tuples(item):
            if isinstance(item, tuple):
                return {"_tuple": True, "_items": item}
            if isinstance(item, list):
                return [hint_tuples(e) for e in item]
            if isinstance(item, dict):
                return {key: hint_tuples(value) for key, value in item.items()}
            return item

        return super().encode(hint_tuples(o))


def serialise_pipeline(pipeline: Pipeline):
    """Serialise the DiffPrivLib pipeline to send it through FastAPI

    Args:
        pipeline (Pipeline): a DiffPrivLib pipeline

    Raises:
        ValueError: If the input argument is not a scikit-learn pipeline.

    Returns:
        serialised (str): DiffPivLib pipeline as a serialised string
    """
    if not isinstance(pipeline, Pipeline):
        raise ValueError(
            "Input pipeline must be an instance of sklearn.pipeline.Pipeline"
        )

    json_body = {
        "module": DPL,
        "version": diffprivlib.__version__,
        "pipeline": [],
    }

    for step_name, step_fn in pipeline.steps:
        print(f"step name {step_name}")
        print(f"step fn {step_fn}")
        dict_params = vars(step_fn)
        print(f"dict_params {dict_params}")
        params = list(inspect.signature(type(step_fn)).parameters)
        print(f"params {params}")
        dict_params = {k: v for k, v in dict_params.items() if k in params}
        print(f"dict_params_2 {dict_params}")
        json_body["pipeline"].append(
            {
                "type": DPL_TYPE + step_fn.__class__.__name__,
                "name": step_name,
                "params": dict_params,
            }
        )
        print("")

    return json.dumps(json_body, cls=DiffprivlibEncoder)

In [188]:
from diffprivlib import models

pipeline = Pipeline([
    ('scaler', models.StandardScaler(epsilon = 0.5, bounds=([0, 0, 0, 0], [1, 1, 1, 1]))),
    ('classifier', models.LogisticRegression(epsilon = 1.0, data_norm = 1))
])

In [189]:
dpl_json = serialise_pipeline(pipeline)
dpl_json

step name scaler
step fn StandardScaler(accountant=BudgetAccountant(),
               bounds=([0, 0, 0, 0], [1, 1, 1, 1]), epsilon=0.5)
dict_params {'with_mean': True, 'with_std': True, 'copy': True, 'epsilon': 0.5, 'bounds': ([0, 0, 0, 0], [1, 1, 1, 1]), 'random_state': None, 'accountant': BudgetAccountant()}
params ['epsilon', 'bounds', 'copy', 'with_mean', 'with_std', 'random_state', 'accountant']
dict_params_2 {'with_mean': True, 'with_std': True, 'copy': True, 'epsilon': 0.5, 'bounds': ([0, 0, 0, 0], [1, 1, 1, 1]), 'random_state': None, 'accountant': BudgetAccountant()}

step name classifier
step fn LogisticRegression(accountant=BudgetAccountant(), data_norm=1)
dict_params {'penalty': 'l2', 'dual': False, 'tol': 0.0001, 'C': 1.0, 'fit_intercept': True, 'intercept_scaling': 1.0, 'class_weight': None, 'random_state': None, 'solver': 'lbfgs', 'max_iter': 100, 'multi_class': 'ovr', 'verbose': 0, 'warm_start': False, 'n_jobs': None, 'l1_ratio': None, 'epsilon': 1.0, 'data_norm': 1, 'cl

'{"module": "diffprivlib", "version": "0.6.4", "pipeline": [{"type": "_dpl_type:StandardScaler", "name": "scaler", "params": {"with_mean": true, "with_std": true, "copy": true, "epsilon": 0.5, "bounds": {"_tuple": true, "_items": [[0, 0, 0, 0], [1, 1, 1, 1]]}, "random_state": null, "accountant": "_dpl_instance:BudgetAccountant"}}, {"type": "_dpl_type:LogisticRegression", "name": "classifier", "params": {"tol": 0.0001, "C": 1.0, "fit_intercept": true, "random_state": null, "max_iter": 100, "verbose": 0, "warm_start": false, "n_jobs": null, "epsilon": 1.0, "data_norm": 1, "accountant": "_dpl_instance:BudgetAccountant"}}]}'

## Smartnoise Synth Logger

In [190]:
import snsynth
import pkg_resources

In [206]:
SSYNTH = "smartnoise-synth"
SSYNTH_TRANSFORMER = "_ssynth_transformer:"

In [207]:
version = pkg_resources.get_distribution(SSYNTH).version
version

'1.0.4'

In [287]:
def get_filtered_params(obj):
    """Get filtered parameters based on the object's signature."""
    params = list(inspect.signature(type(obj)).parameters)
    return {k: v for k, v in vars(obj).items() if k in params}

def handle_chain_transformer(col_name, col_constraints):
    """Handle ChainTransformer-specific logic."""
    transformers = col_constraints.transformers
    return {
        "type": SSYNTH_TRANSFORMER + "ChainTransformer",
        "params": [
            {
                "type": SSYNTH_TRANSFORMER + t.__class__.__name__,
                "params": get_filtered_params(t),
            }
            for t in transformers
        ],
    }

def handle_anonymization_transformer(col_name, col_constraints):
    """Handle AnonymizationTransformer-specific logic."""
    return {
        "type": SSYNTH_TRANSFORMER + "AnonymizationTransformer",
        "params": {"fake": col_constraints.fake.__name__},
    }

def handle_default_operator(col_name, col_constraints):
    """Handle default operator logic."""
    return {
        "type": SSYNTH_TRANSFORMER + col_constraints.__class__.__name__,
        "params": get_filtered_params(col_constraints),
    }

def serialise_constraints(constraints: dict):
    """Serialise the SmartnoiseSynth constraints to send it through FastAPI

    Args:
        constraints (dict): a SmartnoiseSynth TableTransformer constraints

    Raises:
        ValueError: If the input argument is not a SmartnoiseSynth constraint.

    Returns:
        serialised (str): SmartnoiseSynth pipeline as a serialised string
    """
    if not isinstance(constraints, dict):
        raise ValueError("Input constraints must be an instance of dict")

    json_body = {
        "module": SSYNTH,
        "version": pkg_resources.get_distribution(SSYNTH).version,
        "constraints": {},
    }

    for col_name, col_constraints in constraints.items():
        operator_name = col_constraints.__class__.__name__
        
        if operator_name == "ChainTransformer":
            transformer_dict = handle_chain_transformer(col_name, col_constraints)
        elif operator_name == "AnonymizationTransformer":
            transformer_dict = handle_anonymization_transformer(col_name, col_constraints)
        else:
            transformer_dict = handle_default_operator(col_name, col_constraints)
        
        json_body["constraints"][col_name] = transformer_dict

    return json.dumps(json_body)

In [291]:
import math
from snsynth.transform import ChainTransformer, LogTransformer, BinTransformer, MinMaxTransformer, AnonymizationTransformer, LabelTransformer, OneHotEncoder, StandardScaler, ClampTransformer, DropTransformer
from snsynth.transform.datetime import DateTimeTransformer
constraints = {
    'id': AnonymizationTransformer("email"),
    'income':
        ChainTransformer([
            LogTransformer(),
            BinTransformer(bins=20, lower=0, upper=50)
        ]),
    'height':
        ChainTransformer([
            StandardScaler(lower = 0, upper = 1),
            BinTransformer(bins=20, lower=0, upper=1)
        ]),
    'weight':
        ChainTransformer([
            ClampTransformer(lower=10, upper = 200),
            BinTransformer(bins=20)
        ]),
    'age': MinMaxTransformer(lower=0, upper=100),
    'sex': ChainTransformer([
        LabelTransformer(nullable=True), 
        OneHotEncoder()
    ]),
    'rank': LabelTransformer(nullable=False),
    'job': DropTransformer()
    #'date': ChainTransformer([DateTimeTransformer(), MinMaxTransformer(nullable=False)])
}

In [292]:
res = serialise_constraints(constraints)

In [307]:
res

'{"module": "smartnoise-synth", "version": "1.0.4", "constraints": {"id": {"type": "_ssynth_transformer:AnonymizationTransformer", "params": {"fake": "email"}}, "income": {"type": "_ssynth_transformer:ChainTransformer", "params": [{"type": "_ssynth_transformer:LogTransformer", "params": {}}, {"type": "_ssynth_transformer:BinTransformer", "params": {"lower": 0, "upper": 50, "epsilon": 0.0, "bins": 20, "nullable": false, "odometer": null}}]}, "height": {"type": "_ssynth_transformer:ChainTransformer", "params": [{"type": "_ssynth_transformer:StandardScaler", "params": {"lower": 0, "upper": 1, "epsilon": 0.0, "nullable": false, "odometer": null}}, {"type": "_ssynth_transformer:BinTransformer", "params": {"lower": 0, "upper": 1, "epsilon": 0.0, "bins": 20, "nullable": false, "odometer": null}}]}, "weight": {"type": "_ssynth_transformer:ChainTransformer", "params": [{"type": "_ssynth_transformer:ClampTransformer", "params": {"upper": 200, "lower": 10}}, {"type": "_ssynth_transformer:BinTrans

In [304]:
import json
from typing import Union
import pkg_resources

import snsynth

class SSynthDecoder(json.JSONDecoder):
    """Decoder for SSynth constraints from str to model"""

    def __init__(self, *args, **kwargs):
        json.JSONDecoder.__init__(
            self, object_hook=self.object_hook, *args, **kwargs
        )

    def object_hook(
        self, dct: dict
    ) -> Union[tuple, dict]:  # pylint: disable=E0202
        """Hook for custom deserialisation of a SSynth constraints
        For every element, get the associated Transformer attribute.

        Args:
            dct (dict): decoded JSON object

        Raises:
            ValueError: If the serialised object is not compliant with
                        the expected format.

        Returns:
            dct (dict): value to used in place of the decoded JSON object (dct)
        """
        for k, v in dct.items():
            if isinstance(v, str):
                if v[:len(SSYNTH_TRANSFORMER)] == SSYNTH_TRANSFORMER:
                    try:
                        dct[k] = getattr(snsynth.transform, v[len(SSYNTH_TRANSFORMER):])
                    except Exception as e:
                        raise ValueError(e) from e

        return dct


def deserialise_constraints(constraints_json: str) -> dict:
    """Deserialise a DiffPriLip pipeline from string to DiffPrivLib model
    Args:
        constraints_json (str): serialised DiffPrivLib pipeline

    Raises:
        ValueError: If the serialised object is not compliant with
                                    the expected format.

    Returns:
        constraints: DiffPrivLib pipeline
    """
    json_body = json.loads(constraints_json, cls=SSynthDecoder)
    if "module" in json_body.keys():
        if json_body["module"] != SSYNTH:
            raise ValueError(f"JSON 'module' not equal to '{SSYNTH}'")
    else:
        raise ValueError("Key 'module' not in submitted json request.")

    if "version" in json_body.keys():
        if json_body["version"] != pkg_resources.get_distribution(SSYNTH).version:
            raise ValueError(
                f"Requested version does not match available version:"
                f" {pkg_resources.get_distribution(SSYNTH).version}."
            )
    else:
        raise ValueError("Key 'version' not in submitted json request.")

    deserialised = {}
    for key, val in json_body["constraints"].items():
        if isinstance(val['params'], list):
            tranformer_list = []
            for t in val['params']:
                tranformer_list.append(t['type'](**t['params']))
            deserialised[key] = val['type'](tranformer_list)
        else:
            deserialised[key] = val['type'](**val['params'])

    return deserialised

In [305]:
constraints_de = deserialise_constraints(res)

id
{'type': <class 'snsynth.transform.anonymization.AnonymizationTransformer'>, 'params': {'fake': 'email'}}
<class 'snsynth.transform.anonymization.AnonymizationTransformer'>
type

income
{'type': <class 'snsynth.transform.chain.ChainTransformer'>, 'params': [{'type': <class 'snsynth.transform.log.LogTransformer'>, 'params': {}}, {'type': <class 'snsynth.transform.bin.BinTransformer'>, 'params': {'lower': 0, 'upper': 50, 'epsilon': 0.0, 'bins': 20, 'nullable': False, 'odometer': None}}]}
<class 'snsynth.transform.chain.ChainTransformer'>
type



TypeError: snsynth.transform.chain.ChainTransformer() argument after ** must be a mapping, not list

In [316]:
json_body = json.loads(res, cls=SSynthDecoder)

deserialised = {}
for key, val in json_body["constraints"].items():
    if isinstance(val['params'], list):
        tranformer_list = []
        for t in val['params']:
            tranformer_list.append(t['type'](**t['params']))
        deserialised[key] = val['type'](tranformer_list)
    else:
        deserialised[key] = val['type'](**val['params'])

id
{'type': <class 'snsynth.transform.anonymization.AnonymizationTransformer'>, 'params': {'fake': 'email'}}
income
{'type': <class 'snsynth.transform.chain.ChainTransformer'>, 'params': [{'type': <class 'snsynth.transform.log.LogTransformer'>, 'params': {}}, {'type': <class 'snsynth.transform.bin.BinTransformer'>, 'params': {'lower': 0, 'upper': 50, 'epsilon': 0.0, 'bins': 20, 'nullable': False, 'odometer': None}}]}
height
{'type': <class 'snsynth.transform.chain.ChainTransformer'>, 'params': [{'type': <class 'snsynth.transform.standard.StandardScaler'>, 'params': {'lower': 0, 'upper': 1, 'epsilon': 0.0, 'nullable': False, 'odometer': None}}, {'type': <class 'snsynth.transform.bin.BinTransformer'>, 'params': {'lower': 0, 'upper': 1, 'epsilon': 0.0, 'bins': 20, 'nullable': False, 'odometer': None}}]}
weight
{'type': <class 'snsynth.transform.chain.ChainTransformer'>, 'params': [{'type': <class 'snsynth.transform.clamp.ClampTransformer'>, 'params': {'upper': 200, 'lower': 10}}, {'type':

In [317]:
deserialised

{'id': <snsynth.transform.anonymization.AnonymizationTransformer at 0x7f23f121dbd0>,
 'income': <snsynth.transform.chain.ChainTransformer at 0x7f23f101c550>,
 'height': <snsynth.transform.chain.ChainTransformer at 0x7f23f0db4910>,
 'weight': <snsynth.transform.chain.ChainTransformer at 0x7f23f0cbf850>,
 'age': <snsynth.transform.minmax.MinMaxTransformer at 0x7f23f0cbdb90>,
 'sex': <snsynth.transform.chain.ChainTransformer at 0x7f23f0cbf950>,
 'rank': <snsynth.transform.label.LabelTransformer at 0x7f23f0cbc890>,
 'job': <snsynth.transform.drop.DropTransformer at 0x7f23f0cbde10>}

In [319]:
for e_k, e_v in deserialised.items():
    print(e_k)
    print(e_v)
    for attr in dir(e_v):
        print(attr)

id
<snsynth.transform.anonymization.AnonymizationTransformer object at 0x7f23f121dbd0>
__class__
__delattr__
__dict__
__dir__
__doc__
__eq__
__format__
__ge__
__getattribute__
__getstate__
__gt__
__hash__
__init__
__init_subclass__
__le__
__lt__
__module__
__ne__
__new__
__reduce__
__reduce_ex__
__repr__
__setattr__
__sizeof__
__str__
__subclasshook__
__weakref__
_clear_fit
_fit
_fit_complete
_fit_finish
_generate_fake_data
_get_faker_builtin
_inverse_transform
_reset_fit
_transform
allocate_privacy_budget
args
cardinality
fake
fake_inbound
faker
fit
fit_complete
fit_transform
inverse_transform
is_categorical
is_continuous
kwargs
needs_epsilon
output_type
output_width
transform
income
<snsynth.transform.chain.ChainTransformer object at 0x7f23f101c550>
__class__
__delattr__
__dict__
__dir__
__doc__
__eq__
__format__
__ge__
__getattribute__
__getstate__
__gt__
__hash__
__init__
__init_subclass__
__le__
__lt__
__module__
__ne__
__new__
__reduce__
__reduce_ex__
__repr__
__setattr__
__sizeo

In [273]:
constraints = {
    #'id': AnonymizationTransformer("email"),
    'income':
        ChainTransformer([
            LogTransformer(),
            BinTransformer(bins=20, lower=0, upper=50)
        ]),
    'height':
        ChainTransformer([
            StandardScaler(lower = 0, upper = 1),
            BinTransformer(bins=20, lower=0, upper=1)
        ]),
    'weight':
        ChainTransformer([
            ClampTransformer(lower=10, upper = 200),
            BinTransformer(bins=20)
        ]),
    'age': MinMaxTransformer(lower=0, upper=100),
    'sex': ChainTransformer([
        LabelTransformer(nullable=True), 
        OneHotEncoder()
    ]),
    'rank': LabelTransformer(nullable=False),
    'job': DropTransformer()
    #'date': ChainTransformer([DateTimeTransformer(), MinMaxTransformer(nullable=False)])
}

In [279]:
constraints = {
    'id': AnonymizationTransformer("email"),
}

In [275]:
constraints = {
    'rank': LabelTransformer(nullable=False),
}

In [280]:
res = serialise_constraints(constraints)
res

'{"module": "smartnoise-synth", "version": "1.0.4", "constraints": {"id": {"type": "_ssynth_transformer:AnonymizationTransformer", "name": "AnonymizationTransformer", "params": {"fake": "email"}}}}'

In [281]:
json_body = deserialise_constraints(res)
json_body

{'id': {'type': snsynth.transform.anonymization.AnonymizationTransformer,
  'name': 'AnonymizationTransformer',
  'params': {'fake': 'email'}}}

In [282]:
json_body['id'] = json_body['id']['type'](**json_body['id']['params'])