In [1]:
import json
from time import time
import pandas as pd
from pathlib import Path
from dpmm.pipelines import MSTPipeline

# Load data

In [3]:
# Data directory, resolved relative to the notebook's working directory.
# (`Path().parent` is just ".", so this is the same location as before.)
wine_dir = Path("wine")

# NOTE: unpickling can execute arbitrary code - only load pickle files that
# come from a trusted source.
df = pd.read_pickle(wine_dir / "wine.pkl.gz")

# Read the domain description that is later passed to `pipeline.fit`.
# The encoding is given explicitly so that decoding does not depend on the
# platform's default locale encoding.
with (wine_dir / "wine_bounds.json").open("r", encoding="utf-8") as f:
    domain = json.load(f)

df.head()

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4192,white,6.3,0.21,0.29,11.7,0.048,49.0,147.0,0.99482,3.22,0.38,10.8,0
2157,white,5.9,0.21,0.24,12.1,0.044,53.0,165.0,0.9969,3.25,0.39,9.5,0
631,red,10.4,0.28,0.54,2.7,0.105,5.0,19.0,0.9988,3.25,0.63,9.5,0
3410,white,7.6,0.38,0.2,3.4,0.046,9.0,116.0,0.9944,3.15,0.41,9.4,0
3117,white,8.4,0.23,0.49,7.8,0.035,22.0,95.0,0.9935,3.04,0.34,12.0,1


# Build pipeline

In [4]:
# Collect the pipeline settings in one place so they are easy to review and
# tweak, then hand them to the constructor as keyword arguments.
mst_settings = dict(
    epsilon=1,  # Privacy budget for the generative model
    proc_epsilon=0.1,  # Privacy budget for data processing
    binner_type="priv-tree",  # Discretisation algorithm
    gen_kwargs={"n_iters": 5000},
    delta=1e-5,  # Delta setting in (epsilon, delta) differential privacy
    compress=True,  # Domain compression parameter
    max_model_size=80,  # Maximum model size in megabytes
    n_bins="auto",  # Number of bins
)

pipeline = MSTPipeline(**mst_settings)

# Fit step

In [5]:
# Announce the step. end='\r' returns the cursor to the start of the line so
# the timing message printed below overwrites this one.
print("Fitting MST pipeline...", end='\r')
# Wall-clock timestamp taken immediately before fitting
start_time = time()
# Fit the pipeline on the loaded DataFrame and the domain read from
# wine_bounds.json
pipeline.fit(df, domain)
# Wall-clock timestamp taken immediately after fitting
fit_time = time()

# Report the elapsed fitting time in seconds, to two decimal places
print(f"Fitting MST pipeline - Took {fit_time - start_time:.2f} seconds")

Fitting MST pipeline - Took 16.34 seconds


# Generate step

In [6]:
# Announce the step. end="\r" returns the cursor to the start of the line so
# the timing message printed below overwrites this one.
print("Generating synthetic data...", end="\r")
# Start the clock inside this cell: measuring from a timestamp taken in another
# cell would also count any idle time between running the two cells, and would
# give a meaningless figure whenever this cell is re-run on its own.
gen_start_time = time()
# Generate as many synthetic rows as there are rows in the original data
synth_df = pipeline.generate(df.shape[0])
gen_time = time()

print(f"Generating synthetic data - Took {gen_time - gen_start_time:.2f} seconds")

synth_df.head()


Generating synthetic data - Took 137.56 seconds


Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,white,6.321687,0.306169,0.25632,13.14925,0.010564,47.542974,189.577728,0.991049,3.166885,0.458416,12.247038,1
1,white,7.649213,0.229516,0.270864,2.512928,0.055915,60.59514,218.254413,0.995743,3.126779,0.676852,9.750931,1
2,white,5.248272,0.64834,0.240031,0.646026,0.062518,30.358869,106.921606,0.989296,2.900207,0.554944,11.710188,1
3,red,6.387336,0.613944,0.28697,2.451387,0.09294,14.630072,35.607284,0.990929,3.27675,0.625237,9.17419,1
4,white,6.283964,0.213128,0.300594,2.647109,0.032269,39.401355,123.560192,0.990474,3.14627,0.612417,11.390396,0
