In [6]:
import json
from time import time
import pandas as pd
from pathlib import Path
from dpmm.pipelines import MSTPipeline


# Load data - WINE

In [7]:
# The dataset lives in a "wine" folder resolved against the current working directory.
wine_dir = Path("wine")

# NOTE: unpickling can execute arbitrary code - only load pickle files from a trusted source.
df = pd.read_pickle(wine_dir / "wine.pkl.gz")

# Domain description for the columns, read from the JSON file shipped next to the data.
with open(wine_dir / "wine_bounds.json", "r") as f:
    domain = json.load(f)

# Preview the first rows of the real data.
df.head()

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4192,white,6.3,0.21,0.29,11.7,0.048,49.0,147.0,0.99482,3.22,0.38,10.8,0
2157,white,5.9,0.21,0.24,12.1,0.044,53.0,165.0,0.9969,3.25,0.39,9.5,0
631,red,10.4,0.28,0.54,2.7,0.105,5.0,19.0,0.9988,3.25,0.63,9.5,0
3410,white,7.6,0.38,0.2,3.4,0.046,9.0,116.0,0.9944,3.15,0.41,9.4,0
3117,white,8.4,0.23,0.49,7.8,0.035,22.0,95.0,0.9935,3.04,0.34,12.0,1


# Build pipeline

In [8]:
# Differential-privacy settings passed to the pipeline:
#   epsilon      - privacy budget for the generative model
#   proc_epsilon - privacy budget for data processing
#   delta        - delta in (epsilon, delta)-differential privacy
pipeline = MSTPipeline(epsilon=1, proc_epsilon=0.1, delta=1e-5)

# Fit step

In [9]:
# Progress message; the carriage return lets the final report overwrite it.
print("Fitting MST pipeline...", end="\r")

# Take wall-clock timestamps right before and right after the fit call.
start_time = time()
pipeline.fit(df, domain)  # fit on the real data together with its domain description
fit_time = time()

print(f"Fitting MST pipeline - Took {fit_time - start_time:.2f} seconds")

Fitting MST pipeline - Took 16.47 seconds


# Generate step

In [10]:
# Progress message; the carriage return lets the final report overwrite it.
print("Generating synthetic data...", end="\r")

# Start the timer inside this cell so the reported duration covers only the
# generate call, not any time elapsed since an earlier cell finished running.
gen_start = time()
# Generate as many synthetic rows as there are rows in the real data.
synth_df = pipeline.generate(df.shape[0])
gen_time = time()

print(f"Generating synthetic data - Took {gen_time - gen_start:.2f} seconds")

# Preview the first rows of the synthetic data.
synth_df.head()

Generating synthetic data - Took 0.07 seconds


Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,white,5.288142,0.19033,0.212473,1.402665,0.032305,37.097305,60.585301,0.990234,2.998241,0.658841,12.467682,1
1,white,5.956364,0.225099,0.210124,15.968057,0.04362,70.073909,202.689578,0.995807,3.198247,0.318414,10.29039,0
2,white,5.315535,0.341091,0.247268,0.62824,0.024938,52.468176,104.892353,0.990975,3.161218,0.971699,11.181373,1
3,white,7.879125,0.23417,0.275704,3.71161,0.039565,68.977194,163.38055,1.005989,3.068622,0.79852,8.075999,0
4,white,6.981342,0.358461,0.337705,3.60039,0.05045,51.567452,134.896467,0.996149,3.272745,0.599021,10.2004,0
