In [None]:
import os,sys,torch
import pandas as pd

In [None]:
from bulian.utils import set_seed
from bulian.Tabular.synthesizers import TwinSynthesizer,PrivateTwinSynthesizer
from bulian.metrics import *
from bulian.metrics.reports import *
from bulian.metrics import compute_metrics
from bulian.metrics.single_table import SingleTableMetric
from bulian.metrics.single_table import *

In [None]:
import pandas as pd
import warnings
# Suppress every warning for the rest of the session (presumably to keep cell output readable).
warnings.filterwarnings('ignore')

In [None]:
# Seed through bulian's helper; presumably fixes the RNGs for reproducible runs — confirm which libraries it covers.
set_seed(42)

In [None]:
# Categorical columns of adult.csv; handed to the synthesizers' fit calls
# and to the report functions further down.
discrete_columns = [
    "workclass", "education", "marital-status",
    "occupation", "relationship", "race",
    "sex", "native-country", "income",
]

In [None]:
# Echo the installed PyTorch version (the last expression of a cell is displayed).
import torch
torch.__version__

In [None]:
# True when PyTorch can see a usable CUDA device; the synthesizers below are still created with device='cpu'.
torch.cuda.is_available()

In [None]:
# IPython shell escape: print GPU status via the nvidia-smi command-line tool (must be on PATH).
!nvidia-smi

## Normal API: Non-differentially-private synthesizer

In [None]:
# Load the real table; "adult.csv" is a relative path, so it is resolved against the current working directory.
data = pd.read_csv("adult.csv")

In [None]:
# Preview the first rows of the real table.
data.head()

In [None]:
# Non-private synthesizer, created on the CPU.
synth = TwinSynthesizer(
    batch_size=200,
    device='cpu',  # set to 'cuda' to run on a GPU instead
)

In [None]:
# Fit on the full table. epochs=2 is very short — presumably chosen to keep the demo quick.
synth.fit(
    data=data,
    epochs=2,
    discrete_columns=discrete_columns,
)

In [None]:
# Draw a synthetic sample from the fitted model (1000 is presumably the row count).
sample = synth.sample(1000)

In [None]:
# Display the synthetic sample.
sample

In [None]:
# Metric classes to evaluate (presumably every registered SingleTableMetric subclass).
metrics = SingleTableMetric.get_subclasses()

# Column groups handed to the report functions.
numeric_features = ['capital-gain', 'capital-loss', 'hours-per-week']
discrete_columns = [
    "workclass", "education", "marital-status",
    "occupation", "relationship", "race",
    "sex", "native-country", "income",
]

In [None]:
# Display the collected metrics.
metrics

In [None]:
# Evaluate the collected metrics on the real table against the synthetic sample.
compute_metrics(metrics, data, sample)

In [None]:
# Same call as the previous cell, repeated with identical arguments.
compute_metrics(metrics,data, sample)

#### Report with privacy metrics

In [None]:
# Report including privacy metrics: key_fields and sensitive_fields are both supplied.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    key_fields=['age', 'workclass', 'education'],
    sensitive_fields=['income'],
)

#### Report without privacy metrics, but with ML efficacy metrics

In [None]:
# Report with ML-efficacy metrics: 'income' is passed as the target column.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    target='income',
)

#### Report without privacy metrics and without ML efficacy metrics

In [None]:
# Baseline report: neither privacy fields nor a target are passed.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
)

#### Save model to disk

In [None]:
# Persist the fitted synthesizer; the relative path writes into the current working directory.
synth.save('NormalAPI.pth')

#### 

## Adversarial API: Non-differentially-private synthesizer

In [None]:
# Fresh non-private synthesizer for the adversarial workflow, created on the CPU.
synth = TwinSynthesizer(
    batch_size=200,
    device='cpu',  # set to 'cuda' to run on a GPU instead
)

In [None]:
# Adversarial training path.
# test_pct=0.33 presumably holds out about a third of the rows — confirm against the bulian docs.
synth.fit_adversarial(
    data=data,
    epochs=2,
    discrete_columns=discrete_columns,
    test_pct=0.33,
)

In [None]:
# Sample through the adversarial path; unlike sample(), this call also takes the real table.
sample = synth.sample_adversarial(data,1000)

In [None]:
# Metric classes to evaluate (presumably every registered SingleTableMetric subclass).
metrics = SingleTableMetric.get_subclasses()

# Column groups handed to the report functions.
numeric_features = ['capital-gain', 'capital-loss', 'hours-per-week']
discrete_columns = [
    "workclass", "education", "marital-status",
    "occupation", "relationship", "race",
    "sex", "native-country", "income",
]

#### Report with privacy metrics

In [None]:
# Report including privacy metrics: key_fields and sensitive_fields are both supplied.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    key_fields=['age', 'workclass', 'education'],
    sensitive_fields=['income'],
)

#### Report without privacy metrics, but with ML efficacy metrics

In [None]:
# Report with ML-efficacy metrics: 'income' is passed as the target column.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    target='income',
)

#### Report without privacy metrics and without ML efficacy metrics

In [None]:
# Baseline report: neither privacy fields nor a target are passed.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
)

#### Save model to disk

In [None]:
# Persist the adversarially trained synthesizer; the relative path writes into the current working directory.
synth.save('AdversarialAPI.pth')

#### 

## Normal API: Differentially private synthesizer

In [None]:
# Private synthesizer; epsilon=0.1 is presumably the differential-privacy budget — confirm.
synth = PrivateTwinSynthesizer(
    epsilon=0.1,
    batch_size=64,
    device='cpu',  # set to 'cuda' to run on a GPU instead
)

In [None]:
# No epochs argument is passed here, unlike the non-private fit.
# NOTE(review): update_epsilon=1 differs from the constructor's epsilon=0.1 —
# confirm which value governs training.
synth.fit(
    data=data,
    discrete_columns=discrete_columns,
    update_epsilon=1,
)

In [None]:
# Draw a synthetic sample from the private model (1000 is presumably the row count).
sample = synth.sample(1000)

In [None]:
# Metric classes to evaluate (presumably every registered SingleTableMetric subclass).
metrics = SingleTableMetric.get_subclasses()

# Column groups handed to the report functions.
numeric_features = ['capital-gain', 'capital-loss', 'hours-per-week']
discrete_columns = [
    "workclass", "education", "marital-status",
    "occupation", "relationship", "race",
    "sex", "native-country", "income",
]

#### Report with privacy metrics

In [None]:
# Report including privacy metrics: key_fields and sensitive_fields are both supplied.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    key_fields=['age', 'workclass', 'education'],
    sensitive_fields=['income'],
)

#### Report without privacy metrics, but with ML efficacy metrics

In [None]:
# Report with ML-efficacy metrics: 'income' is passed as the target column.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    target='income',
)

#### Report without privacy metrics and without ML efficacy metrics

In [None]:
# Baseline report: neither privacy fields nor a target are passed.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
)

##### Get Report on Dashboard

Reports can be viewed on a dashboard web page by passing the optional boolean <b>show_dashboard</b> parameter to the <b>get_full_report</b> function. The dashboard is a Dash application that runs on a local server. You can also choose which port the local server uses with the <b>port</b> parameter; by default the app runs on port <b>8050</b>.

In [None]:
# Privacy report again, this time served as a Dash dashboard on a local server.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    key_fields=['age','workclass','education'],
    sensitive_fields = ['income'],
    show_dashboard=True,  # view the report in the dashboard web app
    port=8050  # local server port; 8050 is also the documented default
)

#### Save model to disk

In [None]:
# Persist the private synthesizer; the relative path writes into the current working directory.
synth.save('PrivateModelNormalAPI.pth')

#### 

## Adversarial API: Differentially private synthesizer

In [None]:
# Fresh private synthesizer for the adversarial workflow.
# epsilon=0.1 is presumably the differential-privacy budget — confirm.
synth = PrivateTwinSynthesizer(
    epsilon=0.1,
    batch_size=64,
    device='cpu',  # set to 'cuda' to run on a GPU instead
)

In [None]:
# Adversarial training path for the private model; no epochs or test_pct are passed here.
# NOTE(review): update_epsilon=1 differs from the constructor's epsilon=0.1 —
# confirm which value governs training.
synth.fit_adversarial(
    data=data,
    discrete_columns=discrete_columns,
    update_epsilon=1,
)

In [None]:
# Sample through the adversarial path; unlike sample(), this call also takes the real table.
sample = synth.sample_adversarial(data,1000)

In [None]:
# Metric classes to evaluate (presumably every registered SingleTableMetric subclass).
metrics = SingleTableMetric.get_subclasses()

# Column groups handed to the report functions.
numeric_features = ['capital-gain', 'capital-loss', 'hours-per-week']
discrete_columns = [
    "workclass", "education", "marital-status",
    "occupation", "relationship", "race",
    "sex", "native-country", "income",
]

#### Report with privacy metrics

In [None]:
# Report including privacy metrics: key_fields and sensitive_fields are both supplied.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    key_fields=['age', 'workclass', 'education'],
    sensitive_fields=['income'],
)

#### Report without privacy metrics, but with ML efficacy metrics

In [None]:
# Report with ML-efficacy metrics: 'income' is passed as the target column.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    target='income',
)

#### Report without privacy metrics and without ML efficacy metrics

In [None]:
# Baseline report: neither privacy fields nor a target are passed.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
)

##### Generate Report on a Dashboard

Reports can be viewed on a dashboard web page by passing the optional boolean <b>show_dashboard</b> parameter to the <b>get_full_report</b> function. The dashboard is a Dash application that runs on a local server. You can also choose which port the local server uses with the <b>port</b> parameter; by default the app runs on port <b>8050</b>.

In [None]:
# Privacy report again, this time served as a Dash dashboard on a local server.
# NOTE(review): port 8050 is also used by the earlier dashboard cell — if that
# server is still running, this one may fail to bind; confirm or pick another port.
get_full_report(
    data,
    sample,
    discrete_columns,
    numeric_features,
    key_fields=['age','workclass','education'],
    sensitive_fields = ['income'],
    show_dashboard=True,  # view the report in the dashboard web app
    port=8050  # local server port; 8050 is also the documented default
)

#### Save model to disk

In [None]:
# Persist the adversarially trained private synthesizer; the relative path writes into the current working directory.
synth.save('PrivateModelAdvAPI.pth')

#### 

### Fin ###