In [1]:
import pandas as pd 

# Real training flows: the "member" population for the membership-inference attack.
member         = pd.read_csv("datasets/real/cidds_train.csv")
# Reference flows from a different dataset: the "non-member" population.
non_member     = pd.read_csv("datasets/reference/ton.csv")
# Synthetic flows, one CSV per generator.
synth_ctgan    = pd.read_csv("datasets/synthetic/ctgan.csv")
synth_tabula   = pd.read_csv("datasets/synthetic/tabula.csv")
synth_netshare = pd.read_csv("datasets/synthetic/netshare.csv")

In [2]:
# Flow attributes shared by every dataset; all frames are projected onto this list.
columns = [
    "srcip", "dstip",      # endpoint addresses
    "srcport", "dstport",  # endpoint ports
    "proto",
    "td", "pkt", "byt",
    "label",
]

In [3]:
# Project every frame onto the shared column list (same columns, same order)
# so the real, reference and synthetic data have an identical layout.
member         = member.loc[:, columns]
non_member     = non_member.loc[:, columns]
synth_ctgan    = synth_ctgan.loc[:, columns]
synth_tabula   = synth_tabula.loc[:, columns]
synth_netshare = synth_netshare.loc[:, columns]

In [4]:
from src.privacy.flowmia_gan import FlowMIA_GAN

# Build the attack object for the NetShare synthetic data.
# Positional arguments, in order: member frame, non-member frame, synthetic frame.
# The keyword arguments assign each selected column to one of three column groups.
flowmia_gan_netshare = FlowMIA_GAN(
    member,
    non_member,
    synth_netshare,
    categorical_cols=["proto", "label"],
    numerical_cols=["srcport", "dstport", "td", "pkt", "byt"],
    ip_cols=["srcip", "dstip"],
    batch_size=128,
)



In [5]:
# Train for 10 epochs. In the recorded run a single checkpoint was written at
# epoch 10 under results/ (fcheckpoint is presumably the checkpoint interval
# in epochs -- confirm against FlowMIA_GAN.fit).
history = flowmia_gan_netshare.fit(
    epochs=10,
    fcheckpoint=10,
    save_path="results",
)

Fitting the pre processors...
Preprocessors fitted. Starting GAN training...


Training GAN: 100%|██████████| 10/10 [00:26<00:00,  2.68s/epoch, D_loss=0.6485, G_loss=2.1920]


✓ Model checkpoint 10 saved to results/checkpoint_epoch_10.pth





In [6]:
# Run the membership-inference step on the attack object fitted above and keep
# the returned dict for inspection and plotting in the following cells.
results = flowmia_gan_netshare.membership_inference()

In [7]:
# Display the results dict: per-group score arrays plus scalar metrics
# (auc, accuracy, precision, recall, threshold, per-group mean/std).
# NOTE(review): this renders the full score arrays; consider showing only the
# scalar entries to keep the cell output short.
results

{'score_members': array([0.04946543, 0.04946543, 0.9248557 , ..., 0.88950926, 0.8895389 ,
        0.8895389 ], dtype=float32),
 'score_non_members': array([1.        , 0.10627155, 0.20707135, ..., 0.8681158 , 0.8685318 ,
        0.8710715 ], dtype=float32),
 'score_random': array([0.07383598, 0.02507735, 0.7035966 , ..., 0.04750786, 0.07141925,
        0.46352077], dtype=float32),
 'score_synthetic': array([0.9985055 , 0.9795737 , 0.9584859 , ..., 0.88416666, 0.23030867,
        0.895265  ], dtype=float32),
 'auc': 0.7069759476622949,
 'accuracy': 0.7429498915367929,
 'precision': 0.8184513006654568,
 'recall': 0.6244153846153846,
 'threshold': 0.88865364,
 'threshold_method': 'optimal',
 'mean_score_members': 0.6474371,
 'mean_score_non_members': 0.30735222,
 'mean_score_random': 0.18299259,
 'mean_score_synthetic': 0.7758569,
 'std_score_members': 0.3801224,
 'std_score_non_members': 0.37060377,
 'std_score_random': 0.21721798,
 'std_score_synthetic': 0.2783162,
 'score_gap_member_no

In [8]:
# Hex colour per score group, passed to plot_all below.
colors = {
    "members": "#e74c3c",
    "non_members": "#3498db",
    "synthetic": "#2ecc71",
    "random": "#95a5a6",
}

In [None]:
# Plot the membership-inference results with the colour mapping defined above;
# save_path points at results/plots.
flowmia_gan_netshare.plot_all(
    results=results,
    colors=colors,
    save_path="results/plots",
)

In [11]:
# Single-table metadata for the SDMetrics call below: one sdtype per column, no primary key.
# NOTE(review): srcip/dstip are declared "numerical" here but are passed as
# ip_cols to FlowMIA_GAN earlier -- confirm the CSVs store them as numbers.
metadata_dict = {
    "primary_key": None,
    "columns": {
        column_name: {"sdtype": sdtype}
        for column_name, sdtype in (
            ("srcip", "numerical"),
            ("srcport", "numerical"),
            ("dstip", "numerical"),
            ("dstport", "numerical"),
            ("proto", "categorical"),
            ("td", "numerical"),
            ("byt", "numerical"),
            ("pkt", "numerical"),
            ("label", "categorical"),
        )
    },
}

In [None]:
from sdmetrics.single_table import DCROverfittingProtection

# DCR overfitting-protection breakdown for the NetShare synthetic data, using
# the member frame as training data and the non-member frame as validation data.
#
# All three frames are subsampled to one common size. The target is 15000 rows,
# capped at the smallest frame: DataFrame.sample(n=...) raises ValueError when
# n exceeds the number of rows, and a shared cap keeps the three samples equal
# in size.
dcr_n_rows = min(15000, len(member), len(synth_netshare), len(non_member))
dcr_seed = 42  # fixed seed so the subsamples are reproducible

DCROverfittingProtection.compute_breakdown(
    real_training_data=member.sample(n=dcr_n_rows, random_state=dcr_seed),
    synthetic_data=synth_netshare.sample(n=dcr_n_rows, random_state=dcr_seed),
    real_validation_data=non_member.sample(n=dcr_n_rows, random_state=dcr_seed),
    metadata=metadata_dict
)