# Import

In [25]:
import pandas as pd
from sdv.metadata import SingleTableMetadata
from sdv.metadata import MultiTableMetadata

External Functions

In [26]:
# Execute the helper scripts so that the names they define become available in
# this notebook's namespace. The wrappers called further down (HMA, CTGAN,
# FAST_ML) are not defined in any cell of this notebook, so they presumably
# come from these files — confirm against the scripts.
%run models/mt_HMA.py
%run models/st_CTGAN.py
%run models/st_FAST_ML.py

Real Data

In [27]:
# Load the real (source) dataset; every synthesizer below is given this frame.
RD = pd.read_csv(filepath_or_buffer='../data/real_data/d_nan.csv')

# HMASynthesizer

The HMA Synthesizer uses a hierarchical ML algorithm to learn from real data and generate synthetic data.

The HMASynthesizer is designed to capture correlations between different tables with high quality. The algorithm is optimized for datasets with around 5 tables and 1 level of depth (e.g., a parent table and its child table). Modeling time may increase if you have multiple levels of tables or more columns.

### Import Metadata

In [28]:
# Multi-table metadata describing the real dataset, read from its JSON file.
# This binding of `metadata` is the one consumed by the HMA fitting cell.
metadata = MultiTableMetadata.load_from_json(
    filepath='../data/metadata/metadata_mt_d_nan.json'
)

### Model Fitting

In [29]:
# Run the HMA helper on the real data with the metadata currently bound to
# `metadata` (the multi-table metadata when cells are run top to bottom).
# The export cell indexes the result with 'd1', so the return value is expected
# to be subscriptable by table name.
# NOTE(review): RD is a single DataFrame, not a dict of tables — presumably
# HMA() wraps it under the table name itself; confirm in models/mt_HMA.py.
SD_HMA = HMA(RD, metadata)



### Export

In [30]:
# Export synthetic dataset: take the 'd1' entry of the HMA result, make sure it
# is a DataFrame, and write it to CSV without the index column.
SD_HMA_d1 = pd.DataFrame(data=SD_HMA['d1'])
SD_HMA_d1.to_csv(
    path_or_buf='../data/synthetic_data/HMA_d_nan.csv',
    index=False,
)

# CTGANSynthesizer

The CTGAN Synthesizer uses GAN-based, deep learning methods to train a model and generate synthetic data.

### Import Metadata

In [31]:
# Single Table metadata
# NOTE(review): the CTGAN workflow is currently disabled (commented out).
# The path below points at metadata_st_d1.json, whereas the active cells in
# this notebook use the *_d_nan files — confirm the intended file before
# re-enabling.
# metadata = SingleTableMetadata.load_from_json('../data/metadata/metadata_st_d1.json')

### Model Fitting

In [32]:
# Disabled: CTGAN fitting is commented out.
# NOTE(review): if this line is re-enabled without also re-enabling the
# single-table metadata load for this section, `metadata` will still hold the
# multi-table metadata loaded for HMA.
# SD_CTGAN = CTGAN(RD, metadata)

### Export

In [33]:
# Export synthetic dataset
# Disabled: SD_CTGAN is only produced by the CTGAN fitting line, which is
# itself commented out, so these lines must be re-enabled together with it.
# SD_CTGAN = pd.DataFrame(SD_CTGAN)
# SD_CTGAN.to_csv('../data/synthetic_data/CTGAN_d_nan.csv', index=False)

# Fast ML Preset

The Fast ML Preset synthesizer is optimized for modeling speed. 

### Import Metadata

In [34]:
# Single-table metadata for the Fast ML preset, read from its JSON file.
# NOTE(review): this rebinds `metadata`, which earlier held the multi-table
# metadata used by the HMA cell; re-running the HMA fitting cell after this one
# would hand it the single-table metadata instead.
metadata = SingleTableMetadata.load_from_json(
    filepath='../data/metadata/metadata_st_d_nan.json'
)

### Model Fitting

In [35]:
# Run the Fast ML helper on the real data with the metadata currently bound to
# `metadata` (the single-table metadata when cells are run top to bottom).
# The export cell calls .to_csv() directly on the result, so it is presumably
# a pandas DataFrame — confirm in models/st_FAST_ML.py.
SD_FAST_ML = FAST_ML(RD, metadata)

### Export

In [36]:
# Export synthetic dataset: write the Fast ML output to CSV, omitting the index.
fast_ml_csv_path = '../data/synthetic_data/FAST_ML_d_nan.csv'
SD_FAST_ML.to_csv(fast_ml_csv_path, index=False)