In [1]:
# CTGAN library used in this notebook: https://github.com/sdv-dev/CTGAN

In [1]:
import torch

# Environment report: print what PyTorch sees of the CUDA stack before any
# training is attempted. The values are printed one at a time, in this order,
# so earlier lines still appear if a later query fails.
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Version: {torch.version.cuda}")
print(f"cuDNN Version: {torch.backends.cudnn.version()}")

# Only ask for the device name when a CUDA device is actually usable.
if torch.cuda.is_available():
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
else:
    print("GPU Name: No GPU")


CUDA Available: True
CUDA Version: 11.8
cuDNN Version: 8700
GPU Name: NVIDIA A100 80GB PCIe


In [2]:
from ctgan import CTGAN
from ctgan import load_demo
import pandas as pd
import pickle
import numpy as np

In [7]:
# Fetch the demo table via ctgan's load_demo helper.
real_data = load_demo()
# real_data = real_data[:2000]  # optional: keep only the first 2000 rows for a quicker run

# Cell output: a (preview, row count) tuple.
(real_data.head(), real_data.shape[0])

(   age         workclass  fnlwgt  education  education-num  \
 0   39         State-gov   77516  Bachelors             13   
 1   50  Self-emp-not-inc   83311  Bachelors             13   
 2   38           Private  215646    HS-grad              9   
 3   53           Private  234721       11th              7   
 4   28           Private  338409  Bachelors             13   
 
        marital-status         occupation   relationship   race     sex  \
 0       Never-married       Adm-clerical  Not-in-family  White    Male   
 1  Married-civ-spouse    Exec-managerial        Husband  White    Male   
 2            Divorced  Handlers-cleaners  Not-in-family  White    Male   
 3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   
 4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   
 
    capital-gain  capital-loss  hours-per-week native-country income  
 0          2174             0              40  United-States  <=50K  
 1             0          

In [8]:
# Columns passed to CTGAN as discrete (categorical); every other column of
# real_data is left for the model to treat as continuous.
discrete_columns = [
    'workclass', 'education', 'marital-status',
    'occupation', 'relationship', 'race',
    'sex', 'native-country', 'income',
]

# Model configuration, grouped by role. The generator and the discriminator
# use the same layer widths, learning rate and weight decay.
# NOTE(review): the recorded run of this cell was interrupted after 11 of 200
# epochs at roughly 40 s per epoch; batch_size=16 is presumably the
# bottleneck -- consider a larger value (kept compatible with pac) and confirm.
ctgan = CTGAN(
    # network shape
    embedding_dim=64,
    generator_dim=(128, 128),
    discriminator_dim=(128, 128),
    # optimiser settings
    generator_lr=1e-4,
    discriminator_lr=1e-4,
    generator_decay=1e-6,
    discriminator_decay=1e-6,
    # training schedule
    epochs=200,
    batch_size=16,
    pac=4,
    discriminator_steps=1,
    # sampling / logging / device
    log_frequency=True,
    verbose=True,
    cuda=True,
)

# Train on the full demo table; with verbose=True a progress bar with the
# generator/discriminator losses is shown.
ctgan.fit(real_data, discrete_columns=discrete_columns)


Gen. (0.31) | Discrim. (-0.04):   6%|████▋                                                                                 | 11/200 [07:23<2:07:07, 40.36s/it]


KeyboardInterrupt: 

In [6]:
# Draw 1000 synthetic rows from the fitted model.
synthetic_data = ctgan.sample(n=1000)

In [7]:
# Display the sampled table as the cell output (the recorded output is
# truncated to the first and last few rows).
synthetic_data

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,48,Private,196362,HS-grad,14,Never-married,Sales,Husband,White,Female,101,1859,40,Italy,<=50K
1,34,Private,242623,Some-college,13,Never-married,Prof-specialty,Husband,White,Female,11,1,40,England,<=50K
2,65,Never-worked,182866,HS-grad,17,Never-married,Craft-repair,Husband,Black,Male,28,1,40,United-States,<=50K
3,21,Private,359849,Bachelors,10,Never-married,Transport-moving,Not-in-family,White,Male,9,-5,30,Cambodia,<=50K
4,39,Private,135387,9th,10,Married-civ-spouse,Adm-clerical,Own-child,White,Male,23,1493,40,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,23,Private,224693,Some-college,9,Married-civ-spouse,Craft-repair,Wife,White,Male,-132,1,40,United-States,<=50K
996,21,Private,133628,Bachelors,9,Married-civ-spouse,Prof-specialty,Not-in-family,White,Male,-55,-2,40,United-States,>50K
997,14,Private,241471,HS-grad,9,Never-married,Sales,Own-child,White,Female,130,-2,40,India,<=50K
998,44,State-gov,57001,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,14,-3,40,United-States,<=50K


In [5]:
# Second display of the sampled table.
# NOTE(review): this cell's execution count (In [5]) is lower than that of the
# sampling cell (In [6]) and its recorded output differs from the display
# above, so the output appears to come from an earlier sample -- re-run the
# notebook top to bottom or drop this duplicate cell.
synthetic_data

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,15,Private,135446,HS-grad,9,Married-civ-spouse,Craft-repair,Not-in-family,White,Female,-104,-4,40,El-Salvador,<=50K
1,22,?,156041,HS-grad,6,Married-civ-spouse,Machine-op-inspct,Unmarried,White,Female,-24,-2,40,United-States,>50K
2,19,Private,71492,Bachelors,10,Separated,Adm-clerical,Husband,White,Male,6,5,40,United-States,<=50K
3,35,Private,98080,11th,7,Married-civ-spouse,Machine-op-inspct,Not-in-family,Asian-Pac-Islander,Female,-29,5,40,United-States,<=50K
4,43,Private,148518,Some-college,13,Married-civ-spouse,Other-service,Not-in-family,Asian-Pac-Islander,Female,-117,0,28,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,25,Private,295330,5th-6th,9,Never-married,Adm-clerical,Not-in-family,White,Male,-81,-1,40,United-States,<=50K
996,17,Private,125097,Bachelors,9,Married-civ-spouse,Prof-specialty,Own-child,White,Female,2510,5,40,United-States,<=50K
997,34,Private,59015,HS-grad,13,Never-married,Other-service,Own-child,White,Female,-106,4,33,United-States,<=50K
998,21,Private,188680,Masters,10,Married-civ-spouse,Craft-repair,Own-child,White,Female,21,0,36,United-States,<=50K
