# Zona de Pruebas

Este notebook se usa para probar las funcionalidades y aprender cómo funciona realmente la librería.

In [1]:
from ctgan import load_demo
# Load the demo dataset provided by ctgan's load_demo(); every later cell reads it
# through the name `real_data`. The sample outputs recorded in this notebook show
# census-style columns (age, workclass, ..., income).
real_data = load_demo()

In [2]:
from CraftingTable import TVAE, CTGAN, TabDDPM_ResNet, TabDDPM_MLP

# Build one instance of every synthesizer, passing no arguments so each one
# uses whatever defaults its constructor defines. Construction order is
# TVAE, CTGAN, TabDDPM_ResNet, TabDDPM_MLP.
tvae, ctgan = TVAE(), CTGAN()
tabddpm_resnet, tabddpm_mlp = TabDDPM_ResNet(), TabDDPM_MLP()

In [3]:
# Print the text representation of each model, one per line, to inspect the
# hyperparameters they were constructed with.
print(tvae, ctgan, tabddpm_resnet, tabddpm_mlp, sep="\n")

TVAE(embedding_dim=128, compress_dims=(128, 128), decompress_dims=(128, 128))
CTGAN(embedding_dim=128, generator_dim=(256, 256), discriminator_dim=(256, 256))
TabDDPM_ResNet(d_in=None, d_out=None, dim_t=256, n_blocks=2, d_main=128, d_hidden=256, dropout_first=0.1, dropout_second=0.1)
TabDDPM_MLP(d_in=None, d_out=None, dim_t=256, d_layers=(8, 16), dropout=0.1)


In [4]:
# Training settings.
# NOTE(review): `epochs` and `verbose` are not referenced by any cell visible in
# this notebook — confirm whether they were meant to be passed to the fit calls.
epochs = 1000
verbose = True

# Names of the columns handed to every fit() call as the discrete-column list.
discrete_columns = [
    "workclass", "education", "marital-status",
    "occupation", "relationship", "race",
    "sex", "native-country", "income",
]

In [5]:
# Fit the TabDDPM_MLP model on the first 100 rows of real_data for 1000 steps.
# NOTE(review): `epochs` (also 1000) is defined in the settings cell but is never
# passed in the visible cells — confirm whether `steps=epochs` was intended.
tabddpm_mlp.fit(real_data[:100], discrete_columns, steps=1000)

Step 500/1000 - MLoss: 0.0 | GLoss: 0.9542 | Total: 0.9542
Step 1000/1000 - MLoss: 0.0 | GLoss: 0.9502 | Total: 0.9502


In [6]:
# Fit the TabDDPM_ResNet model on the same 100-row slice, also for 1000 steps,
# so its loss log can be compared with the MLP variant's.
tabddpm_resnet.fit(real_data[:100], discrete_columns, steps=1000)

Step 500/1000 - MLoss: 0.0 | GLoss: 0.2758 | Total: 0.2758
Step 1000/1000 - MLoss: 0.0 | GLoss: 0.2164 | Total: 0.2164


In [7]:
# Fit TVAE on the first 100 rows. No epoch count is passed; the recorded
# progress bar shows 300 iterations.
tvae.fit(real_data[:100], discrete_columns)

Loss: 8.332: 100%|██████████| 300/300 [00:05<00:00, 58.73it/s] 


In [8]:
# Fit CTGAN on the first 100 rows. No epoch count is passed; the recorded
# progress bar shows 300 iterations.
ctgan.fit(real_data[:100], discrete_columns)

Gen. (-0.04) | Discrim. (-0.01): 100%|██████████| 300/300 [00:13<00:00, 22.11it/s]


In [9]:
# Display the 'model' entry of TVAE's metadata. In the recorded output it holds the
# model type, the fit history (timestamp, duration, per-epoch loss table) and the
# hyperparameters.
# NOTE(review): the recorded output has an empty 'hyperparameters' key next to a
# populated, misspelled 'hyperparmeters' key (containing 'embeddig_dim') — looks
# like a typo inside the library; confirm and fix there.
tvae.get_metadata()['model']

{'model_type': 'TVAE',
 'hyperparameters': {},
 'fit_settings': {'times_fitted': 1,
  'fit_history': [{'Time_of_fit': '2025-04-26 20:44:37',
    'Fit_duration': '0:00:05',
    'Loss':      Epoch       Loss
    0        0  42.040791
    1        1  38.232529
    2        2  36.155357
    3        3  35.069859
    4        4  36.071705
    ..     ...        ...
    295    295   5.105358
    296    296   4.090278
    297    297   5.186253
    298    298   5.045595
    299    299   8.332346
    
    [300 rows x 2 columns]}]},
 'hyperparmeters': {'embeddig_dim': 128,
  'compress_dims': (128, 128),
  'decompress_dims': (128, 128)}}

In [10]:
# Display the 'model' entry of CTGAN's metadata. In the recorded output the loss
# table has separate generator and discriminator columns per epoch.
# NOTE(review): the recorded output has an empty 'hyperparameters' key and a
# misspelled 'hyperparmeters' key — presumably a library typo; confirm.
ctgan.get_metadata()['model']

{'model_type': 'CTGAN',
 'hyperparameters': {},
 'fit_settings': {'times_fitted': 1,
  'fit_history': [{'Time_of_fit': '2025-04-26 20:44:42',
    'Fit_duration': '0:00:13',
    'Loss':      Epoch  Generator Loss  Discriminator Loss
    0        0        1.809631            0.017784
    1        1        1.845032           -0.023010
    2        2        1.798043           -0.049058
    3        3        1.760463           -0.062069
    4        4        1.769007           -0.085423
    ..     ...             ...                 ...
    295    295        0.025319            0.032203
    296    296        0.056052           -0.073968
    297    297       -0.039753            0.027031
    298    298       -0.164554           -0.041716
    299    299       -0.036774           -0.010560
    
    [300 rows x 3 columns]}]},
 'hyperparmeters': {'embeddig_dim': 128,
  'generator_dim': (256, 256),
  'discriminator_dim': (256, 256)}}

In [11]:
# Display the 'model' entry of the TabDDPM_ResNet metadata. In the recorded output
# the loss is logged every 100 steps, and d_in/d_out (None at construction) are 84.
# NOTE(review): only d_in and d_out appear under the misspelled 'hyperparmeters'
# key; the other constructor settings are absent — confirm this is intended.
tabddpm_resnet.get_metadata()['model']

{'model_type': 'TabDDPM_ResNet',
 'hyperparameters': {},
 'fit_settings': {'times_fitted': 1,
  'fit_history': [{'Time_of_fit': '2025-04-26 20:44:27',
    'Fit_duration': '0:00:09',
    'Loss':      Loss    Step
    0  0.6183   100.0
    1  0.3755   200.0
    2  0.3164   300.0
    3  0.2863   400.0
    4  0.2758   500.0
    5  0.2602   600.0
    6  0.2477   700.0
    7  0.2376   800.0
    8  0.2252   900.0
    9  0.2164  1000.0}]},
 'hyperparmeters': {'d_in': 84, 'd_out': 84}}

In [12]:
# Display the 'model' entry of the TabDDPM_MLP metadata. In the recorded output the
# loss is logged every 100 steps and stays close to 0.95 from step 300 onwards.
# NOTE(review): only d_in and d_out appear under the misspelled 'hyperparmeters'
# key — confirm against the library.
tabddpm_mlp.get_metadata()['model']

{'model_type': 'TabDDPM_MLP',
 'hyperparameters': {},
 'fit_settings': {'times_fitted': 1,
  'fit_history': [{'Time_of_fit': '2025-04-26 20:44:19',
    'Fit_duration': '0:00:07',
    'Loss':      Loss    Step
    0  0.9917   100.0
    1  0.9663   200.0
    2  0.9582   300.0
    3  0.9518   400.0
    4  0.9542   500.0
    5  0.9501   600.0
    6  0.9510   700.0
    7  0.9497   800.0
    8  0.9509   900.0
    9  0.9502  1000.0}]},
 'hyperparmeters': {'d_in': 84, 'd_out': 84}}

In [13]:
# Request 100 synthetic rows from the fitted TabDDPM_MLP model and display them.
# NOTE(review): the recorded output contains implausible numeric values (for
# example negative fnlwgt and hours-per-week) — expected to be poor given the
# 100-row training slice, but confirm.
tabddpm_mlp.sample(100)

Sample using DDIM.
Sample timestep    0


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,121,Self-emp-inc,-217336,Prof-school,7,Never-married,Sales,Not-in-family,Amer-Indian-Eskimo,Female,-886,-162,32,United-States,>50K
1,59,?,-145204,5th-6th,6,Married-civ-spouse,Prof-specialty,Own-child,Other,Male,27540,4219,109,Honduras,<=50K
2,121,Private,-217336,HS-grad,19,Divorced,Tech-support,Not-in-family,Other,Female,896,-1259,32,Mexico,<=50K
3,1,State-gov,495625,10th,-3,Divorced,?,Unmarried,Other,Male,-15258,-1259,109,Honduras,<=50K
4,59,Self-emp-inc,-217336,9th,16,Never-married,Other-service,Not-in-family,Black,Male,896,-162,109,Cuba,>50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,21,Self-emp-inc,798960,5th-6th,6,Married-spouse-absent,Exec-managerial,Husband,White,Female,-886,-162,109,India,<=50K
96,59,Self-emp-not-inc,-145204,HS-grad,6,Married-civ-spouse,Transport-moving,Own-child,Asian-Pac-Islander,Female,896,-1259,32,?,<=50K
97,21,Private,798960,Masters,6,Separated,Machine-op-inspct,Unmarried,Black,Female,896,-1259,109,South,<=50K
98,1,Self-emp-not-inc,-217336,Masters,19,Married-civ-spouse,?,Own-child,Asian-Pac-Islander,Male,896,164,-24,United-States,>50K


In [14]:
# Request 100 synthetic rows from the fitted TabDDPM_ResNet model and display them.
# NOTE(review): the recorded output contains negative ages (-19) and every visible
# row has income '>50K' — confirm whether this is a training-size artefact.
tabddpm_resnet.sample(100)

Sample using DDIM.
Sample timestep    0


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,-19,Local-gov,-217336,Prof-school,12,Married-spouse-absent,Other-service,Not-in-family,Other,Male,-886,-1259,-24,Puerto-Rico,>50K
1,-19,?,-217336,Assoc-acdm,-3,Married-spouse-absent,Farming-fishing,Not-in-family,Asian-Pac-Islander,Female,-15258,-1259,-24,India,>50K
2,-19,Self-emp-inc,798960,11th,19,Married-spouse-absent,Other-service,Not-in-family,Amer-Indian-Eskimo,Female,896,-1259,-24,India,>50K
3,75,Federal-gov,-217336,11th,6,Married-AF-spouse,Protective-serv,Not-in-family,Asian-Pac-Islander,Female,27540,4219,32,India,>50K
4,75,Self-emp-not-inc,-217336,HS-grad,7,Married-AF-spouse,Farming-fishing,Husband,White,Female,-15258,164,-24,Mexico,>50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-19,?,798960,Bachelors,6,Divorced,Sales,Not-in-family,Other,Female,896,4219,-24,Jamaica,>50K
96,121,Self-emp-inc,495625,Doctorate,19,Married-spouse-absent,Transport-moving,Not-in-family,Amer-Indian-Eskimo,Female,27540,4219,109,Mexico,>50K
97,21,Local-gov,495625,7th-8th,19,Separated,Adm-clerical,Unmarried,Other,Female,-15258,164,32,South,>50K
98,121,Federal-gov,495625,11th,16,Married-spouse-absent,Other-service,Own-child,Black,Male,-15258,4219,48,Cuba,>50K


In [15]:
# Request 100 synthetic rows from the fitted CTGAN model and display them.
# NOTE(review): the recorded output includes negative capital-gain, capital-loss
# and fnlwgt values.
ctgan.sample(100)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,46,Private,248601,Some-college,7,Married-civ-spouse,Tech-support,Not-in-family,Black,Male,479,25,41,Mexico,<=50K
1,21,Private,212161,Bachelors,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,-344,-17,25,United-States,<=50K
2,50,?,187721,Assoc-voc,10,Married-civ-spouse,Prof-specialty,Husband,White,Male,-645,-92,37,United-States,>50K
3,68,Private,314689,Some-college,7,Married-civ-spouse,Prof-specialty,Not-in-family,White,Male,437,-49,90,United-States,>50K
4,63,Local-gov,421443,7th-8th,12,Married-civ-spouse,Prof-specialty,Wife,Asian-Pac-Islander,Male,-1133,-40,50,India,>50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,48,Private,200749,11th,9,Married-spouse-absent,?,Unmarried,Amer-Indian-Eskimo,Male,-372,19,68,United-States,<=50K
96,82,Private,65666,Prof-school,7,Married-civ-spouse,Machine-op-inspct,Not-in-family,Black,Male,2111,71,40,Honduras,<=50K
97,20,Private,149865,Some-college,11,Never-married,Prof-specialty,Own-child,White,Male,-1115,-17,42,United-States,<=50K
98,55,Private,-7108,7th-8th,14,Never-married,Machine-op-inspct,Husband,White,Male,-626,-121,43,United-States,<=50K


In [16]:
# Request 100 synthetic rows from the fitted TVAE model and display them.
# NOTE(review): in the recorded output almost every visible row shares the same
# categorical values (Private / Married-civ-spouse / White / Male) — low
# diversity, presumably from the 100-row training slice; confirm.
tvae.sample(100)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,32,Private,250586,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,71,-6,41,United-States,>50K
1,22,Private,131884,HS-grad,8,Married-civ-spouse,Sales,Husband,White,Male,-46,-5,38,United-States,<=50K
2,46,Private,175206,Doctorate,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,65,-4,44,United-States,>50K
3,34,Private,187599,Some-college,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,48,-5,60,United-States,<=50K
4,31,Private,96050,Some-college,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,64,-8,55,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,35,Private,134137,HS-grad,9,Married-civ-spouse,Adm-clerical,Husband,White,Male,-9,-3,40,United-States,<=50K
96,41,Private,182362,Some-college,10,Married-civ-spouse,Sales,Husband,White,Male,-31,-4,40,United-States,<=50K
97,25,Private,76172,HS-grad,10,Married-civ-spouse,Sales,Not-in-family,White,Male,57,-5,40,United-States,<=50K
98,30,Private,149600,Some-college,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,10,-8,60,United-States,<=50K


In [18]:
# Run eval_ml on the full real_data with 'sex' as the second argument (presumably
# the prediction target — confirm against CraftingTable), a 'classification' task,
# a 'GaussianNB' model and the 'classification_report' metric. The recorded output
# holds one report under 'real' and one under 'fake'.
# NOTE(review): the models above were fitted on real_data[:100] only, while this
# call receives the full dataset. The execution count also jumps from 16 to 18, so
# re-run from a fresh kernel to confirm the results reproduce.
tvae.eval_ml(real_data, 'sex', 'classification', 'GaussianNB', metrics=['classification_report'])

{'real': {'classification_report':               precision    recall  f1-score      support
  0              0.456858  0.766708  0.572551  3232.000000
  1              0.826467  0.549335  0.659989  6537.000000
  accuracy       0.621251  0.621251  0.621251     0.621251
  macro avg      0.641663  0.658021  0.616270  9769.000000
  weighted avg   0.704185  0.621251  0.631061  9769.000000},
 'fake': {'classification_report':               precision    recall  f1-score     support
  0              0.500787  0.098391  0.164469  3232.00000
  1              0.680972  0.951507  0.793823  6537.00000
  accuracy       0.669260  0.669260  0.669260     0.66926
  macro avg      0.590880  0.524949  0.479146  9769.00000
  weighted avg   0.621359  0.669260  0.585606  9769.00000}}