# Zona de Pruebas

Este notebook se usa para probar las funcionalidades y aprender cómo funciona realmente la librería.

In [1]:
# Load the demo dataset returned by ctgan's load_demo(); the cells below
# train on a slice of it and pass the full dataset to eval_ml().
from ctgan import load_demo
real_data = load_demo()

In [2]:
# Import the four model classes exercised in this notebook.
from CraftingTable import (
    CTGAN,
    TVAE,
    TabDDPM_MLP,
    TabDDPM_ResNet,
)

# Build one instance of each model, passing no constructor arguments.
tvae = TVAE()
ctgan = CTGAN()
tabddpm_resnet = TabDDPM_ResNet()
tabddpm_mlp = TabDDPM_MLP()

In [3]:
# Print the string form of each model, one per line, to inspect its settings.
print(tvae, ctgan, tabddpm_resnet, tabddpm_mlp, sep='\n')

TVAE(embedding_dim=128, compress_dims=(128, 128), decompress_dims=(128, 128))
CTGAN(embedding_dim=128, generator_dim=(256, 256), discriminator_dim=(256, 256))
TabDDPM_ResNet(d_in=None, d_out=None, dim_t=256, n_blocks=2, d_main=128, d_hidden=256, dropout_first=0.1, dropout_second=0.1)
TabDDPM_MLP(d_in=None, d_out=None, dim_t=256, d_layers=(8, 16), dropout=0.1)


In [4]:
# Column names handed to every fit() call below as the discrete columns.
discrete_columns = [
    'workclass', 'education', 'marital-status',
    'occupation', 'relationship', 'race',
    'sex', 'native-country', 'income',
]

# NOTE(review): neither setting is referenced by the cells visible in this
# notebook (the fit() calls either pass steps=1000 literally or pass no
# training length at all) — confirm whether they are still needed.
epochs = 1000
verbose = True

In [5]:
# Fit the TabDDPM MLP model on real_data[:100] (the first 100 entries) for 1000 steps.
# NOTE(review): the recorded log shows MLoss: 0.0 at both logged steps and GLoss barely
# moving (0.9533 -> 0.9501) — confirm the model is actually learning with these settings.
tabddpm_mlp.fit(real_data[:100], discrete_columns, steps=1000)

Step 500/1000 - MLoss: 0.0 | GLoss: 0.9533 | Total: 0.9533
Step 1000/1000 - MLoss: 0.0 | GLoss: 0.9501 | Total: 0.9501


In [6]:
# Fit the TabDDPM ResNet model on real_data[:100] (the first 100 entries) for 1000 steps.
# NOTE(review): the recorded log reports MLoss: 0.0 at both logged steps — TODO confirm
# that this is expected.
tabddpm_resnet.fit(real_data[:100], discrete_columns, steps=1000)

Step 500/1000 - MLoss: 0.0 | GLoss: 0.2718 | Total: 0.2718
Step 1000/1000 - MLoss: 0.0 | GLoss: 0.2202 | Total: 0.2202


In [7]:
# Fit TVAE on real_data[:100] without overriding any training setting;
# the recorded progress bar ran 300 iterations.
tvae.fit(real_data[:100], discrete_columns)

Loss: 4.592: 100%|██████████| 300/300 [00:04<00:00, 72.35it/s] 


In [8]:
# Fit CTGAN on real_data[:100] without overriding any training setting;
# the recorded progress bar ran 300 iterations.
ctgan.fit(real_data[:100], discrete_columns)

Gen. (0.20) | Discrim. (-0.15): 100%|██████████| 300/300 [00:11<00:00, 26.32it/s]


In [9]:
# Display the 'model' section of the metadata TVAE reports after fitting.
# NOTE(review): in the recorded output 'hyperparameters' is empty while the values sit under
# the misspelled keys 'hyperparmeters' / 'embeddig_dim' — presumably a library-side typo; verify.
tvae.get_metadata()['model']

{'model_type': 'TVAE',
 'hyperparameters': {},
 'fit_settings': {'times_fitted': 1,
  'fit_history': [{'Time_of_fit': '2025-05-04 11:25:48',
    'Fit_duration': '0:00:04',
    'Loss':      Epoch       Loss
    0        0  47.312416
    1        1  41.219418
    2        2  36.765972
    3        3  35.498333
    4        4  35.515263
    ..     ...        ...
    295    295   5.250002
    296    296   6.302226
    297    297   6.224199
    298    298   5.924520
    299    299   4.592421
    
    [300 rows x 2 columns]}]},
 'hyperparmeters': {'embeddig_dim': 128,
  'compress_dims': (128, 128),
  'decompress_dims': (128, 128)}}

In [10]:
# Display the 'model' section of the metadata CTGAN reports after fitting.
# NOTE(review): in the recorded output 'hyperparameters' is empty while the values sit under
# the misspelled keys 'hyperparmeters' / 'embeddig_dim' — presumably a library-side typo; verify.
ctgan.get_metadata()['model']

{'model_type': 'CTGAN',
 'hyperparameters': {},
 'fit_settings': {'times_fitted': 1,
  'fit_history': [{'Time_of_fit': '2025-05-04 11:25:52',
    'Fit_duration': '0:00:11',
    'Loss':      Epoch  Generator Loss  Discriminator Loss
    0        0        1.910680           -0.008687
    1        1        1.932163           -0.043877
    2        2        1.882386           -0.061495
    3        3        1.880321           -0.065540
    4        4        1.828088           -0.129073
    ..     ...             ...                 ...
    295    295        0.109515           -0.016491
    296    296        0.127899           -0.066041
    297    297        0.073714           -0.054687
    298    298        0.185193           -0.009719
    299    299        0.199413           -0.145305
    
    [300 rows x 3 columns]}]},
 'hyperparmeters': {'embeddig_dim': 128,
  'generator_dim': (256, 256),
  'discriminator_dim': (256, 256)}}

In [11]:
# Display the 'model' section of the metadata the TabDDPM ResNet model reports after fitting.
# NOTE(review): the recorded output leaves 'hyperparameters' empty and lists only d_in / d_out
# under the misspelled key 'hyperparmeters' — presumably a library-side typo; verify.
tabddpm_resnet.get_metadata()['model']

{'model_type': 'TabDDPM_ResNet',
 'hyperparameters': {},
 'fit_settings': {'times_fitted': 1,
  'fit_history': [{'Time_of_fit': '2025-05-04 11:25:39',
    'Fit_duration': '0:00:08',
    'Loss':      Loss    Step
    0  0.6269   100.0
    1  0.3771   200.0
    2  0.3215   300.0
    3  0.2894   400.0
    4  0.2718   500.0
    5  0.2614   600.0
    6  0.2442   700.0
    7  0.2350   800.0
    8  0.2234   900.0
    9  0.2202  1000.0}]},
 'hyperparmeters': {'d_in': 84, 'd_out': 84}}

In [12]:
# Display the 'model' section of the metadata the TabDDPM MLP model reports after fitting.
# NOTE(review): the recorded output leaves 'hyperparameters' empty and lists only d_in / d_out
# under the misspelled key 'hyperparmeters' — presumably a library-side typo; verify.
tabddpm_mlp.get_metadata()['model']

{'model_type': 'TabDDPM_MLP',
 'hyperparameters': {},
 'fit_settings': {'times_fitted': 1,
  'fit_history': [{'Time_of_fit': '2025-05-04 11:25:33',
    'Fit_duration': '0:00:05',
    'Loss':      Loss    Step
    0  0.9945   100.0
    1  0.9743   200.0
    2  0.9588   300.0
    3  0.9571   400.0
    4  0.9533   500.0
    5  0.9548   600.0
    6  0.9524   700.0
    7  0.9560   800.0
    8  0.9482   900.0
    9  0.9501  1000.0}]},
 'hyperparmeters': {'d_in': 84, 'd_out': 84}}

In [13]:
# Request 100 samples from the fitted TabDDPM MLP model and display them.
# NOTE(review): the recorded table contains negative fnlwgt and education-num values
# (e.g. -145204, -3) — check whether the generated values should be range-limited.
tabddpm_mlp.sample(100)

Sample using DDIM.
Sample timestep    0


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,121,State-gov,495625,7th-8th,19,Divorced,Adm-clerical,Other-relative,Amer-Indian-Eskimo,Female,-886,4219,109,South,<=50K
1,121,Self-emp-not-inc,798960,Masters,16,Married-spouse-absent,Other-service,Not-in-family,Black,Female,-15258,4219,48,Honduras,<=50K
2,1,Self-emp-not-inc,798960,Doctorate,-3,Married-AF-spouse,Tech-support,Husband,White,Female,896,-162,32,United-States,<=50K
3,121,State-gov,-145204,Masters,6,Divorced,Machine-op-inspct,Other-relative,Amer-Indian-Eskimo,Female,-15258,-162,109,United-States,<=50K
4,75,?,-145204,7th-8th,16,Married-spouse-absent,Protective-serv,Other-relative,Black,Female,-15258,164,32,South,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,75,?,495625,10th,19,Never-married,Machine-op-inspct,Husband,Asian-Pac-Islander,Male,-886,164,48,Mexico,<=50K
96,21,?,495625,Prof-school,6,Separated,Craft-repair,Other-relative,White,Male,-15258,4219,109,Honduras,<=50K
97,21,Federal-gov,-145204,Some-college,-3,Separated,Transport-moving,Own-child,Other,Female,-15258,-1259,48,England,>50K
98,75,?,-145204,Doctorate,16,Separated,Protective-serv,Own-child,Amer-Indian-Eskimo,Female,-886,-1259,109,Puerto-Rico,<=50K


In [14]:
# Request 100 samples from the fitted TabDDPM ResNet model and display them.
# NOTE(review): the recorded table contains negative age and hours-per-week values
# (e.g. -19, -24) — check whether the generated values should be range-limited.
tabddpm_resnet.sample(100)

Sample using DDIM.
Sample timestep    0


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,121,?,-217336,Assoc-acdm,12,Married-AF-spouse,Handlers-cleaners,Wife,Amer-Indian-Eskimo,Male,-15258,4219,109,Cuba,>50K
1,1,Local-gov,-217336,Some-college,19,Married-spouse-absent,Handlers-cleaners,Not-in-family,Other,Female,27540,-1259,109,Cuba,>50K
2,21,?,798960,9th,12,Divorced,Machine-op-inspct,Unmarried,Other,Male,27540,-1259,48,Cuba,>50K
3,1,State-gov,-145204,Some-college,7,Married-AF-spouse,Machine-op-inspct,Unmarried,Asian-Pac-Islander,Male,896,-162,-24,Mexico,>50K
4,121,Federal-gov,-145204,Assoc-voc,6,Married-civ-spouse,Other-service,Wife,Other,Male,27540,-162,48,South,>50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,75,Self-emp-inc,495625,9th,19,Married-AF-spouse,Handlers-cleaners,Husband,White,Male,27540,4219,109,Puerto-Rico,>50K
96,-19,Self-emp-not-inc,798960,Doctorate,19,Married-AF-spouse,Transport-moving,Other-relative,Amer-Indian-Eskimo,Female,27540,4219,109,South,>50K
97,-19,Federal-gov,-217336,Masters,6,Married-AF-spouse,?,Not-in-family,Amer-Indian-Eskimo,Male,27540,4219,109,South,<=50K
98,121,Federal-gov,-217336,Assoc-voc,6,Never-married,Handlers-cleaners,Other-relative,Other,Female,-15258,4219,109,Puerto-Rico,>50K


In [15]:
# Request 100 samples from the fitted CTGAN model and display them.
# NOTE(review): the recorded table contains negative fnlwgt and hours-per-week values
# (e.g. -4312, -14) — check whether the generated values should be range-limited.
ctgan.sample(100)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,28,Federal-gov,-4312,HS-grad,8,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,72,40,-14,United-States,<=50K
1,6,Private,22662,Some-college,10,Never-married,Sales,Unmarried,White,Male,-56,-15,43,?,<=50K
2,30,Private,167204,HS-grad,10,Never-married,Prof-specialty,Not-in-family,White,Female,-129,-8,15,United-States,>50K
3,23,Private,55609,Bachelors,8,Never-married,Exec-managerial,Not-in-family,White,Male,-25,37,75,Jamaica,<=50K
4,15,Private,45773,Bachelors,12,Separated,Other-service,Unmarried,White,Female,481,-26,65,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,50,State-gov,188045,5th-6th,7,Divorced,Prof-specialty,Husband,White,Male,-211,-26,15,United-States,>50K
96,39,Federal-gov,268452,Some-college,10,Married-civ-spouse,Protective-serv,Not-in-family,White,Female,3475,-2,49,United-States,<=50K
97,14,Private,-44568,Some-college,8,Married-civ-spouse,Machine-op-inspct,Own-child,White,Male,-405,50,36,United-States,<=50K
98,41,Local-gov,182463,7th-8th,8,Never-married,Prof-specialty,Husband,White,Male,409,96,80,United-States,<=50K


In [16]:
# Request 100 samples from the fitted TVAE model and display them.
# NOTE(review): every row shown in the recorded table has workclass 'Private', race 'White'
# and sex 'Male', and capital-gain goes negative — possibly a symptom of the 100-entry
# training slice; TODO confirm.
tvae.sample(100)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,28,Private,79451,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,-38,-2,41,United-States,<=50K
1,26,Private,145423,HS-grad,10,Never-married,Other-service,Own-child,White,Male,40,-4,39,United-States,<=50K
2,47,Private,151960,Bachelors,14,Married-civ-spouse,Prof-specialty,Husband,White,Male,-8,-2,40,United-States,<=50K
3,30,Private,232602,HS-grad,9,Never-married,Other-service,Not-in-family,White,Male,51,1,39,United-States,<=50K
4,31,Private,281315,Some-college,9,Never-married,Sales,Not-in-family,White,Male,-19,-1,38,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,22,Private,161728,Some-college,10,Married-civ-spouse,Prof-specialty,Not-in-family,White,Male,59,1,37,United-States,<=50K
96,52,Private,168715,Some-college,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,-73,2,38,United-States,<=50K
97,13,Private,265141,Some-college,10,Married-civ-spouse,Sales,Not-in-family,White,Male,40,-9,52,United-States,<=50K
98,38,Private,210596,Some-college,15,Married-civ-spouse,Prof-specialty,Not-in-family,White,Male,72,0,42,United-States,<=50K


In [19]:
from sklearn.linear_model import RidgeClassifier

# Run eval_ml on the TVAE model with a RidgeClassifier and 'sex' as the
# classification target; the recorded result holds one classification report
# under 'real' and one under 'fake'.
# NOTE(review): this cell is numbered In [19] directly after In [16], so the
# saved outputs may not come from a clean top-to-bottom run — TODO re-run with
# Restart & Run All.
tvae.eval_ml(
    real_data=real_data,
    target_name='sex',
    task='classification',
    model=RidgeClassifier(),
    metrics=['classification_report'],
)

{'real': {'classification_report':               precision    recall  f1-score      support
  0              0.775424  0.736077  0.755238  3232.000000
  1              0.872706  0.894600  0.883517  6537.000000
  accuracy       0.842154  0.842154  0.842154     0.842154
  macro avg      0.824065  0.815338  0.819378  9769.000000
  weighted avg   0.840521  0.842154  0.841077  9769.000000},
 'fake': {'classification_report':               precision    recall  f1-score      support
  0              0.187500  0.002785  0.005488  3232.000000
  1              0.668450  0.994034  0.799360  6537.000000
  accuracy       0.666087  0.666087  0.666087     0.666087
  macro avg      0.427975  0.498409  0.402424  9769.000000
  weighted avg   0.509331  0.666087  0.536714  9769.000000}}