## Loading data

In [1]:
from sdv.datasets.demo import download_demo

# Fetch the single-table 'fake_hotel_guests' demo dataset; the call returns
# the table itself together with the metadata object describing it.
real_data, metadata = download_demo(modality='single_table', dataset_name='fake_hotel_guests')

In [2]:
# Preview the first five rows of the downloaded table.
real_data.head(n=5)

Unnamed: 0,guest_email,has_rewards,room_type,amenities_fee,checkin_date,checkout_date,room_rate,billing_address,credit_card_number
0,michaelsanders@shaw.net,False,BASIC,37.89,27 Dec 2020,29 Dec 2020,131.23,"49380 Rivers Street\nSpencerville, AK 68265",4075084747483975747
1,randy49@brown.biz,False,BASIC,24.37,30 Dec 2020,02 Jan 2021,114.43,"88394 Boyle Meadows\nConleyberg, TN 22063",180072822063468
2,webermelissa@neal.com,True,DELUXE,0.0,17 Sep 2020,18 Sep 2020,368.33,"0323 Lisa Station Apt. 208\nPort Thomas, LA 82585",38983476971380
3,gsims@terry.com,False,BASIC,,28 Dec 2020,31 Dec 2020,115.61,"77 Massachusetts Ave\nCambridge, MA 02139",4969551998845740
4,misty33@smith.biz,False,BASIC,16.45,05 Apr 2020,,122.41,"1234 Corporate Drive\nBoston, MA 02116",3558512986488983


## Creating a synthesizer

In [3]:
from sdv.lite import SingleTablePreset

# Build a preset synthesizer from the table metadata, selecting the 'FAST_ML' preset.
synthesizer = SingleTablePreset(metadata, name='FAST_ML')

In [4]:
# Fit the synthesizer on the real table loaded above.
synthesizer.fit(data=real_data)

## Generating synthetic data

In [5]:
# Draw 500 synthetic rows from the fitted synthesizer.
synthetic_data = synthesizer.sample(num_rows=500)

# Preview the first rows of the generated table.
synthetic_data.head()

Unnamed: 0,guest_email,has_rewards,room_type,amenities_fee,checkin_date,checkout_date,room_rate,billing_address,credit_card_number
0,dsullivan@example.net,False,DELUXE,10.385184,03 Apr 2020,23 Apr 2020,149.354932,"90469 Karla Knolls Apt. 781\nSusanberg, CA 70033",5161033759518983
1,steven59@example.org,False,BASIC,,04 Jul 2020,24 Aug 2020,179.634314,"6108 Carla Ports Apt. 116\nPort Evan, MI 71694",4133047413145475690
2,brandon15@example.net,False,BASIC,22.700956,20 Apr 2020,14 Apr 2020,145.658788,86709 Jeremy Manors Apt. 786\nPort Garychester...,4977328103788
3,humphreyjennifer@example.net,False,BASIC,23.497404,20 May 2020,05 Jun 2020,187.945019,"8906 Bobby Trail\nEast Sandra, NY 43986",3524946844839485
4,joshuabrown@example.net,False,DELUXE,20.162318,05 Jan 2020,07 Jan 2020,190.691273,"732 Dennis Lane\nPort Nicholasstad, DE 49786",4446905799576890978


## Evaluating data

### Diagnostic

In [6]:
from sdv.evaluation.single_table import run_diagnostic

# Run the diagnostic report on the synthetic table against the real table
# and its metadata; the report progress and scores are printed below the cell.
diagnostic = run_diagnostic(
    metadata=metadata,
    real_data=real_data,
    synthetic_data=synthetic_data,
)

Generating report ...
(1/2) Evaluating Data Validity: : 100%|████████████████████████████████████████████████| 9/9 [00:00<00:00, 1498.98it/s]
(2/2) Evaluating Data Structure: : 100%|████████████████████████████████████████████████| 1/1 [00:00<00:00, 500.04it/s]

Overall Score: 100.0%

Properties:
- Data Validity: 100.0%
- Data Structure: 100.0%


### Visualizing data

In [7]:
from sdv.evaluation.single_table import get_column_plot

# Build a plot of the 'amenities_fee' column for the real and synthetic tables.
fig = get_column_plot(
    metadata=metadata,
    column_name='amenities_fee',
    real_data=real_data,
    synthetic_data=synthetic_data,
)

# Render the figure in the notebook.
fig.show()