## Imports

In [14]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from sdv.metadata import Metadata
from sdv.single_table import CTGANSynthesizer

### Load original Dataset

In [None]:
# Read the original car-claims table from the CSV next to this notebook.
carclaims = pd.read_csv(filepath_or_buffer='./carclaims_original.csv')

### Create Metadata of dataframe
SDV needs metadata describing the table in order to create the synthesizer. We can auto-generate the metadata using the code below, which is taken from the SDV docs.

In [3]:
# File the metadata is saved to for future use.
metadata_path = Path('carclaims_metadata.json')

# 1. auto-detect metadata based on your data
metadata = Metadata.detect_from_dataframes(
    data={
        'carclaims': carclaims
    })

# 2. carefully inspect and update your metadata
# Does not work in WSL, needs to be investigated further
# metadata.visualize()

metadata.validate()

# 3. when you're done, save it to a file for future use
# `save_to_json` raises if the target file already exists, which made this
# cell fail on every re-run of the notebook. Only write the file when it is
# missing; delete it manually to regenerate the saved metadata.
if not metadata_path.exists():
    metadata.save_to_json(str(metadata_path))



### Create the CTGAN synthesizer and fit it to the data

In [5]:
# Build a CTGAN synthesizer from the table metadata, then fit it on the
# original car-claims data.
synthesizer = CTGANSynthesizer(metadata=metadata)
synthesizer.fit(data=carclaims)

### Sample the synthetic data using the fitted synthesizer

In [19]:
# `sample` returns a pandas DataFrame, so no conversion is required before
# writing it out.
synthetic_data = synthesizer.sample(num_rows=1_000_000)
# Alias kept so any later cell that refers to `synthetic_df` still works.
synthetic_df = synthetic_data

# Use an explicit, filesystem-safe timestamp: the default string form of a
# datetime contains spaces and colons, and colons are not valid in Windows
# filenames.
timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S')
synthetic_df.to_csv(f'./carclaims_synthetic_{timestamp}.csv', index=False)