# Import Packages

In [1]:
# One-time setup: install SDV into the kernel's environment before running the notebook.
# %pip install sdv
# NOTE(review): the imports below use `sdv.tabular`; presumably this requires a
# pre-1.0 SDV release — confirm and pin the exact version here.

In [2]:
import numpy as np
import pandas as pd
from datetime import datetime as dt
import re
import time
from sdv.tabular import GaussianCopula, CTGAN, CopulaGAN, TVAE
from sdv.constraints import Inequality, create_custom_constraint
from copy import deepcopy
import warnings
from itertools import combinations
from sd_generator import SD_generator
import pprint

In [3]:
# Suppress every Python warning from here on, presumably to keep the cell
# outputs below readable.
# NOTE(review): this also hides deprecation warnings from the libraries in use;
# consider restricting the filter to specific categories.
warnings.filterwarnings("ignore")

# Prepare Data

In [4]:
# Load the raw records and drop the 'Name' column before any further processing.
# The drop is chained (no `inplace`, no redundant `axis`) so that re-running this
# cell always rebuilds `data` from the file instead of mutating an existing frame.
data = pd.read_csv('data.csv').drop(columns=['Name'])
data.head()

Unnamed: 0,Date_of_birth,Opening_date,Expiry_date,Credit_limit,Statement_balance,Available_credit
0,1949-08-17,2008-09-02,2013-09-02,38000.0,29297.69,8702.31
1,1938-07-04,2019-09-08,2024-09-08,19500.0,15628.5,3871.5
2,1927-03-28,2019-04-23,2024-04-23,11500.0,7388.34,4111.66
3,1936-01-17,2021-10-27,2026-10-27,68500.0,32060.39,36439.61
4,1960-09-05,2010-10-02,2015-10-02,72000.0,56148.13,15851.87


In [5]:
# Strip the dashes from the date strings (e.g. '2008-09-02' -> '20080902') using
# vectorised string methods rather than a per-row Python lambda.
opening_digits = data['Opening_date'].str.replace('-', '', regex=False)
expiry_digits = data['Expiry_date'].str.replace('-', '', regex=False)

# Client_id is the opening digits immediately followed by the expiry digits;
# all three derived columns are stored as 64-bit integers.
data = data.assign(
    Client_id=(opening_digits + expiry_digits).astype('int64'),
    Opening=opening_digits.astype('int64'),
    Expiry=expiry_digits.astype('int64'),
)

# Put Client_id first and keep the two integer date keys at the end.
data = data[['Client_id', 'Date_of_birth', 'Opening_date', 'Expiry_date', 'Credit_limit', 'Statement_balance', 'Available_credit', 'Opening', 'Expiry']]

data.head()

Unnamed: 0,Client_id,Date_of_birth,Opening_date,Expiry_date,Credit_limit,Statement_balance,Available_credit,Opening,Expiry
0,2008090220130902,1949-08-17,2008-09-02,2013-09-02,38000.0,29297.69,8702.31,20080902,20130902
1,2019090820240908,1938-07-04,2019-09-08,2024-09-08,19500.0,15628.5,3871.5,20190908,20240908
2,2019042320240423,1927-03-28,2019-04-23,2024-04-23,11500.0,7388.34,4111.66,20190423,20240423
3,2021102720261027,1936-01-17,2021-10-27,2026-10-27,68500.0,32060.39,36439.61,20211027,20261027
4,2010100220151002,1960-09-05,2010-10-02,2015-10-02,72000.0,56148.13,15851.87,20101002,20151002


In [6]:
# Display the dtype of every column currently in `data`.
data.dtypes

Client_id              int64
Date_of_birth         object
Opening_date          object
Expiry_date           object
Credit_limit         float64
Statement_balance    float64
Available_credit     float64
Opening                int64
Expiry                 int64
dtype: object

# Compare Synthetic Data Generated with & without Constraints Applied

## Generate Synthetic Data with Constraints

In [7]:
# Build a detector over the original data, then run preprocessing followed by
# each relationship-detection pass in a fixed order.
d1 = SD_generator(
    data,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d1.preprocess,
    d1.detect_inequality,
    d1.detect_arithmetic_equality,
    d1.detect_arithmetic_inequality,
    d1.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0976 seconds
8 relationships detected
Execution Time: 0.628 seconds
1 relationships detected
Execution Time: 1.5079 seconds
8 relationships detected
Execution Time: 0.6578 seconds
2 relationships detected
Execution Time: 0.5114 seconds


### Original Relationships Detected:

In [8]:
# Print each relationship category detected on the original data as a label
# followed by its pretty-printed dictionary.
# (A stray trailing `a` after the final pprint call made the original cell a
# SyntaxError; it has been removed.)
for heading, found in (
    ("Inequality: ", d1.inequality_dict),
    ("Arithmetic Equality: ", d1.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d1.arithmetic_inequality_dict),
    ("Inclusive: ", d1.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Expiry'],
 'Credit_limit': ['Statement_balance', 'Available_credit'],
 'Expiry': ['Opening'],
 'Expiry_date': ['Opening_date'],
 'Opening': ['Expiry_date', 'Credit_limit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{'Credit_limit': [['Statement_balance', 'Available_credit']]}
Arithmetic Inequality: 
{'Client_id': [['Expiry', 2000010600000000.0]],
 'Credit_limit': [['Statement_balance', 3.9400000000023283],
                  ['Available_credit', 7.260000000000218]],
 'Expiry': [['Opening', 49999.0]],
 'Expiry_date': [['Opening_date', 1826.0]],
 'Opening': [['Expiry_date', 19961750.0], ['Credit_limit', 19900301.0]],
 'Opening_date': [['Date_of_birth', 6599.0]]}
Inclusive: 
{'Client_id': [['Opening', 0], ['Expiry', 8]]}


### Create Models

In [9]:
# Turn every detected relationship category into constraints, then fit each of
# the four model types with those constraints in place.
d1.create_constraints(
    inequality=True,
    arithmetic_equality=True,
    arithmetic_inequality=True,
    inclusive=True,
)
for model_name in ("GaussianCopula", "CTGAN", "CopulaGAN", "TVAE"):
    d1.apply_model(model_name)

Constrainsts created successfully!
Execution Time: 0.0005 seconds
Execution Time for training GaussianCopula: 0.1879 seconds
Execution Time for training CTGAN: 51.9239 seconds
Execution Time for training CopulaGAN: 45.1873 seconds
Execution Time for training TVAE: 17.0022 seconds


### Generate Synthetic Data

In [10]:
# Sample the same number of synthetic rows from each constrained model.
SYNTHETIC_ROWS = 2000
with_constraint_GaussianCopula = d1.generate("GaussianCopula", num_rows=SYNTHETIC_ROWS)
with_constraint_CTGAN = d1.generate("CTGAN", num_rows=SYNTHETIC_ROWS)
with_constraint_CopulaGAN = d1.generate("CopulaGAN", num_rows=SYNTHETIC_ROWS)
with_constraint_TVAE = d1.generate("TVAE", num_rows=SYNTHETIC_ROWS)

Sampling rows: 100%|██████████████████████| 2000/2000 [00:00<00:00, 8095.85it/s]


Synthetic data generated successfully with GaussianCopula model!
Execution Time: 0.6402 seconds


Sampling rows: 100%|██████████████████████| 2000/2000 [00:00<00:00, 3248.08it/s]


Synthetic data generated successfully with CTGAN model!
Execution Time: 0.9365 seconds


Sampling rows: 100%|██████████████████████| 2000/2000 [00:00<00:00, 2637.19it/s]


Synthetic data generated successfully with CopulaGAN model!
Execution Time: 1.0619 seconds


Sampling rows: 100%|██████████████████████| 2000/2000 [00:00<00:00, 8274.58it/s]


Synthetic data generated successfully with TVAE model!
Execution Time: 0.5449 seconds


### Detect Relationships on Synthetic Data

### Synthetic Data Generated using GaussianCopula with constraints

In [11]:
# Run the full detection sequence on the constrained GaussianCopula sample.
d2 = SD_generator(
    with_constraint_GaussianCopula,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d2.preprocess,
    d2.detect_inequality,
    d2.detect_arithmetic_equality,
    d2.detect_arithmetic_inequality,
    d2.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0021 seconds
9 relationships detected
Execution Time: 0.613 seconds
1 relationships detected
Execution Time: 1.4938 seconds
9 relationships detected
Execution Time: 0.6917 seconds
2 relationships detected
Execution Time: 0.4964 seconds


### New Relationships Detected:

In [12]:
# Print each relationship category found on the constrained GaussianCopula
# sample: a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d2.inequality_dict),
    ("Arithmetic Equality: ", d2.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d2.arithmetic_inequality_dict),
    ("Inclusive: ", d2.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Expiry'],
 'Credit_limit': ['Date_of_birth', 'Statement_balance', 'Available_credit'],
 'Expiry': ['Opening'],
 'Expiry_date': ['Opening_date'],
 'Opening': ['Expiry_date', 'Credit_limit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{'Credit_limit': [['Statement_balance', 'Available_credit']]}
Arithmetic Inequality: 
{'Client_id': [['Expiry', 2010766900000000.0]],
 'Credit_limit': [['Date_of_birth', 274.302098961758],
                  ['Statement_balance', 17.512694373888735],
                  ['Available_credit', 207.61000000000058]],
 'Expiry': [['Opening', 50020.0]],
 'Expiry_date': [['Opening_date', 1832.2330463282997]],
 'Opening': [['Expiry_date', 20065052.0], ['Credit_limit', 19939271.622005597]],
 'Opening_date': [['Date_of_birth', 6945.565241307646]]}
Inclusive: 
{'Client_id': [['Opening', 0], ['Expiry', 8]]}


### Synthetic Data Generated using CTGAN with constraints

In [13]:
# Run the full detection sequence on the constrained CTGAN sample.
d3 = SD_generator(
    with_constraint_CTGAN,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d3.preprocess,
    d3.detect_inequality,
    d3.detect_arithmetic_equality,
    d3.detect_arithmetic_inequality,
    d3.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0042 seconds
9 relationships detected
Execution Time: 0.6107 seconds
1 relationships detected
Execution Time: 1.4849 seconds
9 relationships detected
Execution Time: 0.7043 seconds
2 relationships detected
Execution Time: 0.5775 seconds


### New Relationships Detected:

In [14]:
# Print each relationship category found on the constrained CTGAN sample:
# a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d3.inequality_dict),
    ("Arithmetic Equality: ", d3.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d3.arithmetic_inequality_dict),
    ("Inclusive: ", d3.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Expiry'],
 'Credit_limit': ['Statement_balance', 'Available_credit'],
 'Expiry': ['Opening'],
 'Expiry_date': ['Opening_date', 'Available_credit'],
 'Opening': ['Expiry_date', 'Credit_limit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{'Credit_limit': [['Statement_balance', 'Available_credit']]}
Arithmetic Inequality: 
{'Client_id': [['Expiry', 2000165000000000.0]],
 'Credit_limit': [['Statement_balance', 3.9400000000023283],
                  ['Available_credit', 7.259999999998399]],
 'Expiry': [['Opening', 50043.0]],
 'Expiry_date': [['Opening_date', 1844.997945750125],
                 ['Available_credit', 116.66641086962773]],
 'Opening': [['Expiry_date', 19961766.0], ['Credit_limit', 19876956.963397555]],
 'Opening_date': [['Date_of_birth', 6603.140113306736]]}
Inclusive: 
{'Client_id': [['Opening', 0], ['Expiry', 8]]}


### Synthetic Data Generated using CopulaGAN with constraints

In [15]:
# Run the full detection sequence on the constrained CopulaGAN sample.
d4 = SD_generator(
    with_constraint_CopulaGAN,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d4.preprocess,
    d4.detect_inequality,
    d4.detect_arithmetic_equality,
    d4.detect_arithmetic_inequality,
    d4.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0016 seconds
8 relationships detected
Execution Time: 0.6056 seconds
1 relationships detected
Execution Time: 1.4417 seconds
8 relationships detected
Execution Time: 0.601 seconds
2 relationships detected
Execution Time: 0.3967 seconds


### New Relationships Detected:

In [16]:
# Print each relationship category found on the constrained CopulaGAN sample:
# a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d4.inequality_dict),
    ("Arithmetic Equality: ", d4.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d4.arithmetic_inequality_dict),
    ("Inclusive: ", d4.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Expiry'],
 'Credit_limit': ['Statement_balance', 'Available_credit'],
 'Expiry': ['Opening'],
 'Expiry_date': ['Opening_date'],
 'Opening': ['Expiry_date', 'Credit_limit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{'Credit_limit': [['Statement_balance', 'Available_credit']]}
Arithmetic Inequality: 
{'Client_id': [['Expiry', 2000033000000000.0]],
 'Credit_limit': [['Statement_balance', 4.0770120168162975],
                  ['Available_credit', 61.1200000000008]],
 'Expiry': [['Opening', 50071.0]],
 'Expiry_date': [['Opening_date', 1850.2910855689915]],
 'Opening': [['Expiry_date', 19961960.0], ['Credit_limit', 19851140.12962596]],
 'Opening_date': [['Date_of_birth', 6630.854306798465]]}
Inclusive: 
{'Client_id': [['Opening', 0], ['Expiry', 8]]}


### Synthetic Data Generated using TVAE with constraints

In [17]:
# Run the full detection sequence on the constrained TVAE sample.
d5 = SD_generator(
    with_constraint_TVAE,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d5.preprocess,
    d5.detect_inequality,
    d5.detect_arithmetic_equality,
    d5.detect_arithmetic_inequality,
    d5.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0028 seconds
8 relationships detected
Execution Time: 0.6068 seconds
1 relationships detected
Execution Time: 1.419 seconds
8 relationships detected
Execution Time: 0.6089 seconds
2 relationships detected
Execution Time: 0.4872 seconds


### New Relationships Detected:

In [18]:
# Print each relationship category found on the constrained TVAE sample:
# a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d5.inequality_dict),
    ("Arithmetic Equality: ", d5.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d5.arithmetic_inequality_dict),
    ("Inclusive: ", d5.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Expiry'],
 'Credit_limit': ['Statement_balance', 'Available_credit'],
 'Expiry': ['Opening'],
 'Expiry_date': ['Opening_date'],
 'Opening': ['Expiry_date', 'Credit_limit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{'Credit_limit': [['Statement_balance', 'Available_credit']]}
Arithmetic Inequality: 
{'Client_id': [['Expiry', 2000138600000000.0]],
 'Credit_limit': [['Statement_balance', 3.9400000000023283],
                  ['Available_credit', 7.259999999994761]],
 'Expiry': [['Opening', 50009.0]],
 'Expiry_date': [['Opening_date', 1829.5497543957754]],
 'Opening': [['Expiry_date', 19961880.0], ['Credit_limit', 19903935.057645496]],
 'Opening_date': [['Date_of_birth', 6603.961890450475]]}
Inclusive: 
{'Client_id': [['Opening', 0], ['Expiry', 8]]}


## Generate Synthetic Data without Constraints

In [19]:
# Build a second detector over the original data; this one is used for the
# models trained without constraints.
d6 = SD_generator(
    data,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d6.preprocess,
    d6.detect_inequality,
    d6.detect_arithmetic_equality,
    d6.detect_arithmetic_inequality,
    d6.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0036 seconds
8 relationships detected
Execution Time: 0.5975 seconds
1 relationships detected
Execution Time: 1.4179 seconds
8 relationships detected
Execution Time: 0.6017 seconds
2 relationships detected
Execution Time: 0.4931 seconds


### Original Relationships Detected:

In [20]:
# Print each relationship category detected on the original data by the second
# detector: a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d6.inequality_dict),
    ("Arithmetic Equality: ", d6.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d6.arithmetic_inequality_dict),
    ("Inclusive: ", d6.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Expiry'],
 'Credit_limit': ['Statement_balance', 'Available_credit'],
 'Expiry': ['Opening'],
 'Expiry_date': ['Opening_date'],
 'Opening': ['Expiry_date', 'Credit_limit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{'Credit_limit': [['Statement_balance', 'Available_credit']]}
Arithmetic Inequality: 
{'Client_id': [['Expiry', 2000010600000000.0]],
 'Credit_limit': [['Statement_balance', 3.9400000000023283],
                  ['Available_credit', 7.260000000000218]],
 'Expiry': [['Opening', 49999.0]],
 'Expiry_date': [['Opening_date', 1826.0]],
 'Opening': [['Expiry_date', 19961750.0], ['Credit_limit', 19900301.0]],
 'Opening_date': [['Date_of_birth', 6599.0]]}
Inclusive: 
{'Client_id': [['Opening', 0], ['Expiry', 8]]}


### Create Models

In [21]:
# Disable every constraint category, then fit the same four model types so
# they can be compared against the constrained runs.
d6.create_constraints(
    inequality=False,
    arithmetic_equality=False,
    arithmetic_inequality=False,
    inclusive=False,
)
for model_name in ("GaussianCopula", "CTGAN", "CopulaGAN", "TVAE"):
    d6.apply_model(model_name)

Constrainsts created successfully!
Execution Time: 0.0 seconds
Execution Time for training GaussianCopula: 0.1602 seconds
Execution Time for training CTGAN: 45.3612 seconds
Execution Time for training CopulaGAN: 48.0548 seconds
Execution Time for training TVAE: 16.539 seconds


### Generate Synthetic Data

In [22]:
# Sample the same number of synthetic rows from each unconstrained model.
SYNTHETIC_ROWS = 2000
without_constraint_GaussianCopula = d6.generate("GaussianCopula", num_rows=SYNTHETIC_ROWS)
without_constraint_CTGAN = d6.generate("CTGAN", num_rows=SYNTHETIC_ROWS)
without_constraint_CopulaGAN = d6.generate("CopulaGAN", num_rows=SYNTHETIC_ROWS)
without_constraint_TVAE = d6.generate("TVAE", num_rows=SYNTHETIC_ROWS)

Synthetic data generated successfully with GaussianCopula model!
Execution Time: 0.0714 seconds
Synthetic data generated successfully with CTGAN model!
Execution Time: 0.1057 seconds
Synthetic data generated successfully with CopulaGAN model!
Execution Time: 0.143 seconds
Synthetic data generated successfully with TVAE model!
Execution Time: 0.0856 seconds


### Detect Relationships on Synthetic Data

### Synthetic Data Generated using GaussianCopula without constraints

In [23]:
# Run the full detection sequence on the unconstrained GaussianCopula sample.
d7 = SD_generator(
    without_constraint_GaussianCopula,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d7.preprocess,
    d7.detect_inequality,
    d7.detect_arithmetic_equality,
    d7.detect_arithmetic_inequality,
    d7.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0032 seconds
8 relationships detected
Execution Time: 0.6626 seconds
0 relationships detected
Execution Time: 1.439 seconds
8 relationships detected
Execution Time: 0.6181 seconds
0 relationships detected
Execution Time: 0.5049 seconds


### New Relationships Detected:

In [24]:
# Print each relationship category found on the unconstrained GaussianCopula
# sample: a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d7.inequality_dict),
    ("Arithmetic Equality: ", d7.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d7.arithmetic_inequality_dict),
    ("Inclusive: ", d7.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Expiry'],
 'Expiry': ['Opening'],
 'Expiry_date': ['Opening_date'],
 'Opening': ['Expiry_date',
             'Credit_limit',
             'Statement_balance',
             'Available_credit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{}
Arithmetic Inequality: 
{'Client_id': [['Expiry', 2000057331829909.0]],
 'Expiry': [['Opening', 49089.0]],
 'Expiry_date': [['Opening_date', 1820.0]],
 'Opening': [['Expiry_date', 19961874.0],
             ['Credit_limit', 19901273.0],
             ['Statement_balance', 19920305.2],
             ['Available_credit', 19910242.860068828]],
 'Opening_date': [['Date_of_birth', 907.0]]}
Inclusive: 
{}


### Synthetic Data Generated using CTGAN without constraints

In [25]:
# Run the full detection sequence on the unconstrained CTGAN sample.
d8 = SD_generator(
    without_constraint_CTGAN,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d8.preprocess,
    d8.detect_inequality,
    d8.detect_arithmetic_equality,
    d8.detect_arithmetic_inequality,
    d8.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0026 seconds
14 relationships detected
Execution Time: 0.6279 seconds
0 relationships detected
Execution Time: 1.2947 seconds
14 relationships detected
Execution Time: 1.0719 seconds
0 relationships detected
Execution Time: 0.6092 seconds


### New Relationships Detected:

In [26]:
# Print each relationship category found on the unconstrained CTGAN sample:
# a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d8.inequality_dict),
    ("Arithmetic Equality: ", d8.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d8.arithmetic_inequality_dict),
    ("Inclusive: ", d8.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Opening', 'Expiry'],
 'Expiry': ['Opening_date',
            'Expiry_date',
            'Credit_limit',
            'Statement_balance',
            'Available_credit'],
 'Expiry_date': ['Date_of_birth'],
 'Opening': ['Opening_date',
             'Expiry_date',
             'Credit_limit',
             'Statement_balance',
             'Available_credit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{}
Arithmetic Inequality: 
{'Client_id': [['Opening', 2000010599831024.0], ['Expiry', 2000010599778876.0]],
 'Expiry': [['Opening_date', 20005183.0],
            ['Expiry_date', 20003598.0],
            ['Credit_limit', 19950106.0],
            ['Statement_balance', 19955769.97],
            ['Available_credit', 19951337.67]],
 'Expiry_date': [['Date_of_birth', 585.0]],
 'Opening': [['Opening_date', 19955183.0],
             ['Expiry_date', 19953357.0],
             ['Credit_limit', 19902867.0],
             ['Statement_balance', 19901941.96],
    

### Synthetic Data Generated using CopulaGAN without constraints

In [27]:
# Run the full detection sequence on the unconstrained CopulaGAN sample.
d9 = SD_generator(
    without_constraint_CopulaGAN,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d9.preprocess,
    d9.detect_inequality,
    d9.detect_arithmetic_equality,
    d9.detect_arithmetic_inequality,
    d9.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.003 seconds
14 relationships detected
Execution Time: 0.6024 seconds
0 relationships detected
Execution Time: 1.2435 seconds
14 relationships detected
Execution Time: 1.0499 seconds
0 relationships detected
Execution Time: 0.4053 seconds


### New Relationships Detected:

In [28]:
# Print each relationship category found on the unconstrained CopulaGAN sample:
# a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d9.inequality_dict),
    ("Arithmetic Equality: ", d9.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d9.arithmetic_inequality_dict),
    ("Inclusive: ", d9.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Opening', 'Expiry'],
 'Expiry': ['Opening_date',
            'Expiry_date',
            'Credit_limit',
            'Statement_balance',
            'Available_credit'],
 'Expiry_date': ['Date_of_birth'],
 'Opening': ['Opening_date',
             'Expiry_date',
             'Credit_limit',
             'Statement_balance',
             'Available_credit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{}
Arithmetic Inequality: 
{'Client_id': [['Opening', 2000010601324705.0], ['Expiry', 2000010601216998.0]],
 'Expiry': [['Opening_date', 20005503.0],
            ['Expiry_date', 20003753.0],
            ['Credit_limit', 19952339.0],
            ['Statement_balance', 19963040.99],
            ['Available_credit', 19960931.59353219]],
 'Expiry_date': [['Date_of_birth', 1319.0]],
 'Opening': [['Opening_date', 19955269.0],
             ['Expiry_date', 19954457.0],
             ['Credit_limit', 19900661.0],
             ['Statement_balance', 19905387.74

### Synthetic Data Generated using TVAE without constraints

In [29]:
# Run the full detection sequence on the unconstrained TVAE sample.
d10 = SD_generator(
    without_constraint_TVAE,
    inequality_threshold=0.95,
    arithmetic_equality_threshold=0.95,
    inclusive_threshold=1.0,
)
for step in (
    d10.preprocess,
    d10.detect_inequality,
    d10.detect_arithmetic_equality,
    d10.detect_arithmetic_inequality,
    d10.detect_inclusive,
):
    step()

Date types reformatted and missing values handled successfully!
Execution Time: 0.0029 seconds
8 relationships detected
Execution Time: 0.6041 seconds
0 relationships detected
Execution Time: 1.3935 seconds
8 relationships detected
Execution Time: 0.6041 seconds
0 relationships detected
Execution Time: 0.5712 seconds


### New Relationships Detected:

In [30]:
# Print each relationship category found on the unconstrained TVAE sample:
# a label line followed by the pretty-printed dictionary.
for heading, found in (
    ("Inequality: ", d10.inequality_dict),
    ("Arithmetic Equality: ", d10.arithmetic_equality_dict),
    ("Arithmetic Inequality: ", d10.arithmetic_inequality_dict),
    ("Inclusive: ", d10.inclusive_dict),
):
    print(heading)
    pprint.pprint(found)

Inequality: 
{'Client_id': ['Expiry'],
 'Expiry': ['Opening'],
 'Expiry_date': ['Opening_date'],
 'Opening': ['Expiry_date',
             'Credit_limit',
             'Statement_balance',
             'Available_credit'],
 'Opening_date': ['Date_of_birth']}
Arithmetic Equality: 
{}
Arithmetic Inequality: 
{'Client_id': [['Expiry', 2000010599907920.0]],
 'Expiry': [['Opening', 1823.0]],
 'Expiry_date': [['Opening_date', 296.0]],
 'Opening': [['Expiry_date', 19957808.0],
             ['Credit_limit', 19909166.0],
             ['Statement_balance', 19940551.15],
             ['Available_credit', 19912392.922545504]],
 'Opening_date': [['Date_of_birth', 1134.0]]}
Inclusive: 
{}
