In [1]:
import numpy as np
import pandas as pd
from datetime import datetime as dt
import re
import time
from sdv.tabular import GaussianCopula, CTGAN, CopulaGAN, TVAE
from sdv.constraints import Range
from sdv.constraints import ScalarRange
from sdv.constraints import Inequality
from sdv.constraints import ScalarInequality

In [8]:
import os
os.chdir("/Users/santoshitadanki/Synthetic_Data_Generation_DS/data")
# os.chdir("/Users/santoshitadanki/Synthetic_Data_Generation_DS/data")

data = pd.read_csv('data_500k.csv')
print(data.shape)
display(data.head())
data.dtypes

(500000, 6)


Unnamed: 0,Date_of_birth,Opening_date,Expiry_date,Credit_limit,Statement_balance,Available_credit
0,1907-02-05,2002-11-12,2007-11-12,39500.0,23047.49,16452.51
1,1953-12-10,2022-10-09,2027-10-09,13500.0,11144.41,2355.59
2,1942-07-13,2005-12-13,2010-12-13,26000.0,17511.56,8488.44
3,1948-05-05,2018-02-13,2023-02-13,88500.0,11245.66,77254.34
4,1956-06-28,2009-03-20,2014-03-20,62000.0,27648.2,34351.8


Date_of_birth         object
Opening_date          object
Expiry_date           object
Credit_limit         float64
Statement_balance    float64
Available_credit     float64
dtype: object

In [62]:
class Detector():
    """
    A class to detect deterministic relationships between two/three columns from a given dataset.

    ...

    Attributes
    ----------
    data : Pandas DataFrame
        an input dataset in Pandas DataFrame format
        
    threshold : float
        a cut-off percentage for detection functions to confirm the deterministic relationships
        
    inequality_dict : dictionary
        stores inequality deterministic relationships;
        in which the key is greater than its values
    
    ABC_dict : dictionary
        stores deterministic relationships like "A = B + C" among three colomns
    
    ABX_dict : dictionary
        stores deterministic relationships like "A = B + X" between two colomns
        
    Methods
    -------
    preprocess():
        Change the dtpyes of date columns to float and 
        drop the rows of the input dataframe which have missing values.
    
    detect_inequality():
        Detect the inequality deterministic relationship between two colomns.
    
    detect_ABC():
        Detect the deterministic relationships like "A = B + C" among three colomns.
        
    detect_ABX():
        Detect the deterministic relationships like "A >= B + X" between two colomns.
        
    create_constraints():
        Create constraints for synthetic data generation model training.
    """
    def __init__(self, data, threshold, 
                 inequality_dict={}, inequality_runtime=0,
                 ABC_dict={}, ABC_runtime=0, 
                 ABX_dict={}, ABX_runtime=0,
                 constraints=[]):
        """
        Constructs all the necessary attributes for the person object.

        Parameters
        ----------
            dataframe : Pandas DataFrame
                an input dataset in Pandas DataFrame format
                
            threshold : float
                a cut-off percentage for detection functions to confirm the deterministic relationships
                
            inequality_dict : dictionary
                an empty dictionary to store inequality deterministic relationships
    
            ABC_dict : dictionary
                an empty dictionary to store deterministic relationships among three columns like "A = B + C"
            
            ABX_dict : dictionary
                an empty dictionary to store deterministic relationships between two columnslike "A >= B + X"
            
            constarints : list
                an empty list to store contraints for synthetic data generation model training
        """    
        self.data = data
        self.threshold = threshold
        
        self.inequality_dict = inequality_dict
        self.inequality_runtime = inequality_runtime
        
        self.ABC_dict = ABC_dict
        self.ABC_runtime = ABC_runtime
        
        self.ABX_dict = ABX_dict 
        self.ABX_runtime = ABX_runtime
        
        self.constraints = constraints
        
    def preprocess(self):
        """
        Change date columns to float format;
        Handle missing values of the input dataframe;
        Drop the rows with missing values.
        
        Returns:
            None.
        
        Output:
            Running finished message with execution time.
        """
        st = time.time()
        
        data = self.data
        
        ref_dt = pd.Timestamp('1900-01-01')

        str2date = lambda x: dt.strptime(x, "%Y-%m-%d") - ref_dt if x.replace(" ", "") else np.nan
        
        for col in data.columns:
            
            try:
                re.match('^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$', data[col][0])
                
                self.data[col] = self.data[col].apply(str2date)
                self.data[col] = (self.data[col] / np.timedelta64(1, 'D')).astype(float)
                
            except:
                pass
            
        data.dropna(axis=0, inplace=True)
        self.data = data

        et = time.time()
        elapsed_time = et - st
        print("Date types reformatted and missing values handled successfully!\nExecution Time:"
              , round(elapsed_time, 4), "seconds")

    def detect_inequality(self):
        """
        Detect the inequality deterministic relationship between colomns;
        Update the inequality_dictionary of the class object.
        
        Returns:
            None.
        
        Output:
            Number of relationships detected with execution time.
        """
        st = time.time()
        
        data = self.data
        inequality_dict = {}
        
    # Looping through all combinitions of columns
        for i in range(len(data.columns)):
            for j in range(i+1, len(data.columns)):
                
                # Extract the column pairs in float format
                if (data[data.columns[i]].dtypes == 'float' and data[data.columns[j]].dtypes == 'float'):
                    diff = []
                    count = 0
                    for rownum, row in data.iterrows():
                        diff.append(row[i] - row[j])
                    for num in diff:
                        if num < 0:
                            count+=1 
                    if float(count)/len(diff) >= self.threshold:
                        if data.columns[j] in inequality_dict.keys():
                            inequality_dict[data.columns[j]].append(data.columns[i])
                        else:
                            inequality_dict[data.columns[j]] = []
                            inequality_dict[data.columns[j]].append(data.columns[i])
                    elif float(len(diff) - count)/len(diff) >= self.threshold:
                        if data.columns[i] in inequality_dict.keys():
                            inequality_dict[data.columns[i]].append(data.columns[j])
                        else:
                            inequality_dict[data.columns[i]] = []
                            inequality_dict[data.columns[i]].append(data.columns[j])
    
        self.inequality_dict = inequality_dict
        
        et = time.time()
        elapsed_time = et - st
        self.inequality_runtime = elapsed_time
        
        print(len(inequality_dict), "relationships detected")
        print("Execution Time:", round(self.inequality_runtime, 4), "seconds")

    def detect_ABC(self):
        """
        Detect the deterministic relationships "A = B + C" among three columns;
        Based on the dictionary of inequality deterministic relationships.
        
        Returns:
            None.
        
        Output:
            Number of relationships detected with execution time.    
        """
        st = time.time()
        
        data = self.data
        ABC_dict = {}
        
        for key in self.inequality_dict:
            # Check if the len(the list of values of the key) is >= 2 
            if len(self.inequality_dict[key]) >= 2:
                for i in range(len(self.inequality_dict[key])):
                    for j in range(i+1, len(self.inequality_dict[key])):
                        count = 0
                        for index, row in data.iterrows():
                            if (row[key] == row[self.inequality_dict[key][i]] + row[self.inequality_dict[key][j]]):
                                count += 1
                        if float(count) / len(data.index) >= self.threshold:
                            if key in ABC_dict.keys():
                                ABC_dict[key].append([self.inequality_dict[key][i], self.inequality_dict[key][j]])
                            else:
                                ABC_dict[key] = []
                                ABC_dict[key].append([self.inequality_dict[key][i], self.inequality_dict[key][j]])
                 
        self.ABC_dict = ABC_dict
        
        et = time.time()
        elapsed_time = et - st
        self.ABC_runtime = elapsed_time
        
        print(len(ABC_dict), "relationships detected")
        print("Execution Time:", round(self.ABC_runtime, 4), "seconds")
    
    def detect_ABX(self):
        """
        Detect the deterministic relationships "A >= B + X" between two columns;
        Based on the dictionary of inequality deterministic relationships.
        
        Returns:
            None.
        
        Output:
            Number of relationships detected with execution time. 
        """
        st = time.time()
        
        data = self.data
        ABX_dict = {}
        
        for key in self.inequality_dict:
            for value in self.inequality_dict[key]:
                diff = []
                for index, row in data.iterrows():
                    diff.append(row[key] - row[value])
                    
                if key in ABX_dict.keys():
                    ABX_dict[key].append([value, min(diff)])
                else:
                    ABX_dict[key] = []
                    ABX_dict[key].append([value, min(diff)])
        
        self.ABX_dict = ABX_dict
            
        et = time.time()
        elapsed_time = et - st
        self.ABX_runtime = elapsed_time
        
        print(len(ABX_dict), "relationships detected")
        print("Execution Time:", round(self.ABX_runtime, 4), "seconds")
    
    def create_constraints(self, inequality=True, ABC=False, ABX=False):
        """
        Create constraints for synthetic data generation model training.
        
        Parameters:
            inequality: bool, default=True
                If True, create constraints for inequality deterministic relationships.
            
            ABC: bool, default=True
                If True, create constraints for deterministic relationships like "A = B + C".
            
            ABC: bool, default=True
                If True, create constraints for deterministic relationships like "A = B + X".
        
        Returns:
            None.
        
        Output:
            Running finished message with execution time.
        """
        st = time.time()
        
        if inequality:
            for key in self.inequality_dict:
                for value in self.inequality_dict[key]:
                    self.constraints.append(Inequality(low_column_name=value, high_column_name=key))
        
#         if ABC:
#             for key in self.ABC_dict:
#                 for value_list in self.ABC_dict[key]:
#                     self.constrainsts.append()
        
#         if ABX:
#             for key in self.inequality_dict:
#                 for value_list in self.inequality_dict[key]:
#                     self.constrainsts.append()

        et = time.time()
        elapsed_time = et - st
        
        print("Constrainsts created successfully!\nExecution Time:"
              , round(elapsed_time, 4), "seconds")
        

In [32]:
runtime_bm = pd.DataFrame(columns = ['inequality_runtime','ABC_runtime','ABX_runtime'])
runtime_bm['inequality_runtime'] = 5.0

data_div = data_select.sample(round(data_select.shape[0]*0.05),ignore_index=True)
    
detector_obj = Detector(data_div, 0.95)
detector_obj.detect_inequality()
runtime_bm['inequality_runtime'] = detector_obj.inequality_runtime

detector_obj.detect_ABC()
runtime_bm['ABC_runtime'] = detector_obj.ABC_runtime

detector_obj.detect_ABX()
runtime_bm['ABX_runtime'] = detector_obj.ABX_runtime
list_row = [detector_obj.inequality_runtime,detector_obj.ABC_runtime,detector_obj.ABX_runtime]

runtime_bm.loc[len(runtime_bm)] = list_row
runtime_bm

3 relationships detected
Execution Time: 0.6403 seconds
1 relationships detected
Execution Time: 0.1037 seconds
3 relationships detected
Execution Time: 0.2294 seconds


Unnamed: 0,inequality_runtime,ABC_runtime,ABX_runtime
0,0.640286,0.103704,0.229437


In [37]:
runtime_bm

Unnamed: 0,data_shape,inequality_runtime,ABC_runtime,ABX_runtime
0,"(2500, 6)",0.6166,0.1032,0.2295
1,"(5000, 6)",1.2236,0.2072,0.4627
2,"(7500, 6)",1.823,0.3145,0.729
3,"(10000, 6)",2.4444,0.4146,0.9295
4,"(12500, 6)",3.1063,0.531,1.2013
5,"(15000, 6)",3.7028,0.6284,1.4029
6,"(17500, 6)",4.4046,0.7393,1.6483
7,"(20000, 6)",4.9521,0.8451,1.8723
8,"(22500, 6)",5.6108,0.9699,2.1376
9,"(25000, 6)",6.1876,1.0374,2.3227


In [36]:
data_select = data.sample(round(data.shape[0]*0.1),ignore_index=True)
runtime_bm = pd.DataFrame(columns = ['data_shape','inequality_runtime','ABC_runtime','ABX_runtime'])

for div in np.linspace(1, 0, num=20, endpoint=False)[::-1]:
    
    print("*"*20)
    data_div = data_select.sample(round(data_select.shape[0]*round(div,2)),ignore_index=True)
    
    detector_obj = Detector(data_div, 0.95)
    
    detector_obj.detect_inequality()
    
    detector_obj.detect_ABC()
    
    detector_obj.detect_ABX()
    
    list_row = [data_div.shape, 
                round(detector_obj.inequality_runtime,4),
                round(detector_obj.ABC_runtime,4),
                round(detector_obj.ABX_runtime,4)
               ]
    
    runtime_bm.loc[len(runtime_bm)] = list_row
    

********************
3 relationships detected
Execution Time: 0.6166 seconds
1 relationships detected
Execution Time: 0.1032 seconds
3 relationships detected
Execution Time: 0.2295 seconds
********************
3 relationships detected
Execution Time: 1.2236 seconds
1 relationships detected
Execution Time: 0.2072 seconds
3 relationships detected
Execution Time: 0.4627 seconds
********************
3 relationships detected
Execution Time: 1.823 seconds
1 relationships detected
Execution Time: 0.3145 seconds
3 relationships detected
Execution Time: 0.729 seconds
********************
3 relationships detected
Execution Time: 2.4444 seconds
1 relationships detected
Execution Time: 0.4146 seconds
3 relationships detected
Execution Time: 0.9295 seconds
********************
3 relationships detected
Execution Time: 3.1063 seconds
1 relationships detected
Execution Time: 0.531 seconds
3 relationships detected
Execution Time: 1.2013 seconds
********************
3 relationships detected
Execution Ti

In [58]:
data_select = data.copy()

for div in np.linspace(1, 0, num=20, endpoint=False):
    
    print("*"*20)
    data_div = data_select.sample(round(data_select.shape[0]*round(div,2)),ignore_index=True)
    
    if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):
        pass
    else:
        detector_obj = Detector(data_div, 0.95)

        detector_obj.detect_inequality()

        detector_obj.detect_ABC()

        detector_obj.detect_ABX()

        list_row = [data_div.shape, 
                    round(detector_obj.inequality_runtime,4),
                    round(detector_obj.ABC_runtime,4),
                    round(detector_obj.ABX_runtime,4)
                   ]

        runtime_bm.loc[len(runtime_bm)] = list_row
    

********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 131.2707 seconds
1 relationships detected
Execution Time: 21.8967 seconds
3 relationships detected
Execution Time: 48.1451 seconds
********************
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 125.4218 seconds
1 relationships detected
Execution Time: 21.2625 seconds
3 relationships detected
Execution Time: 49.9915 seconds
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 122.2252 seconds
1 relationships detected
Execution Time: 21.7451 seconds
3 relationships detected
Execution Time: 47.7893 seconds
********************
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 202.5212 seconds
1 relationships detected
Execution Time: 16.3544 seconds
3 relationships detected
Execution Time: 39.2178 seconds
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 91.2009 seconds
1 relationships detected
Execution Time: 15.8853 seconds
3 relationships detected
Execution Time: 34.5387 seconds
********************
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 77.8057 seconds
1 relationships detected
Execution Time: 13.134 seconds
3 relationships detected
Execution Time: 29.505 seconds
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 70.5248 seconds
1 relationships detected
Execution Time: 11.5651 seconds
3 relationships detected
Execution Time: 26.1706 seconds
********************
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 56.3154 seconds
1 relationships detected
Execution Time: 9.5645 seconds
3 relationships detected
Execution Time: 21.4908 seconds
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 50.1869 seconds
1 relationships detected
Execution Time: 8.5757 seconds
3 relationships detected
Execution Time: 19.086 seconds
********************
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 37.7801 seconds
1 relationships detected
Execution Time: 6.4824 seconds
3 relationships detected
Execution Time: 14.2914 seconds
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


3 relationships detected
Execution Time: 31.2503 seconds
1 relationships detected
Execution Time: 5.7247 seconds
3 relationships detected
Execution Time: 12.0582 seconds
********************
********************
********************
********************


  if (runtime_bm['data_shape'].str.contains(str(data_div.shape)).any()):


In [9]:
path ="/Users/santoshitadanki/Synthetic_Data_Generation_DS/data/benchmark_results/" 
runtime_bm.to_csv(path+"runtime_benchmark__02.23.01.csv",index=False)
runtime_bm

NameError: name 'runtime_bm' is not defined

-----

In [70]:
data_select = data.sample(round(data.shape[0]*0.1),ignore_index=True)
model_bm = pd.DataFrame(columns = ['data_rows','data_columns','constrain type','modelfit_time','datagen_time'])

for div in np.linspace(1, 0, num=20, endpoint=False)[::-1]:
    
    print("*"*20)
    data_div = data_select.sample(round(data_select.shape[0]*round(div,2)),ignore_index=True)
    
    detector_obj = Detector(data_div, 0.95)
    
    detector_obj.detect_inequality()
    
    detector_obj.create_constraints()
    
    model = GaussianCopula(constraints=detector_obj.constraints)

    st = time.time()
    model.fit(detector_obj.data)
    et = time.time()
    modelfit_time = et - st
    
    st = time.time()
    new_data = model.sample(num_rows= round(data_div.shape[0]*0.1))
    et = time.time()
    datagen_time = et - st
    
    list_row = [data_div.shape[0], data_div.shape[1],
                "Inequality",
                modelfit_time,datagen_time
               ]
    
    model_bm.loc[len(model_bm)] = list_row
    

********************




3 relationships detected
Execution Time: 0.6208 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|████████████████████████| 250/250 [00:00<00:00, 3159.72it/s]

********************





3 relationships detected
Execution Time: 1.2578 seconds
Constrainsts created successfully!
Execution Time: 0.0 seconds


Sampling rows: 100%|████████████████████████| 500/500 [00:00<00:00, 4706.10it/s]


********************




3 relationships detected
Execution Time: 1.8661 seconds
Constrainsts created successfully!
Execution Time: 0.0001 seconds


Sampling rows: 100%|████████████████████████| 750/750 [00:00<00:00, 4611.92it/s]


********************




3 relationships detected
Execution Time: 2.5262 seconds
Constrainsts created successfully!
Execution Time: 0.0001 seconds


Sampling rows: 100%|██████████████████████| 1000/1000 [00:00<00:00, 3098.97it/s]


********************
3 relationships detected
Execution Time: 3.336 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 1250/1250 [00:00<00:00, 2926.59it/s]


********************




3 relationships detected
Execution Time: 4.304 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 1500/1500 [00:00<00:00, 2703.52it/s]


********************




3 relationships detected
Execution Time: 4.3993 seconds
Constrainsts created successfully!
Execution Time: 0.0001 seconds


Sampling rows: 100%|██████████████████████| 1750/1750 [00:00<00:00, 2674.69it/s]


********************


  a = (self.min - loc) / scale
  b = (self.max - loc) / scale


3 relationships detected
Execution Time: 5.0896 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 2000/2000 [00:01<00:00, 1832.25it/s]


********************
3 relationships detected
Execution Time: 5.6666 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 2250/2250 [00:00<00:00, 2333.62it/s]


********************
3 relationships detected
Execution Time: 6.3585 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 2500/2500 [00:01<00:00, 2174.66it/s]


********************
3 relationships detected
Execution Time: 7.2012 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 2750/2750 [00:00<00:00, 3036.15it/s]


********************
3 relationships detected
Execution Time: 7.8464 seconds
Constrainsts created successfully!
Execution Time: 0.0026 seconds


  a = (self.min - loc) / scale
  b = (self.max - loc) / scale
Sampling rows: 100%|██████████████████████| 3000/3000 [00:00<00:00, 3002.05it/s]


********************
3 relationships detected
Execution Time: 8.4345 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 3250/3250 [00:01<00:00, 2857.37it/s]


********************
3 relationships detected
Execution Time: 9.4959 seconds
Constrainsts created successfully!
Execution Time: 0.0004 seconds


  a = (self.min - loc) / scale
  b = (self.max - loc) / scale
Sampling rows: 100%|██████████████████████| 3500/3500 [00:02<00:00, 1636.47it/s]


********************
3 relationships detected
Execution Time: 9.7695 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


  a = (self.min - loc) / scale
  b = (self.max - loc) / scale
Sampling rows: 100%|██████████████████████| 3750/3750 [00:01<00:00, 2594.05it/s]


********************
3 relationships detected
Execution Time: 10.1258 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 4000/4000 [00:01<00:00, 2415.02it/s]


********************
3 relationships detected
Execution Time: 11.0379 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


  a = (self.min - loc) / scale
  b = (self.max - loc) / scale
Sampling rows: 100%|██████████████████████| 4250/4250 [00:03<00:00, 1410.30it/s]


********************
3 relationships detected
Execution Time: 12.0362 seconds
Constrainsts created successfully!
Execution Time: 0.0023 seconds


  a = (self.min - loc) / scale
  b = (self.max - loc) / scale
Sampling rows: 100%|██████████████████████| 4500/4500 [00:02<00:00, 1861.24it/s]


********************
3 relationships detected
Execution Time: 12.1874 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


Sampling rows: 100%|██████████████████████| 4750/4750 [00:02<00:00, 2211.52it/s]


********************
3 relationships detected
Execution Time: 12.6625 seconds
Constrainsts created successfully!
Execution Time: 0.0002 seconds


  a = (self.min - loc) / scale
  b = (self.max - loc) / scale
Sampling rows: 100%|██████████████████████| 5000/5000 [00:03<00:00, 1403.42it/s]


In [None]:
data_select = data.copy()

for div in np.linspace(1, 0, num=20, endpoint=False):
    
    print("*"*20)
    data_div = data_select.sample(round(data_select.shape[0]*round(div,2)),ignore_index=True)
    
    if (runtime_bm['data_shape']==data_div.shape):
        pass
    
    else:
        data_div = data_select.sample(round(data_select.shape[0]*round(div,2)),ignore_index=True)

        detector_obj = Detector(data_div, 0.95)

        detector_obj.detect_inequality()

        detector_obj.create_constraints()

        model = GaussianCopula(constraints=detector_obj.constraints)

        st = time.time()
        model.fit(detector_obj.data)
        et = time.time()
        modelfit_time = et - st

        st = time.time()
        new_data = model.sample(num_rows= round(data_div.shape[0]*0.1))
        et = time.time()
        datagen_time = et - st

        list_row = [data_div.shape[0], data_div.shape[1],
                    "Inequality",
                    modelfit_time,datagen_time
                   ]

        model_bm.loc[len(model_bm)] = list_row


In [74]:
path ="/Users/santoshitadanki/Synthetic_Data_Generation_DS/data/benchmark_results/" 
model_bm.to_csv(path+"model_benchmark_02.23.01.csv",index=False)
model_bm

Unnamed: 0,data_rows,data_columns,constrain type,modelfit_time,datagen_time
0,2500,6,Inequality,0.095702,0.081148
1,5000,6,Inequality,0.113199,0.107673
2,7500,6,Inequality,0.114196,0.164463
3,10000,6,Inequality,0.138735,0.325549
4,12500,6,Inequality,0.205123,0.429706
5,15000,6,Inequality,0.186962,0.557639
6,17500,6,Inequality,0.175969,0.656279
7,20000,6,Inequality,0.187943,1.094658
8,22500,6,Inequality,0.235531,0.96612
9,25000,6,Inequality,0.201111,1.153947
