This notebook performs the synthesis using our proposed synthesization approach applied to the IPUMS data. The synthesis model is a CART. Logistic regression with three way interactions is used as the pMSE model.

In [12]:
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
import itertools
import os
import pickle

from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction

from sklearn.preprocessing import StandardScaler

from helper_functions import *

rng = np.random.RandomState(42)

Import the data.

In [13]:
# import standardized lat/long location data
train_data = pd.read_csv("../Data/IBM_Telco_Cleaned.csv")

In [14]:
train_data

Unnamed: 0,label,cat6,cat9,cat17,cat20,cat22,cat23
0,1,fbad5c96,a73ee510,8efede7f,a458ea53,ad3062eb,c7dc6720
1,0,fbad5c96,a73ee510,e5ba7672,a458ea53,c9d4222a,bcdee96c
2,0,fbad5c96,a73ee510,e5ba7672,a458ea53,c0061c6d,423fab69
3,0,7e0ccccf,a73ee510,3486227d,a458ea53,ad3062eb,c7dc6720
4,0,7e0ccccf,a73ee510,e5ba7672,5840adea,ad3062eb,32c7478e
...,...,...,...,...,...,...,...
10071,1,7e0ccccf,a73ee510,e5ba7672,a458ea53,c9d4222a,32c7478e
10072,0,7e0ccccf,a73ee510,e5ba7672,b1252a9d,ad3062eb,c7dc6720
10073,0,7e0ccccf,7cc72ec2,1e88c74f,5840adea,c9d4222a,bcdee96c
10074,0,fbad5c96,a73ee510,e5ba7672,a458ea53,c0061c6d,c7dc6720


***

Functions for logistic and multinomial logistic regression synthesizer.

In [8]:
def train_models_mn(#overall parameters
                    train_data,
                    number_synthetic_datasets,
                    # hyperparameters for GMM, end with underscore means Bayesian optimization will choose
                    number_gmm_initializations,
                    num_components_,
                    # hyperparameters for CART, end with underscore means Bayesian optimization will choose
                    C_gender,
                    C_SeniorCitizen,
                    C_Partner,
                    C_Dependents,
                    C_tenure,
                    C_PhoneService,
                    C_MultipleLines,
                    C_OnlineSecurity,
                    C_OnlineBackup,
                    C_DeviceProtection,
                    C_TechSupport,
                    C_StreamingTV,
                    C_StreamingMovies,
                    C_PaperlessBilling
):
    
    num_samples = train_data.shape[0]

    # normalized version of training data
    scaler = StandardScaler().fit(train_data.loc[:,['Churn']])
    norm_train = train_data.copy()
    norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])
    
    ########## Code for GMM ############
    
    # fit GMM model
    GMM = GaussianMixture(num_components_, n_init=number_gmm_initializations, covariance_type='full', init_params="k-means++", random_state=rng).fit(norm_train.loc[:,['Churn']])
    
    # list for synthetic datasets
    sXs = []
    
    # generate and store number_synthetic_datasets synthetic datasets
    for i in range(number_synthetic_datasets):
        sX = GMM.sample(num_samples)[0]
        sX = pd.DataFrame(sX, columns=['Churn'])
        sXs.append(sX)
        
    ####################################################################################################
        
    ########### Code for gender MN ##########
    
    synth_gender_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.gender, 
                                                   penalty_param=C_gender)
    print("After synthesis:", synth_gender_vars[i].std())
    sXs = [pd.concat([Y, synth_gender_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################
        
    ########### Code for SeniorCitizen MN ##########
    
    synth_SeniorCitizen_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['gender', 'Churn']], 
                                             synth_data_sets=sXs, 
                                             target=norm_train.SeniorCitizen, 
                                             penalty_param=C_SeniorCitizen)
    print("After synthesis:", synth_SeniorCitizen_vars[i].std())
    sXs = [pd.concat([Y, synth_SeniorCitizen_vars[i]], axis=1) for i,Y in enumerate(sXs)]
    
    ####################################################################################################

    ########### Code for Partner MN ##########
    
    synth_Partner_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.Partner, 
                                                   penalty_param=C_Partner)
    print("After synthesis:", synth_Partner_vars[i].std())
    sXs = [pd.concat([Y, synth_Partner_vars[i]], axis=1) for i,Y in enumerate(sXs)]  
     
    ####################################################################################################

    ########### Code for Dependents MN ##########

    synth_Dependents_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.Dependents, 
                                                   penalty_param=C_Dependents)
    print("After synthesis:", synth_Dependents_vars[i].std())
    sXs = [pd.concat([Y, synth_Dependents_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for tenure MN ##########

    synth_tenure_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.tenure, 
                                                   penalty_param=C_tenure)
    print("After synthesis:", synth_tenure_vars[i].std())
    sXs = [pd.concat([Y, synth_tenure_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for PhoneService MN ##########

    synth_PhoneService_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.PhoneService, 
                                                   penalty_param=C_PhoneService)
    print("After synthesis:", synth_PhoneService_vars[i].std())
    sXs = [pd.concat([Y, synth_PhoneService_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for MultipleLines MN ##########

    synth_MultipleLines_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.MultipleLines, 
                                                   penalty_param=C_MultipleLines)
    print("After synthesis:", synth_MultipleLines_vars[i].std())
    sXs = [pd.concat([Y, synth_MultipleLines_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for OnlineSecurity MN ##########

    synth_OnlineSecurity_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.OnlineSecurity, 
                                                   penalty_param=C_OnlineSecurity)
    print("After synthesis:", synth_OnlineSecurity_vars[i].std())
    sXs = [pd.concat([Y, synth_OnlineSecurity_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for OnlineBackup MN ##########

    synth_OnlineBackup_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['OnlineSecurity', 'MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.OnlineBackup, 
                                                   penalty_param=C_OnlineBackup)
    print("After synthesis:", synth_OnlineBackup_vars[i].std())
    sXs = [pd.concat([Y, synth_OnlineBackup_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for DeviceProtection MN ##########

    synth_DeviceProtection_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['OnlineBackup', 'OnlineSecurity', 'MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.DeviceProtection, 
                                                   penalty_param=C_DeviceProtection)
    print("After synthesis:", synth_DeviceProtection_vars[i].std())
    sXs = [pd.concat([Y, synth_DeviceProtection_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for TechSupport MN ##########

    synth_TechSupport_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['DeviceProtection', 'OnlineBackup', 'OnlineSecurity', 'MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.TechSupport, 
                                                   penalty_param=C_TechSupport)
    print("After synthesis:", synth_TechSupport_vars[i].std())
    sXs = [pd.concat([Y, synth_TechSupport_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for StreamingTV MN ##########

    synth_StreamingTV_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['TechSupport', 'DeviceProtection', 'OnlineBackup', 'OnlineSecurity', 'MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.StreamingTV, 
                                                   penalty_param=C_StreamingTV)
    print("After synthesis:", synth_StreamingTV_vars[i].std())
    sXs = [pd.concat([Y, synth_StreamingTV_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for StreamingMovies MN ##########

    synth_StreamingMovies_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['StreamingTV', 'TechSupport', 'DeviceProtection', 'OnlineBackup', 'OnlineSecurity', 'MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.StreamingMovies, 
                                                   penalty_param=C_StreamingMovies)
    print("After synthesis:", synth_StreamingMovies_vars[i].std())
    sXs = [pd.concat([Y, synth_StreamingMovies_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for PaperlessBilling MN ##########

    synth_PaperlessBilling_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['StreamingMovies', 'StreamingTV', 'TechSupport', 'DeviceProtection', 'OnlineBackup', 'OnlineSecurity', 'MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
                                                   synth_data_sets=sXs, 
                                                   target=norm_train.PaperlessBilling, 
                                                   penalty_param=C_PaperlessBilling)
    print("After synthesis:", synth_PaperlessBilling_vars[i].std())
    sXs = [pd.concat([Y, synth_PaperlessBilling_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for MonthlyCharges MN ##########

    #synth_MonthlyCharges_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['PaperlessBilling', 'StreamingMovies', 'StreamingTV', 'TechSupport', 'DeviceProtection', 'OnlineBackup', 'OnlineSecurity', 'MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
   #                                                synth_data_sets=sXs, 
   #                                                target=norm_train.MonthlyCharges, 
   #                                               penalty_param=C_MonthlyCharges)
   # print("After synthesis:", synth_MonthlyCharges_vars[i].std())
    #sXs = [pd.concat([Y, synth_MonthlyCharges_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################

    ########### Code for TotalCharges MN ##########

    #synth_TotalCharges_vars = multinomial_synthesizer(orig_data=norm_train.loc[:,['MonthlyCharges', 'PaperlessBilling', 'StreamingMovies', 'StreamingTV', 'TechSupport', 'DeviceProtection', 'OnlineBackup', 'OnlineSecurity', 'MultipleLines', 'PhoneService', 'tenure', 'Dependents', 'Partner', 'SeniorCitizen', 'gender', 'Churn']], 
    #                                               synth_data_sets=sXs, 
   #                                                target=norm_train.TotalCharges, 
    #                                               penalty_param=C_TotalCharges)
    #print("After synthesis:", synth_TotalCharges_vars[i].std())
    #sXs = [pd.concat([Y, synth_TotalCharges_vars[i]], axis=1) for i,Y in enumerate(sXs)]  

    ####################################################################################################
        
    ###### Calculate pMSE ratios ######
    pmse_ratios = [pmse_ratio(norm_train, Y) for Y in sXs]

    # convert sXs to original scale (un-normalize?)
    for i, Z in enumerate(sXs):
        H = train_data.sample(frac=1.0, replace=True, ignore_index=True).loc[:,['Churn']]
        scaler = StandardScaler().fit(H)
        Z.loc[:,['Churn']] = scaler.inverse_transform(Z.loc[:,['Churn']])
    print("pmse_ratios:", pd.Series(pmse_ratios).isna().sum())
    print("sXs:", pd.Series(sXs).isna().sum())
    print("GMM:", pd.Series(GMM).isna().sum())
    return pmse_ratios, sXs, GMM

In [9]:
def optimize_models_mn(train_data,
                       number_synthetic_datasets,
                       number_gmm_initializations,
                       random_state):

    def evaluate_models(num_components_, 
                        C_gender,
                        C_SeniorCitizen,
                        C_Partner,
                        C_Dependents,
                        C_tenure,
                        C_PhoneService,
                        C_MultipleLines,
                        C_OnlineSecurity,
                        C_OnlineBackup,
                        C_DeviceProtection,
                        C_TechSupport,
                        C_StreamingTV,
                        C_StreamingMovies,
                        C_PaperlessBilling):

        pmse_ratios, _, _ = train_models_mn(train_data=train_data,
                                            number_synthetic_datasets=number_synthetic_datasets,
                                            number_gmm_initializations=number_gmm_initializations,
                                            num_components_=int(num_components_),
                                            C_gender = C_gender,
                        C_SeniorCitizen = C_SeniorCitizen,
                        C_Partner = C_Partner,
                        C_Dependents = C_Dependents,
                        C_tenure = C_tenure,
                        C_PhoneService = C_PhoneService,
                        C_MultipleLines = C_MultipleLines,
                        C_OnlineSecurity = C_OnlineSecurity,
                        C_OnlineBackup = C_OnlineBackup,
                        C_DeviceProtection = C_DeviceProtection,
                        C_TechSupport = C_TechSupport,
                        C_StreamingTV = C_StreamingTV,
                        C_StreamingMovies = C_StreamingMovies,
                        C_PaperlessBilling = C_PaperlessBilling)
        
        return -1 * ((1 - np.mean(pmse_ratios))**2)

    optimizer = BayesianOptimization(
        f=evaluate_models,
        pbounds={
            "num_components_": (10, 200.99),
            "C_gender": (0.001, 3),
            "C_SeniorCitizen": (0.001, 3),
            "C_Partner": (0.001, 3),
            "C_Dependents": (0.001, 3),
            "C_tenure": (0.001, 3),
            "C_PhoneService": (0.001, 3),
            "C_MultipleLines": (0.001, 3),
            "C_OnlineSecurity": (0.001, 3),
            "C_OnlineBackup": (0.001, 3),
            "C_DeviceProtection": (0.001, 3),
            "C_TechSupport": (0.001, 3),
            "C_StreamingTV": (0.001, 3),
            "C_StreamingMovies": (0.001, 3),
            "C_PaperlessBilling": (0.001, 3)
        },
        random_state=random_state)
    
    utility = UtilityFunction(kind="ei", xi=1e-02)
    optimizer.maximize(init_points=5, n_iter=25, acquisition_function=utility)
    print("Final Result: ", optimizer.max)
    return optimizer.max, optimizer

The default value for $\alpha = 1e-06$.

In [10]:
nsd = 10
ngi = 2
random_states = [np.random.RandomState(1006), np.random.RandomState(428)]

In [11]:
optimization_results = [optimize_models_mn(train_data=train_data, number_synthetic_datasets=nsd, number_gmm_initializations=ngi, random_state=r) for r in random_states]
print("done")

|   iter    |  target   | C_Depe... | C_Devi... | C_Mult... | C_Onli... | C_Onli... | C_Pape... | C_Partner | C_Phon... | C_Seni... | C_Stre... | C_Stre... | C_Tech... | C_gender  | C_tenure  | num_co... |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000300111126067




After synthesis: 0.3697789426902705




After synthesis: 0.4998515914033822




After synthesis: 0.4486794017244652




After synthesis: 24.502340839815766




After synthesis: 0.29863421796671286




After synthesis: 0.46085296005223586




After synthesis: 0.45075006805284973




After synthesis: 0.4785789100220464




After synthesis: 0.4733272238913268




After synthesis: 0.4492364622692459




After synthesis: 0.4827654793973982




After synthesis: 0.4792186496666769




After synthesis: 0.4922372374655851
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m1        [0m | [0m-3.397e+0[0m | [0m2.23     [0m | [0m0.68     [0m | [0m1.928    [0m | [0m2.647    [0m | [0m2.222    [0m | [0m2.164    [0m | [0m0.8867   [0m | [0m1.577    [0m | [0m0.5221   [0m | [0m2.436    [0m | [0m2.548    [0m | [0m0.7703   [0m | [0m0.7125   [0m | [0m1.376    [0m | [0m53.72    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000306562763046




After synthesis: 0.36743611524220177




After synthesis: 0.4996026088390762




After synthesis: 0.4525712873257519




After synthesis: 24.325777896193152




After synthesis: 0.3040863279501736




After synthesis: 0.46354890473053373




After synthesis: 0.45068184781964177




After synthesis: 0.47605315857173147




After synthesis: 0.4763261342869661




After synthesis: 0.44804843819373413




After synthesis: 0.482534757445582




After synthesis: 0.47494440416504546




After synthesis: 0.4938428343739478
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m2        [0m | [0m-3.44e+03[0m | [0m0.6068   [0m | [0m2.948    [0m | [0m2.942    [0m | [0m2.205    [0m | [0m1.229    [0m | [0m1.857    [0m | [0m0.2386   [0m | [0m1.198    [0m | [0m1.817    [0m | [0m1.352    [0m | [0m1.474    [0m | [0m1.094    [0m | [0m0.8678   [0m | [0m2.986    [0m | [0m166.0    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49992657254167283




After synthesis: 0.37221689052182727




After synthesis: 0.49976647276923025




After synthesis: 0.4497209704618997




After synthesis: 24.88273574269308




After synthesis: 0.30793978769551733




After synthesis: 0.4632610884031088




After synthesis: 0.44734204202205186




After synthesis: 0.47913394745756027




After synthesis: 0.47659743066556864




After synthesis: 0.4482592628703848




After synthesis: 0.481035909173932




After synthesis: 0.4771349968534866




After synthesis: 0.49495935971463156
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m3        [0m | [0m-3.505e+0[0m | [0m0.7958   [0m | [0m1.075    [0m | [0m2.009    [0m | [0m2.777    [0m | [0m0.05582  [0m | [0m2.509    [0m | [0m0.6121   [0m | [0m1.673    [0m | [0m2.657    [0m | [0m0.5572   [0m | [0m1.073    [0m | [0m1.624    [0m | [0m0.7103   [0m | [0m0.6323   [0m | [0m110.2    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49996169969585913




After synthesis: 0.3671737800572459




After synthesis: 0.49990483360600796




After synthesis: 0.4535659134308788




After synthesis: 24.688067220563347




After synthesis: 0.30203648091846813




After synthesis: 0.474615920298263




After synthesis: 0.45303696407340543




After synthesis: 0.4716550845698429




After synthesis: 0.47249806013015655




After synthesis: 0.43787137210138705




After synthesis: 0.48131690865206317




After synthesis: 0.4805898532042456




After synthesis: 0.49251381117269477
pmse_ratios: 0
sXs: 0
GMM: 0
| [95m4        [0m | [95m-2.979e+0[0m | [95m1.945    [0m | [95m0.7459   [0m | [95m0.06765  [0m | [95m0.3614   [0m | [95m1.277    [0m | [95m2.292    [0m | [95m2.13     [0m | [95m0.5991   [0m | [95m1.131    [0m | [95m2.418    [0m | [95m0.7809   [0m | [95m0.3059   [0m | [95m2.768    [0m | [95m0.1074   [0m | [95m113.3    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000348901424159




After synthesis: 0.3710665149650793




After synthesis: 0.49977111231002325




After synthesis: 0.4476959423859782




After synthesis: 24.614781508704727




After synthesis: 0.2967177029540009




After synthesis: 0.46162582242026795




After synthesis: 0.44992778491851004




After synthesis: 0.4755021575447143




After synthesis: 0.4754559363246205




After synthesis: 0.4513615759857766




After synthesis: 0.4798480823072062




After synthesis: 0.47342386790715507




After synthesis: 0.4928827301397736
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m5        [0m | [0m-3.346e+0[0m | [0m0.05269  [0m | [0m1.597    [0m | [0m2.531    [0m | [0m0.3284   [0m | [0m2.335    [0m | [0m1.059    [0m | [0m2.016    [0m | [0m0.4608   [0m | [0m0.2048   [0m | [0m2.285    [0m | [0m0.219    [0m | [0m2.931    [0m | [0m2.029    [0m | [0m2.107    [0m | [0m194.9    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49998202472906733




After synthesis: 0.36332369213190485




After synthesis: 0.4998236369766455




After synthesis: 0.44832942578625307




After synthesis: 24.418074936247567




After synthesis: 0.3148962551869839




After synthesis: 0.46192076863729353




After synthesis: 0.45068184781964177




After synthesis: 0.48001419546228974




After synthesis: 0.46782287955795665




After synthesis: 0.44453303619040485




After synthesis: 0.47947165727534186




After synthesis: 0.4766424836614372




After synthesis: 0.4945432718255817
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m6        [0m | [0m-3.502e+0[0m | [0m0.7624   [0m | [0m0.1361   [0m | [0m2.853    [0m | [0m2.081    [0m | [0m0.04833  [0m | [0m2.632    [0m | [0m0.2462   [0m | [0m2.811    [0m | [0m1.715    [0m | [0m2.855    [0m | [0m0.7547   [0m | [0m1.189    [0m | [0m2.182    [0m | [0m2.206    [0m | [0m117.5    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000261804360061




After synthesis: 0.3657236311563656




After synthesis: 0.4997523118174976




After synthesis: 0.44902798232223984




After synthesis: 24.579417946183995




After synthesis: 0.2978699039597695




After synthesis: 0.47155499950728147




After synthesis: 0.45223702997571114




After synthesis: 0.4798480823072062




After synthesis: 0.47399974509938075




After synthesis: 0.44357738474374636




After synthesis: 0.4843719196364896




After synthesis: 0.48071204919849864




After synthesis: 0.4912423924792137
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m7        [0m | [0m-3.08e+03[0m | [0m0.9409   [0m | [0m1.08     [0m | [0m0.1051   [0m | [0m2.405    [0m | [0m0.9912   [0m | [0m0.4648   [0m | [0m0.7555   [0m | [0m0.04254  [0m | [0m1.582    [0m | [0m1.996    [0m | [0m1.495    [0m | [0m1.112    [0m | [0m2.749    [0m | [0m2.435    [0m | [0m62.7     [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49993528398341824




After synthesis: 0.3709382034578107




After synthesis: 0.4995846898975681




After synthesis: 0.4476959423859782




After synthesis: 24.49028884271753




After synthesis: 0.3039008812922957




After synthesis: 0.4607332866515529




After synthesis: 0.45230399036233837




After synthesis: 0.4812368493168238




After synthesis: 0.47175497744077854




After synthesis: 0.4533679632888066




After synthesis: 0.4787076844851935




After synthesis: 0.47963941413123545




After synthesis: 0.4930035969130738
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m8        [0m | [0m-3.362e+0[0m | [0m2.417    [0m | [0m2.992    [0m | [0m0.8586   [0m | [0m2.211    [0m | [0m2.173    [0m | [0m2.499    [0m | [0m0.3518   [0m | [0m2.357    [0m | [0m0.7729   [0m | [0m1.832    [0m | [0m0.5798   [0m | [0m2.065    [0m | [0m1.104    [0m | [0m0.314    [0m | [0m47.31    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4998739780204067




After synthesis: 0.3680901766558851




After synthesis: 0.4994627513830733




After synthesis: 0.44909753128709257




After synthesis: 24.536828088284945




After synthesis: 0.3053794526702137




After synthesis: 0.46308779027009284




After synthesis: 0.44846958381116614




After synthesis: 0.47466298791576




After synthesis: 0.47327883057998005




After synthesis: 0.4519014099572842




After synthesis: 0.48218564501807093




After synthesis: 0.4738564158388552




After synthesis: 0.4936610348941344
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m9        [0m | [0m-3.294e+0[0m | [0m1.41     [0m | [0m0.9881   [0m | [0m0.5299   [0m | [0m0.129    [0m | [0m2.392    [0m | [0m2.475    [0m | [0m2.254    [0m | [0m1.82     [0m | [0m1.875    [0m | [0m1.41     [0m | [0m0.3972   [0m | [0m2.83     [0m | [0m0.4202   [0m | [0m1.137    [0m | [0m147.0    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000285998141155




After synthesis: 0.3663843267513063




After synthesis: 0.49965418274594414




After synthesis: 0.4521700150824167




After synthesis: 24.49154503857474




After synthesis: 0.31100334939878327




After synthesis: 0.46446142552648895




After synthesis: 0.4497899640067554




After synthesis: 0.4741901881087765




After synthesis: 0.4724488563826021




After synthesis: 0.44133467193364806




After synthesis: 0.47972301838246867




After synthesis: 0.47517762352351595




After synthesis: 0.49243888871739866
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m10       [0m | [0m-3.428e+0[0m | [0m0.7205   [0m | [0m1.213    [0m | [0m2.996    [0m | [0m0.3713   [0m | [0m0.2652   [0m | [0m0.3086   [0m | [0m2.896    [0m | [0m2.464    [0m | [0m2.774    [0m | [0m2.603    [0m | [0m1.008    [0m | [0m0.5075   [0m | [0m0.3544   [0m | [0m2.585    [0m | [0m49.33    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.499784788618795




After synthesis: 0.3638599314705243




After synthesis: 0.49949625615852666




After synthesis: 0.44992778491851004




After synthesis: 24.377785842851946




After synthesis: 0.30938758875828337




After synthesis: 0.46451802961071026




After synthesis: 0.45183412216639324




After synthesis: 0.4786218808191171




After synthesis: 0.4722515631814523




After synthesis: 0.45095439856503267




After synthesis: 0.483070599906656




After synthesis: 0.4778413594382356




After synthesis: 0.4911883344604972
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m11       [0m | [0m-3.466e+0[0m | [0m1.901    [0m | [0m1.776    [0m | [0m2.851    [0m | [0m0.5272   [0m | [0m0.6787   [0m | [0m1.272    [0m | [0m0.4113   [0m | [0m1.754    [0m | [0m0.44     [0m | [0m0.1781   [0m | [0m1.969    [0m | [0m2.403    [0m | [0m2.233    [0m | [0m1.295    [0m | [0m200.7    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000167447495251




After synthesis: 0.36887171572139144




After synthesis: 0.49969727798435265




After synthesis: 0.45109034399951164




After synthesis: 24.31569443151602




After synthesis: 0.30666390307045577




After synthesis: 0.4637782466464334




After synthesis: 0.4520358216788377




After synthesis: 0.47489761917933926




After synthesis: 0.47013355934455475




After synthesis: 0.4475545509734399




After synthesis: 0.4815956927343449




After synthesis: 0.4755483318736988




After synthesis: 0.49182581588072477
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m12       [0m | [0m-3.372e+0[0m | [0m2.685    [0m | [0m2.796    [0m | [0m1.811    [0m | [0m0.06131  [0m | [0m1.413    [0m | [0m2.465    [0m | [0m0.6739   [0m | [0m0.9163   [0m | [0m0.9972   [0m | [0m2.567    [0m | [0m0.8834   [0m | [0m0.5021   [0m | [0m2.545    [0m | [0m2.445    [0m | [0m145.1    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000179544628708




After synthesis: 0.36782885609965515




After synthesis: 0.49954776032200976




After synthesis: 0.45263797554142904




After synthesis: 24.277645378675885




After synthesis: 0.3104662813317938




After synthesis: 0.4638354557339135




After synthesis: 0.4493751708266766




After synthesis: 0.47433252386120806




After synthesis: 0.47628075503278655




After synthesis: 0.4417874113016544




After synthesis: 0.4834104395480407




After synthesis: 0.47748966000506793




After synthesis: 0.4922118409158481
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m13       [0m | [0m-3.34e+03[0m | [0m0.05168  [0m | [0m1.655    [0m | [0m2.999    [0m | [0m0.5317   [0m | [0m2.554    [0m | [0m0.4509   [0m | [0m2.185    [0m | [0m1.745    [0m | [0m1.936    [0m | [0m1.191    [0m | [0m2.794    [0m | [0m0.871    [0m | [0m0.41     [0m | [0m2.318    [0m | [0m51.88    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000202529101645




After synthesis: 0.36756713034205246




After synthesis: 0.4997377873475974




After synthesis: 0.4525712873257519




After synthesis: 24.833112024909425




After synthesis: 0.30297094710985156




After synthesis: 0.4638926143022294




After synthesis: 0.4540914049225923




After synthesis: 0.47801609980683335




After synthesis: 0.4716550845698429




After synthesis: 0.44133467193364806




After synthesis: 0.4852681362969591




After synthesis: 0.47461592029826305




After synthesis: 0.4934300137756104
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m14       [0m | [0m-3.457e+0[0m | [0m0.3959   [0m | [0m0.02362  [0m | [0m2.402    [0m | [0m0.5112   [0m | [0m1.1      [0m | [0m1.48     [0m | [0m1.08     [0m | [0m1.01     [0m | [0m0.5206   [0m | [0m2.407    [0m | [0m2.064    [0m | [0m0.3978   [0m | [0m1.282    [0m | [0m0.611    [0m | [0m163.5    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000213416446686




After synthesis: 0.3728525617428432




After synthesis: 0.4999015666341543




After synthesis: 0.44811876910045534




After synthesis: 24.609490239584545




After synthesis: 0.308302767754282




After synthesis: 0.46343393017023676




After synthesis: 0.45142924685947405




After synthesis: 0.47976475200574725




After synthesis: 0.4726943973574433




After synthesis: 0.44072773408957566




After synthesis: 0.48505556467560157




After synthesis: 0.47875051737891905




After synthesis: 0.4937067360754244
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m15       [0m | [0m-3.365e+0[0m | [0m1.86     [0m | [0m1.432    [0m | [0m1.748    [0m | [0m2.749    [0m | [0m2.454    [0m | [0m1.422    [0m | [0m0.3677   [0m | [0m0.8363   [0m | [0m1.018    [0m | [0m2.796    [0m | [0m0.01683  [0m | [0m0.1058   [0m | [0m0.08775  [0m | [0m0.8636   [0m | [0m175.3    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4998703478262946




After synthesis: 0.36143356051035674




After synthesis: 0.49936355599733023




After synthesis: 0.4541568488265356




After synthesis: 24.614662459212507




After synthesis: 0.30992769709099394




After synthesis: 0.46132959499957793




After synthesis: 0.4527711889109345




After synthesis: 0.4784065667843187




After synthesis: 0.47456880552649056




After synthesis: 0.44562366287076727




After synthesis: 0.4847696616982368




After synthesis: 0.47875051737891905




After synthesis: 0.49437217080213625
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m16       [0m | [0m-3.512e+0[0m | [0m1.248    [0m | [0m0.8227   [0m | [0m2.473    [0m | [0m2.758    [0m | [0m1.139    [0m | [0m1.217    [0m | [0m0.7997   [0m | [0m1.703    [0m | [0m0.05226  [0m | [0m1.055    [0m | [0m0.02467  [0m | [0m0.9212   [0m | [0m2.51     [0m | [0m0.9682   [0m | [0m195.1    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4999973486125566




After synthesis: 0.37599468998776125




After synthesis: 0.49996883774808487




After synthesis: 0.44888871731565133




After synthesis: 24.625097794565846




After synthesis: 0.3122506558952508




After synthesis: 0.45982915280284503




After synthesis: 0.44698673376579584




After synthesis: 0.4787076844851935




After synthesis: 0.471203186509034




After synthesis: 0.44783710906979496




After synthesis: 0.48187226526588584




After synthesis: 0.4786218808191171




After synthesis: 0.49241383013951073
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m17       [0m | [0m-3.43e+03[0m | [0m2.828    [0m | [0m0.01218  [0m | [0m1.175    [0m | [0m1.964    [0m | [0m2.831    [0m | [0m2.053    [0m | [0m1.317    [0m | [0m2.032    [0m | [0m0.9501   [0m | [0m0.6482   [0m | [0m1.103    [0m | [0m1.618    [0m | [0m2.359    [0m | [0m1.189    [0m | [0m86.54    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4999048336060079




After synthesis: 0.36612035680007726




After synthesis: 0.4997377873475974




After synthesis: 0.4527711889109345




After synthesis: 24.721218242369613




After synthesis: 0.3064809370954461




After synthesis: 0.45958608294233483




After synthesis: 0.4469155028722022




After synthesis: 0.480874341882705




After synthesis: 0.472399604828919




After synthesis: 0.44965192149040134




After synthesis: 0.4801383016845261




After synthesis: 0.4754559363246205




After synthesis: 0.4930756120582472
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m18       [0m | [0m-3.42e+03[0m | [0m1.989    [0m | [0m2.107    [0m | [0m1.137    [0m | [0m1.535    [0m | [0m0.4296   [0m | [0m2.409    [0m | [0m2.289    [0m | [0m1.19     [0m | [0m1.359    [0m | [0m1.499    [0m | [0m0.267    [0m | [0m1.949    [0m | [0m2.864    [0m | [0m0.5466   [0m | [0m14.84    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000332772452672




After synthesis: 0.37399065080193916




After synthesis: 0.4994070396723993




After synthesis: 0.4520358216788377




After synthesis: 24.552598147478694




After synthesis: 0.3178507297922673




After synthesis: 0.46285601530406856




After synthesis: 0.45068184781964177




After synthesis: 0.4740474268236605




After synthesis: 0.47399974509938075




After synthesis: 0.4441666291575986




After synthesis: 0.48175400473806956




After synthesis: 0.47342386790715507




After synthesis: 0.49203288097740205
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m19       [0m | [0m-3.433e+0[0m | [0m2.171    [0m | [0m1.908    [0m | [0m2.797    [0m | [0m0.2423   [0m | [0m1.008    [0m | [0m0.4547   [0m | [0m0.2392   [0m | [0m2.241    [0m | [0m0.5173   [0m | [0m1.439    [0m | [0m0.2416   [0m | [0m0.3154   [0m | [0m1.935    [0m | [0m1.865    [0m | [0m136.8    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49978926662086687




After synthesis: 0.36835109287313245




After synthesis: 0.49976647276923025




After synthesis: 0.4516993826023347




After synthesis: 24.59027129146873




After synthesis: 0.31136055633336224




After synthesis: 0.46750043276577574




After synthesis: 0.45081823323817927




After synthesis: 0.480549030386366




After synthesis: 0.4738564158388552




After synthesis: 0.4381841025883402




After synthesis: 0.4790065503667984




After synthesis: 0.4775337843777437




After synthesis: 0.4930035969130738
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m20       [0m | [0m-3.354e+0[0m | [0m0.8743   [0m | [0m1.918    [0m | [0m2.551    [0m | [0m2.577    [0m | [0m2.865    [0m | [0m1.716    [0m | [0m1.768    [0m | [0m1.824    [0m | [0m0.3094   [0m | [0m2.086    [0m | [0m0.4437   [0m | [0m0.2219   [0m | [0m1.908    [0m | [0m0.717    [0m | [0m87.94    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49997992774005784




After synthesis: 0.36651615791672526




After synthesis: 0.4997127719931618




After synthesis: 0.45487318485547823




After synthesis: 24.436226217322652




After synthesis: 0.2990152377677762




After synthesis: 0.46354890473053373




After synthesis: 0.4512260697403637




After synthesis: 0.48030313906005234




After synthesis: 0.47508447683867966




After synthesis: 0.4465585007077425




After synthesis: 0.481396787138909




After synthesis: 0.47832011896169385




After synthesis: 0.49072196526711526
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m21       [0m | [0m-3.455e+0[0m | [0m1.866    [0m | [0m0.008046 [0m | [0m2.217    [0m | [0m2.623    [0m | [0m2.171    [0m | [0m1.493    [0m | [0m1.328    [0m | [0m0.4948   [0m | [0m1.477    [0m | [0m2.547    [0m | [0m2.352    [0m | [0m1.668    [0m | [0m1.972    [0m | [0m2.293    [0m | [0m171.7    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000223900534321




After synthesis: 0.36730499851688975




After synthesis: 0.49959667628771764




After synthesis: 0.45210294565821635




After synthesis: 24.30322900951346




After synthesis: 0.30427159531745596




After synthesis: 0.46377824664643347




After synthesis: 0.4540259071770258




After synthesis: 0.48001419546228974




After synthesis: 0.4699790123056971




After synthesis: 0.4448980084723476




After synthesis: 0.48210757010785804




After synthesis: 0.4800969785053305




After synthesis: 0.4924639050779383
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m22       [0m | [0m-3.453e+0[0m | [0m1.405    [0m | [0m0.4021   [0m | [0m2.881    [0m | [0m2.592    [0m | [0m0.6909   [0m | [0m2.583    [0m | [0m2.893    [0m | [0m0.8377   [0m | [0m0.8705   [0m | [0m0.9476   [0m | [0m0.5745   [0m | [0m1.321    [0m | [0m1.205    [0m | [0m2.633    [0m | [0m118.9    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4999592396655468




After synthesis: 0.36900162005864223




After synthesis: 0.49962016393554043




After synthesis: 0.451022398756384




After synthesis: 24.45195323365495




After synthesis: 0.30297094710985156




After synthesis: 0.4604933211310257




After synthesis: 0.45474342491820724




After synthesis: 0.47849283043790647




After synthesis: 0.4754096681997406




After synthesis: 0.43436746345859334




After synthesis: 0.481396787138909




After synthesis: 0.4777975588632163




After synthesis: 0.4927363005301648
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m23       [0m | [0m-3.257e+0[0m | [0m1.255    [0m | [0m2.75     [0m | [0m0.8838   [0m | [0m1.382    [0m | [0m1.377    [0m | [0m2.233    [0m | [0m1.917    [0m | [0m1.684    [0m | [0m1.213    [0m | [0m2.911    [0m | [0m0.673    [0m | [0m0.1616   [0m | [0m0.3453   [0m | [0m2.65     [0m | [0m161.6    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49993242051699965




After synthesis: 0.3706812840181353




After synthesis: 0.4999592396655468




After synthesis: 0.4481890439838893




After synthesis: 24.50518525860738




After synthesis: 0.3061144794686533




After synthesis: 0.4595251856950382




After synthesis: 0.44769594238597826




After synthesis: 0.4802208114170764




After synthesis: 0.47722394085039954




After synthesis: 0.4432072306631912




After synthesis: 0.4837466643755952




After synthesis: 0.4765071849549024




After synthesis: 0.49283408875007667
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m24       [0m | [0m-3.373e+0[0m | [0m0.7658   [0m | [0m2.344    [0m | [0m0.9092   [0m | [0m1.666    [0m | [0m1.362    [0m | [0m2.462    [0m | [0m1.826    [0m | [0m2.38     [0m | [0m0.6434   [0m | [0m0.7849   [0m | [0m1.927    [0m | [0m0.7173   [0m | [0m1.471    [0m | [0m1.014    [0m | [0m166.6    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000154947093266




After synthesis: 0.35869698527318705




After synthesis: 0.4997847886187949




After synthesis: 0.45283771411273155




After synthesis: 24.613268281495035




After synthesis: 0.3010974405224147




After synthesis: 0.4607332866515529




After synthesis: 0.4474837709273436




After synthesis: 0.4786218808191171




After synthesis: 0.47361658591844646




After synthesis: 0.4491670245941515




After synthesis: 0.481396787138909




After synthesis: 0.4755021575447143




After synthesis: 0.4919555491639307
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m25       [0m | [0m-3.346e+0[0m | [0m2.251    [0m | [0m1.772    [0m | [0m1.122    [0m | [0m1.084    [0m | [0m2.91     [0m | [0m2.753    [0m | [0m2.034    [0m | [0m0.7713   [0m | [0m0.02937  [0m | [0m2.215    [0m | [0m1.893    [0m | [0m1.636    [0m | [0m0.9725   [0m | [0m0.1594   [0m | [0m198.2    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4999489961304994




After synthesis: 0.3692611278264873




After synthesis: 0.49953512695526475




After synthesis: 0.4554544557673103




After synthesis: 24.583804356310818




After synthesis: 0.29844342431097265




After synthesis: 0.46115124254274475




After synthesis: 0.4533679632888066




After synthesis: 0.47921864966667693




After synthesis: 0.47536335315637873




After synthesis: 0.44888871731565133




After synthesis: 0.48226354005561467




After synthesis: 0.479260932052292




After synthesis: 0.49283408875007667
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m26       [0m | [0m-3.223e+0[0m | [0m2.602    [0m | [0m2.794    [0m | [0m0.5561   [0m | [0m1.354    [0m | [0m1.13     [0m | [0m0.07857  [0m | [0m0.9309   [0m | [0m0.4452   [0m | [0m0.2169   [0m | [0m0.8656   [0m | [0m0.9876   [0m | [0m2.359    [0m | [0m0.7305   [0m | [0m0.4832   [0m | [0m109.7    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4998515914033822




After synthesis: 0.3738645832684835




After synthesis: 0.4997475107199873




After synthesis: 0.4533679632888066




After synthesis: 24.818867561902806




After synthesis: 0.3088459483376882




After synthesis: 0.4601318236302182




After synthesis: 0.4520358216788377




After synthesis: 0.47475698174401204




After synthesis: 0.47185467824215094




After synthesis: 0.44923646226924596




After synthesis: 0.4808338367458809




After synthesis: 0.47686705057373396




After synthesis: 0.4922625917460682
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m27       [0m | [0m-3.395e+0[0m | [0m2.502    [0m | [0m1.271    [0m | [0m1.213    [0m | [0m1.265    [0m | [0m1.26     [0m | [0m2.086    [0m | [0m2.459    [0m | [0m2.165    [0m | [0m1.258    [0m | [0m1.729    [0m | [0m0.3021   [0m | [0m1.154    [0m | [0m2.383    [0m | [0m0.5299   [0m | [0m114.3    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49995419858200774




After synthesis: 0.3753710542086374




After synthesis: 0.4994141452798172




After synthesis: 0.4493751708266766




After synthesis: 24.27321425753107




After synthesis: 0.3143700709342808




After synthesis: 0.46174395451378963




After synthesis: 0.45081823323817927




After synthesis: 0.4783633659003714




After synthesis: 0.47274336229794045




After synthesis: 0.44019357230740064




After synthesis: 0.4837094843356718




After synthesis: 0.4756865736504775




After synthesis: 0.4940441567687946
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m28       [0m | [0m-3.349e+0[0m | [0m2.324    [0m | [0m1.901    [0m | [0m1.555    [0m | [0m0.9802   [0m | [0m1.845    [0m | [0m0.5764   [0m | [0m0.01812  [0m | [0m2.008    [0m | [0m2.42     [0m | [0m2.9      [0m | [0m0.03689  [0m | [0m0.1207   [0m | [0m2.066    [0m | [0m2.834    [0m | [0m92.44    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.500035374010546




After synthesis: 0.3697789426902705




After synthesis: 0.49959667628771764




After synthesis: 0.4510903439995116




After synthesis: 24.375215666017464




After synthesis: 0.29203859407212523




After synthesis: 0.46519336070649353




After synthesis: 0.45088634340059636




After synthesis: 0.4790915276097601




After synthesis: 0.4765523311011312




After synthesis: 0.44011702731129315




After synthesis: 0.4833728582359587




After synthesis: 0.47735700898758027




After synthesis: 0.4922879037638282
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m29       [0m | [0m-3.147e+0[0m | [0m0.2124   [0m | [0m2.736    [0m | [0m0.5162   [0m | [0m2.765    [0m | [0m2.035    [0m | [0m0.4282   [0m | [0m1.693    [0m | [0m0.1769   [0m | [0m2.7      [0m | [0m2.662    [0m | [0m0.6978   [0m | [0m0.1123   [0m | [0m2.501    [0m | [0m2.883    [0m | [0m144.9    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000318256333852




After synthesis: 0.37016625655623375




After synthesis: 0.4995158735742007




After synthesis: 0.451022398756384




After synthesis: 24.54935700694848




After synthesis: 0.296910216730624




After synthesis: 0.46679569957288586




After synthesis: 0.45223702997571114




After synthesis: 0.47988967900916657




After synthesis: 0.47347211864074523




After synthesis: 0.449513657165288




After synthesis: 0.4795136650634441




After synthesis: 0.4783633659003714




After synthesis: 0.4933598894217032
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m30       [0m | [0m-3.527e+0[0m | [0m2.026    [0m | [0m1.643    [0m | [0m2.128    [0m | [0m2.662    [0m | [0m1.388    [0m | [0m1.898    [0m | [0m0.6701   [0m | [0m0.6384   [0m | [0m2.145    [0m | [0m1.859    [0m | [0m0.1118   [0m | [0m2.288    [0m | [0m0.7044   [0m | [0m0.2649   [0m | [0m107.2    [0m |
Final Result:  {'target': np.float64(-2979.050445216047), 'params': {'C_Dependents': np.float64(1.94491341333544), 'C_DeviceProtection': np.float64(0.745881966381418), 'C_MultipleLines': np.float64(0.06765210803215352), 'C_OnlineBackup': np.float64(0.3613922170069528), 'C_OnlineSecurity': np.float64(1.2774555759182102), 'C_PaperlessBilling': np.float64(2.292223441089738), 'C_Partner': np.float64(2.12960180855679), 'C_PhoneService': np.float64(0.5990504073600055), 'C_SeniorCitizen': np.float64(1.1309147509243902), 'C_StreamingMovies': np.float64(2.4175425895427587), 'C_StreamingTV': np.f

 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.500035374010546




After synthesis: 0.36612035680007726




After synthesis: 0.49977111231002325




After synthesis: 0.4539603555665313




After synthesis: 24.389230708429782




After synthesis: 0.3061144794686533




After synthesis: 0.46192076863729353




After synthesis: 0.4444598696772605




After synthesis: 0.4804672484514845




After synthesis: 0.47366464675160225




After synthesis: 0.442537302751779




After synthesis: 0.4831464326076483




After synthesis: 0.4777975588632163




After synthesis: 0.4916950232114027
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m1        [0m | [0m-3.376e+0[0m | [0m2.455    [0m | [0m2.623    [0m | [0m1.256    [0m | [0m1.336    [0m | [0m0.3865   [0m | [0m0.3158   [0m | [0m1.939    [0m | [0m2.886    [0m | [0m0.873    [0m | [0m0.2011   [0m | [0m1.478    [0m | [0m1.208    [0m | [0m0.6546   [0m | [0m2.504    [0m | [0m44.79    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000348901424159




After synthesis: 0.3732327965508253




After synthesis: 0.4999143924010313




After synthesis: 0.4487492293670155




After synthesis: 24.616120901752915




After synthesis: 0.31419435201535617




After synthesis: 0.4665224200322412




After synthesis: 0.45603140452896324




After synthesis: 0.4766424836614372




After synthesis: 0.47371266014580793




After synthesis: 0.45223702997571114




After synthesis: 0.4811967518028565




After synthesis: 0.4765071849549024




After synthesis: 0.49266251690717855
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m2        [0m | [0m-3.475e+0[0m | [0m1.256    [0m | [0m0.929    [0m | [0m2.961    [0m | [0m0.1235   [0m | [0m0.8866   [0m | [0m1.857    [0m | [0m2.595    [0m | [0m2.433    [0m | [0m1.105    [0m | [0m1.117    [0m | [0m1.3      [0m | [0m1.431    [0m | [0m1.736    [0m | [0m1.721    [0m | [0m101.0    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49995161752756095




After synthesis: 0.3684813996231019




After synthesis: 0.49990806022391937




After synthesis: 0.4543528577211872




After synthesis: 24.54651630294444




After synthesis: 0.3051952542118152




After synthesis: 0.4629720043917922




After synthesis: 0.45169938260233466




After synthesis: 0.472399604828919




After synthesis: 0.47303615040492314




After synthesis: 0.4482592628703848




After synthesis: 0.4828420281499512




After synthesis: 0.4766424836614372




After synthesis: 0.49319479630133073
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m3        [0m | [0m-3.428e+0[0m | [0m2.579    [0m | [0m1.614    [0m | [0m1.954    [0m | [0m0.06669  [0m | [0m1.102    [0m | [0m2.212    [0m | [0m0.842    [0m | [0m1.444    [0m | [0m2.825    [0m | [0m1.916    [0m | [0m0.8327   [0m | [0m2.423    [0m | [0m1.775    [0m | [0m1.381    [0m | [0m157.7    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000252933277664




After synthesis: 0.3772348600451261




After synthesis: 0.49989491162544575




After synthesis: 0.4488190012220731




After synthesis: 24.413764812026287




After synthesis: 0.30297094710985156




After synthesis: 0.4641776500089648




After synthesis: 0.45088634340059636




After synthesis: 0.4769118244348211




After synthesis: 0.47531699118081944




After synthesis: 0.44944444176048853




After synthesis: 0.48123684931682387




After synthesis: 0.47814667040955167




After synthesis: 0.49361516609453676
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m4        [0m | [0m-3.495e+0[0m | [0m2.317    [0m | [0m1.366    [0m | [0m2.477    [0m | [0m0.9168   [0m | [0m2.829    [0m | [0m2.302    [0m | [0m0.2461   [0m | [0m1.48     [0m | [0m2.614    [0m | [0m0.3022   [0m | [0m1.776    [0m | [0m1.982    [0m | [0m1.596    [0m | [0m0.8856   [0m | [0m39.53    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4999205631716235




After synthesis: 0.3727256224179698




After synthesis: 0.49998609770182045




After synthesis: 0.4507500680528498




After synthesis: 24.75593632265706




After synthesis: 0.3022237387323713




After synthesis: 0.4688322842414105




After synthesis: 0.44992778491851004




After synthesis: 0.47600749914774887




After synthesis: 0.47447443446391674




After synthesis: 0.4492364622692459




After synthesis: 0.4828420281499512




After synthesis: 0.48071204919849864




After synthesis: 0.49208422389891127
pmse_ratios: 0
sXs: 0
GMM: 0
| [95m5        [0m | [95m-3.238e+0[0m | [95m1.529    [0m | [95m0.5476   [0m | [95m0.3177   [0m | [95m0.2238   [0m | [95m2.682    [0m | [95m0.9374   [0m | [95m0.2854   [0m | [95m1.51     [0m | [95m1.791    [0m | [95m0.6376   [0m | [95m0.5953   [0m | [95m1.868    [0m | [95m1.352    [0m | [95m0.391    [0m | [95m18.7     [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000285998141155




After synthesis: 0.3692611278264873




After synthesis: 0.4992722158372518




After synthesis: 0.44909753128709257




After synthesis: 24.588671903004347




After synthesis: 0.2997750174191477




After synthesis: 0.465137358977122




After synthesis: 0.4536317887221767




After synthesis: 0.47682223022302256




After synthesis: 0.4703389429142405




After synthesis: 0.4468442155015939




After synthesis: 0.48230242014937835




After synthesis: 0.47591604006054045




After synthesis: 0.4930756120582472
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m6        [0m | [0m-3.426e+0[0m | [0m0.746    [0m | [0m0.9011   [0m | [0m2.205    [0m | [0m2.274    [0m | [0m1.01     [0m | [0m0.46     [0m | [0m0.06343  [0m | [0m0.05328  [0m | [0m2.935    [0m | [0m2.046    [0m | [0m2.924    [0m | [0m0.7258   [0m | [0m0.6141   [0m | [0m0.2325   [0m | [0m128.6    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49971277199316183




After synthesis: 0.3662523930826946




After synthesis: 0.4995602319265916




After synthesis: 0.4533679632888066




After synthesis: 24.340283577332603




After synthesis: 0.3070293109100169




After synthesis: 0.46308779027009284




After synthesis: 0.45061357251355333




After synthesis: 0.4777537120696594




After synthesis: 0.4759617929818409




After synthesis: 0.44342949659900527




After synthesis: 0.48026199799390357




After synthesis: 0.4767773633695761




After synthesis: 0.49368390643415444
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m7        [0m | [0m-3.44e+03[0m | [0m1.595    [0m | [0m2.98     [0m | [0m2.089    [0m | [0m0.71     [0m | [0m1.161    [0m | [0m1.673    [0m | [0m2.868    [0m | [0m2.62     [0m | [0m1.139    [0m | [0m2.231    [0m | [0m1.147    [0m | [0m0.9801   [0m | [0m0.2537   [0m | [0m1.767    [0m | [0m172.9    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4998152866498197




After synthesis: 0.3693907314684773




After synthesis: 0.49970248305652326




After synthesis: 0.4515644242321088




After synthesis: 24.766975003693037




After synthesis: 0.30482632494322526




After synthesis: 0.4603128049762188




After synthesis: 0.4525712873257519




After synthesis: 0.47573256055140994




After synthesis: 0.47399974509938075




After synthesis: 0.4419378563717718




After synthesis: 0.4799727355748698




After synthesis: 0.4744744344639167




After synthesis: 0.4922372374655851
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m8        [0m | [0m-3.467e+0[0m | [0m1.383    [0m | [0m2.678    [0m | [0m2.075    [0m | [0m2.413    [0m | [0m0.7805   [0m | [0m2.4      [0m | [0m2.732    [0m | [0m2.077    [0m | [0m2.89     [0m | [0m1.569    [0m | [0m1.817    [0m | [0m0.5769   [0m | [0m1.913    [0m | [0m0.592    [0m | [0m135.2    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.500035454655189




After synthesis: 0.36782885609965515




After synthesis: 0.4994762746653496




After synthesis: 0.45382909065737476




After synthesis: 24.537347269185478




After synthesis: 0.30556347410599766




After synthesis: 0.4699790123056971




After synthesis: 0.448819001222073




After synthesis: 0.4792609320522919




After synthesis: 0.4754559363246205




After synthesis: 0.4479780512373392




After synthesis: 0.48191159531518263




After synthesis: 0.4769118244348211




After synthesis: 0.49035701274230903
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m9        [0m | [0m-3.281e+0[0m | [0m1.828    [0m | [0m1.015    [0m | [0m0.2917   [0m | [0m0.9122   [0m | [0m2.528    [0m | [0m1.921    [0m | [0m1.076    [0m | [0m1.206    [0m | [0m0.1826   [0m | [0m1.541    [0m | [0m0.2859   [0m | [0m2.498    [0m | [0m1.442    [0m | [0m1.855    [0m | [0m137.8    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000086395946136




After synthesis: 0.3731061487101815




After synthesis: 0.4995602319265916




After synthesis: 0.4548083316805469




After synthesis: 24.77603675090573




After synthesis: 0.30992769709099394




After synthesis: 0.4608529600522358




After synthesis: 0.4512260697403637




After synthesis: 0.4806713625987614




After synthesis: 0.47195418709568465




After synthesis: 0.4489583776737271




After synthesis: 0.48005560976851136




After synthesis: 0.4790065503667984




After synthesis: 0.4920071460370473
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m10       [0m | [0m-3.495e+0[0m | [0m2.428    [0m | [0m0.8035   [0m | [0m2.072    [0m | [0m1.621    [0m | [0m2.543    [0m | [0m1.918    [0m | [0m1.725    [0m | [0m2.873    [0m | [0m2.789    [0m | [0m2.009    [0m | [0m2.032    [0m | [0m1.188    [0m | [0m1.878    [0m | [0m0.3695   [0m | [0m68.23    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4998318258090134




After synthesis: 0.3697789426902705




After synthesis: 0.49970248305652326




After synthesis: 0.4486095182683714




After synthesis: 24.347573308824337




After synthesis: 0.3007205339951399




After synthesis: 0.4654726185434432




After synthesis: 0.45020276274197374




After synthesis: 0.48001419546228974




After synthesis: 0.47682223022302256




After synthesis: 0.4414858193223981




After synthesis: 0.48272713781696014




After synthesis: 0.47770981904483795




After synthesis: 0.49190378292318426
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m11       [0m | [0m-3.341e+0[0m | [0m1.005    [0m | [0m1.081    [0m | [0m1.408    [0m | [0m2.989    [0m | [0m2.604    [0m | [0m1.15     [0m | [0m1.111    [0m | [0m1.402    [0m | [0m0.1079   [0m | [0m0.8329   [0m | [0m2.524    [0m | [0m0.5539   [0m | [0m2.677    [0m | [0m1.974    [0m | [0m65.92    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.500035454655189




After synthesis: 0.36466115322649334




After synthesis: 0.49973778734759733




After synthesis: 0.4514968629333793




After synthesis: 24.40606004139821




After synthesis: 0.29844342431097265




After synthesis: 0.4629140352586489




After synthesis: 0.4504768565825513




After synthesis: 0.47466298791576




After synthesis: 0.470696499351588




After synthesis: 0.44846958381116614




After synthesis: 0.4839688097545013




After synthesis: 0.4800556097685113




After synthesis: 0.49295537646432475
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m12       [0m | [0m-3.375e+0[0m | [0m1.241    [0m | [0m0.6468   [0m | [0m1.592    [0m | [0m0.3437   [0m | [0m2.529    [0m | [0m2.573    [0m | [0m1.309    [0m | [0m0.763    [0m | [0m2.082    [0m | [0m2.06     [0m | [0m2.246    [0m | [0m1.334    [0m | [0m1.651    [0m | [0m2.756    [0m | [0m62.67    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000261804360061




After synthesis: 0.3647943256477711




After synthesis: 0.49953512695526475




After synthesis: 0.45243774771237427




After synthesis: 24.411248865241447




After synthesis: 0.2955585544375413




After synthesis: 0.4688322842414105




After synthesis: 0.4546784645455102




After synthesis: 0.4779724839790609




After synthesis: 0.473181901277498




After synthesis: 0.4529706016700835




After synthesis: 0.48079328626055806




After synthesis: 0.4763261342869661




After synthesis: 0.4917736259865115
pmse_ratios: 0
sXs: 0
GMM: 0
| [95m13       [0m | [95m-3.063e+0[0m | [95m2.09     [0m | [95m1.648    [0m | [95m0.1063   [0m | [95m2.931    [0m | [95m1.652    [0m | [95m0.1003   [0m | [95m2.103    [0m | [95m0.1672   [0m | [95m0.3933   [0m | [95m0.175    [0m | [95m2.253    [0m | [95m2.859    [0m | [95m1.126    [0m | [95m2.587    [0m | [95m31.22    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4998515914033822




After synthesis: 0.3687417109406069




After synthesis: 0.4997802702336954




After synthesis: 0.4490279823222399




After synthesis: 24.4803639442341




After synthesis: 0.3088459483376882




After synthesis: 0.46055338990139144




After synthesis: 0.44937517082667666




After synthesis: 0.47938750447879064




After synthesis: 0.471504884865904




After synthesis: 0.4432072306631912




After synthesis: 0.48206846517319746




After synthesis: 0.48001419546228974




After synthesis: 0.4921864020903138
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m14       [0m | [0m-3.383e+0[0m | [0m2.639    [0m | [0m0.5553   [0m | [0m0.7995   [0m | [0m0.8685   [0m | [0m0.2962   [0m | [0m1.221    [0m | [0m2.215    [0m | [0m1.013    [0m | [0m0.6623   [0m | [0m1.247    [0m | [0m2.412    [0m | [0m0.6136   [0m | [0m1.365    [0m | [0m1.149    [0m | [0m154.2    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000007762590621




After synthesis: 0.3711947278006158




After synthesis: 0.4999174979617651




After synthesis: 0.4476252747841011




After synthesis: 24.598998795235662




After synthesis: 0.30427159531745596




After synthesis: 0.46168491408123724




After synthesis: 0.45250454472408985




After synthesis: 0.4752705822593282




After synthesis: 0.4752705822593282




After synthesis: 0.44734204202205186




After synthesis: 0.4834104395480407




After synthesis: 0.4757785006293299




After synthesis: 0.49231317352538384
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m15       [0m | [0m-3.368e+0[0m | [0m1.221    [0m | [0m1.044    [0m | [0m1.012    [0m | [0m0.2292   [0m | [0m0.7429   [0m | [0m1.901    [0m | [0m1.835    [0m | [0m1.473    [0m | [0m1.543    [0m | [0m0.2182   [0m | [0m2.814    [0m | [0m1.55     [0m | [0m0.1917   [0m | [0m0.7957   [0m | [0m123.9    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000346482081753




After synthesis: 0.36061711675253016




After synthesis: 0.49988462643771847




After synthesis: 0.45040841590745717




After synthesis: 24.604858379160746




After synthesis: 0.31207296830070114




After synthesis: 0.4622730163610562




After synthesis: 0.45115823431924124




After synthesis: 0.47605315857173147




After synthesis: 0.4753633531563787




After synthesis: 0.4468442155015939




After synthesis: 0.48280377617150805




After synthesis: 0.47887874045079476




After synthesis: 0.4934066809623766
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m16       [0m | [0m-3.412e+0[0m | [0m0.5841   [0m | [0m1.313    [0m | [0m1.353    [0m | [0m0.7286   [0m | [0m1.911    [0m | [0m1.314    [0m | [0m2.056    [0m | [0m2.526    [0m | [0m1.194    [0m | [0m1.271    [0m | [0m1.575    [0m | [0m2.496    [0m | [0m0.7434   [0m | [0m1.31     [0m | [0m109.2    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000142043419729




After synthesis: 0.3717065944663543




After synthesis: 0.49974751071998735




After synthesis: 0.45027136901236525




After synthesis: 24.682068452387945




After synthesis: 0.30371525501540103




After synthesis: 0.4606733726414987




After synthesis: 0.4525712873257519




After synthesis: 0.47361658591844646




After synthesis: 0.47115273488766146




After synthesis: 0.442537302751779




After synthesis: 0.48030313906005234




After synthesis: 0.4784497216260113




After synthesis: 0.4932658025160878
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m17       [0m | [0m-3.406e+0[0m | [0m0.2907   [0m | [0m0.2194   [0m | [0m1.328    [0m | [0m0.3512   [0m | [0m0.4828   [0m | [0m1.701    [0m | [0m2.331    [0m | [0m0.851    [0m | [0m0.1915   [0m | [0m2.113    [0m | [0m0.245    [0m | [0m0.6584   [0m | [0m0.3315   [0m | [0m0.6899   [0m | [0m101.0    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49998202472906733




After synthesis: 0.36332369213190485




After synthesis: 0.4994762746653496




After synthesis: 0.45330187174005465




After synthesis: 24.48643720067956




After synthesis: 0.28744508252536344




After synthesis: 0.4931234118747412




After synthesis: 0.4550674230848356




After synthesis: 0.4801795793178597




After synthesis: 0.4732303897135722




After synthesis: 0.44909753128709257




After synthesis: 0.4821466300525048




After synthesis: 0.4800556097685113




After synthesis: 0.49126935769519353
pmse_ratios: 0
sXs: 0
GMM: 0
| [95m18       [0m | [95m-2.533e+0[0m | [95m2.088    [0m | [95m1.661    [0m | [95m0.001011 [0m | [95m3.0      [0m | [95m1.605    [0m | [95m0.01822  [0m | [95m2.195    [0m | [95m0.1154   [0m | [95m0.2966   [0m | [95m0.1744   [0m | [95m2.278    [0m | [95m2.903    [0m | [95m1.11     [0m | [95m2.669    [0m | [95m30.82    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49988462643771847




After synthesis: 0.3732327965508253




After synthesis: 0.49965971108827645




After synthesis: 0.45081823323817927




After synthesis: 24.38072543872396




After synthesis: 0.3175055240301258




After synthesis: 0.4603128049762188




After synthesis: 0.4442400255144349




After synthesis: 0.4741426483141052




After synthesis: 0.47568657365047756




After synthesis: 0.4419378563717718




After synthesis: 0.4815162659752648




After synthesis: 0.47531699118081944




After synthesis: 0.49345330462538406
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m19       [0m | [0m-3.299e+0[0m | [0m2.817    [0m | [0m1.826    [0m | [0m0.9859   [0m | [0m0.1381   [0m | [0m0.6378   [0m | [0m1.646    [0m | [0m0.01775  [0m | [0m2.969    [0m | [0m0.7131   [0m | [0m2.473    [0m | [0m1.378    [0m | [0m0.2794   [0m | [0m0.1137   [0m | [0m0.1295   [0m | [0m131.0    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000270272196761




After synthesis: 0.37055267588020474




After synthesis: 0.49994363230598415




After synthesis: 0.4533679632888066




After synthesis: 24.464452213749556




After synthesis: 0.31028692210471054




After synthesis: 0.47366464675160225




After synthesis: 0.4535659134308788




After synthesis: 0.4708997534441267




After synthesis: 0.4689900466138572




After synthesis: 0.4468442155015939




After synthesis: 0.48363499061201415




After synthesis: 0.4805898532042456




After synthesis: 0.4912153847366532
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m20       [0m | [0m-3.152e+0[0m | [0m1.895    [0m | [0m0.1017   [0m | [0m0.1331   [0m | [0m0.1539   [0m | [0m2.749    [0m | [0m1.978    [0m | [0m2.24     [0m | [0m1.273    [0m | [0m0.5725   [0m | [0m1.45     [0m | [0m0.7853   [0m | [0m1.915    [0m | [0m1.673    [0m | [0m1.015    [0m | [0m136.9    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.499973394756377




After synthesis: 0.3655911833046621




After synthesis: 0.4998515914033822




After synthesis: 0.4523039903623383




After synthesis: 24.568306314360765




After synthesis: 0.2938066250684294




After synthesis: 0.4770458672111708




After synthesis: 0.44584024701647296




After synthesis: 0.47609877126723693




After synthesis: 0.46744651904196305




After synthesis: 0.4493751708266766




After synthesis: 0.48230242014937835




After synthesis: 0.48623917088714685




After synthesis: 0.4918518472613316
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m21       [0m | [0m-2.951e+0[0m | [0m2.027    [0m | [0m0.1343   [0m | [0m0.03042  [0m | [0m0.03578  [0m | [0m2.907    [0m | [0m1.108    [0m | [0m1.138    [0m | [0m0.3795   [0m | [0m2.223    [0m | [0m0.003337 [0m | [0m1.149    [0m | [0m2.345    [0m | [0m1.249    [0m | [0m1.689    [0m | [0m18.97    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.500035454655189




After synthesis: 0.3692611278264873




After synthesis: 0.49939270703162597




After synthesis: 0.4536976100080298




After synthesis: 24.648466674660884




After synthesis: 0.31207296830070114




After synthesis: 0.4642913113090513




After synthesis: 0.4499966123363045




After synthesis: 0.4770458672111708




After synthesis: 0.47437987461543685




After synthesis: 0.45323572606785517




After synthesis: 0.48091480168248874




After synthesis: 0.4752705822593282




After synthesis: 0.4940219549178845
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m22       [0m | [0m-3.486e+0[0m | [0m1.816    [0m | [0m2.068    [0m | [0m2.763    [0m | [0m1.596    [0m | [0m2.861    [0m | [0m2.502    [0m | [0m1.941    [0m | [0m2.547    [0m | [0m1.092    [0m | [0m2.41     [0m | [0m0.4542   [0m | [0m2.68     [0m | [0m2.803    [0m | [0m0.2025   [0m | [0m76.66    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4999860977018204




After synthesis: 0.37247145158878325




After synthesis: 0.49962016393554043




After synthesis: 0.45487318485547823




After synthesis: 24.52203806846145




After synthesis: 0.3061144794686533




After synthesis: 0.46132959499957793




After synthesis: 0.4479780512373392




After synthesis: 0.4770012327404748




After synthesis: 0.46977226961919166




After synthesis: 0.44670147122115644




After synthesis: 0.4815560019271969




After synthesis: 0.47748966000506793




After synthesis: 0.4929795077280039
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m23       [0m | [0m-3.38e+03[0m | [0m2.617    [0m | [0m0.1331   [0m | [0m0.6887   [0m | [0m1.235    [0m | [0m2.787    [0m | [0m2.063    [0m | [0m1.683    [0m | [0m0.9741   [0m | [0m1.857    [0m | [0m1.843    [0m | [0m2.024    [0m | [0m1.238    [0m | [0m1.664    [0m | [0m0.8721   [0m | [0m137.2    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.4988288049874105




After synthesis: 0.37029516225268333




After synthesis: 0.499757072524107




After synthesis: 0.4503399200591215




After synthesis: 24.793350381791676




After synthesis: 0.30482632494322526




After synthesis: 0.46451802961071026




After synthesis: 0.4533679632888066




After synthesis: 0.4796812390949423




After synthesis: 0.4729387450395342




After synthesis: 0.44902798232223984




After synthesis: 0.4824187908610166




After synthesis: 0.47963941413123545




After synthesis: 0.49278527889689483
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m24       [0m | [0m-3.513e+0[0m | [0m1.052    [0m | [0m1.004    [0m | [0m2.726    [0m | [0m1.311    [0m | [0m0.5965   [0m | [0m2.243    [0m | [0m2.714    [0m | [0m2.738    [0m | [0m2.343    [0m | [0m1.461    [0m | [0m1.856    [0m | [0m2.935    [0m | [0m0.00121  [0m | [0m0.5318   [0m | [0m60.84    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49995419858200774




After synthesis: 0.3651932202694292




After synthesis: 0.49996883774808487




After synthesis: 0.4525045447240899




After synthesis: 24.336038018505555




After synthesis: 0.30090907977381515




After synthesis: 0.4589133523238448




After synthesis: 0.44511630462984975




After synthesis: 0.4744744344639167




After synthesis: 0.4723503054541531




After synthesis: 0.4513615759857766




After synthesis: 0.4831464326076483




After synthesis: 0.4745216435863969




After synthesis: 0.4924888792275633
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m25       [0m | [0m-3.275e+0[0m | [0m1.781    [0m | [0m1.537    [0m | [0m0.7556   [0m | [0m0.3236   [0m | [0m2.368    [0m | [0m0.3657   [0m | [0m0.2289   [0m | [0m1.536    [0m | [0m2.775    [0m | [0m1.654    [0m | [0m0.7128   [0m | [0m1.783    [0m | [0m1.482    [0m | [0m1.707    [0m | [0m19.28    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000343659514132




After synthesis: 0.36439449638032656




After synthesis: 0.4999616996958591




After synthesis: 0.4542875752026095




After synthesis: 24.65496305130922




After synthesis: 0.3039008812922957




After synthesis: 0.4724488563826021




After synthesis: 0.446486930545768




After synthesis: 0.48123684931682387




After synthesis: 0.472399604828919




After synthesis: 0.44612822959289766




After synthesis: 0.4820293152375733




After synthesis: 0.4777975588632163




After synthesis: 0.49233840103724164
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m26       [0m | [0m-3.187e+0[0m | [0m1.563    [0m | [0m0.1102   [0m | [0m0.08656  [0m | [0m1.802    [0m | [0m2.049    [0m | [0m1.912    [0m | [0m2.317    [0m | [0m1.597    [0m | [0m0.1113   [0m | [0m0.8575   [0m | [0m0.1396   [0m | [0m2.169    [0m | [0m1.22     [0m | [0m0.3699   [0m | [0m136.3    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000261804360061




After synthesis: 0.3693907314684773




After synthesis: 0.4996706465341876




After synthesis: 0.4503399200591215




After synthesis: 24.251972973143758




After synthesis: 0.30166141605210156




After synthesis: 0.46314560705334706




After synthesis: 0.4512260697403637




After synthesis: 0.47810319300946935




After synthesis: 0.4763261342869661




After synthesis: 0.45532565855359514




After synthesis: 0.4849843541743332




After synthesis: 0.477401272322849




After synthesis: 0.4934765535176397
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m27       [0m | [0m-3.435e+0[0m | [0m0.2385   [0m | [0m2.364    [0m | [0m2.737    [0m | [0m2.743    [0m | [0m1.188    [0m | [0m0.4791   [0m | [0m2.124    [0m | [0m1.389    [0m | [0m1.426    [0m | [0m0.2354   [0m | [0m2.814    [0m | [0m2.575    [0m | [0m0.8219   [0m | [0m2.576    [0m | [0m134.7    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000086395946136




After synthesis: 0.36506035905580325




After synthesis: 0.4995540163438102




After synthesis: 0.451968643119974




After synthesis: 24.39825682650917




After synthesis: 0.31171709520408153




After synthesis: 0.4702876697400485




After synthesis: 0.44804843819373413




After synthesis: 0.47125358994413463




After synthesis: 0.47120318650903403




After synthesis: 0.449513657165288




After synthesis: 0.4806713625987614




After synthesis: 0.4778413594382356




After synthesis: 0.49349976045830685
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m28       [0m | [0m-3.248e+0[0m | [0m1.359    [0m | [0m2.674    [0m | [0m0.3408   [0m | [0m0.1982   [0m | [0m0.2681   [0m | [0m1.404    [0m | [0m0.5215   [0m | [0m2.697    [0m | [0m2.428    [0m | [0m2.827    [0m | [0m1.385    [0m | [0m1.926    [0m | [0m2.836    [0m | [0m2.973    [0m | [0m10.48    [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.49997992774005784




After synthesis: 0.3706812840181353




After synthesis: 0.49996883774808487




After synthesis: 0.4539603555665313




After synthesis: 24.659767421572884




After synthesis: 0.3101073941221968




After synthesis: 0.46162582242026795




After synthesis: 0.45330187174005465




After synthesis: 0.47930316864144823




After synthesis: 0.47600749914774887




After synthesis: 0.4442400255144349




After synthesis: 0.48099558531668535




After synthesis: 0.47489761917933926




After synthesis: 0.4934765535176397
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m29       [0m | [0m-3.383e+0[0m | [0m1.37     [0m | [0m2.415    [0m | [0m0.9288   [0m | [0m2.15     [0m | [0m1.332    [0m | [0m0.2925   [0m | [0m2.804    [0m | [0m1.636    [0m | [0m1.099    [0m | [0m1.71     [0m | [0m2.285    [0m | [0m2.176    [0m | [0m2.135    [0m | [0m0.5451   [0m | [0m53.0     [0m |


 -0.60102348]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_train.loc[:,['Churn']] = scaler.transform(norm_train.loc[:,['Churn']])


After synthesis: 0.5000261804360061




After synthesis: 0.363726028902465




After synthesis: 0.49998609770182045




After synthesis: 0.448819001222073




After synthesis: 24.230331202881846




After synthesis: 0.2988248223316525




After synthesis: 0.45710946188756507




After synthesis: 0.44804843819373413




After synthesis: 0.4784497216260113




After synthesis: 0.470696499351588




After synthesis: 0.44965192149040134




After synthesis: 0.4826503201943736




After synthesis: 0.48448092653057684




After synthesis: 0.4925387009197418
pmse_ratios: 0
sXs: 0
GMM: 0
| [0m30       [0m | [0m-3.292e+0[0m | [0m0.8325   [0m | [0m0.4242   [0m | [0m0.7424   [0m | [0m0.7377   [0m | [0m2.844    [0m | [0m1.928    [0m | [0m2.061    [0m | [0m1.44     [0m | [0m2.565    [0m | [0m0.009024 [0m | [0m1.544    [0m | [0m2.718    [0m | [0m2.14     [0m | [0m1.758    [0m | [0m19.47    [0m |
Final Result:  {'target': np.float64(-2533.295322813445), 'params': {'C_Dependents': np.float64(2.087884244334957), 'C_DeviceProtection': np.float64(1.6608343270560781), 'C_MultipleLines': np.float64(0.001010899731055634), 'C_OnlineBackup': np.float64(3.0), 'C_OnlineSecurity': np.float64(1.6053794512368083), 'C_PaperlessBilling': np.float64(0.018219961951766915), 'C_Partner': np.float64(2.195320691268981), 'C_PhoneService': np.float64(0.11539401049820197), 'C_SeniorCitizen': np.float64(0.29662768387110994), 'C_StreamingMovies': np.float64(0.17443527068941236), 'C_StreamingTV': np.float6

In [None]:
optimization_results

In [None]:
optimized_datasets = []

for r in random_states:
    best_result, optimizer = optimize_models_mn(train_data=train_data, number_synthetic_datasets=nsd, number_gmm_initializations=ngi, random_state=r)
    
    best_params = best_result['params']
    
    # Run train_models_mn again with best params to get the synthetic datasets
    pmse_ratios, sXs, GMM = train_models_mn(
        train_data=train_data,
        number_synthetic_datasets=nsd,
        number_gmm_initializations=ngi,
        num_components_=int(best_params['num_components_']),
        C_hiking_int=best_params['C_hiking_int'],
        C_sustain_int=best_params['C_sustain_int'],
        C_online_int=best_params['C_online_int']
    )
    
    optimized_datasets.append(sXs)


In [None]:
optimized_datasets[0][0]

In [None]:
optimized_datasets[0][0].to_csv('synthesized_data_part_dependent_aa.csv')

In [None]:
run_targets = [np.minimum.accumulate(-i[1].space.target) for i in optimization_results]

In [None]:
print(run_targets)

In [None]:
fig = plt.figure()
plt.plot(run_targets[0])
plt.scatter(np.arange(len(run_targets[0])), run_targets[0], s=6)
plt.plot(run_targets[1])
plt.scatter(np.arange(len(run_targets[1])), run_targets[1], s=6)
plt.title("CPS ASEC Data - Objective Value for MNL Synthesis")
plt.xlabel("Iteration")
plt.ylabel("Minimum Objective Value")
fig.savefig('../Images/min_objective_mnl_ipums.pdf')
plt.show()

Choose the params that gave the best objective value across all random seeds.

In [None]:
best_params = optimization_results[np.argmax([x[0]['target'] for x in optimization_results])][0]

In [None]:
best_params

Generate 20 synthetic data sets.

On occassion, the synthesis models will produce a significantly different fit than what was observed during the optimization process. In these cases, retrain the models until a fit (judged by the pMSE ratio) consistent with the optimization results is observed.

In [None]:
pmse_ratios, full_sXs, GMM = train_models_mn(train_data=train_data,
                                             number_synthetic_datasets=20,
                                             # hyperparameters for GMM
                                             number_gmm_initializations=ngi,
                                             num_components_=int(best_params['params']['num_components_']),
                                             # hyperparameters for CART, end with underscore means Bayesian optimization will choose
                                             C_non_white_=best_params['params']['C_non_white_'],
                                             C_sex_=best_params['params']['C_sex_'])

In [None]:
np.mean(pmse_ratios)

In [None]:
plt.violinplot(pmse_ratios)
plt.xlabel("Density")
plt.ylabel("pMSE Ratio")
plt.title("Distribution of pMSE Ratios")
plt.show()

# Save the synthetic datasets.

Save the GMM model.

In [None]:
for i, sX in enumerate(full_sXs):
    sX.to_csv("../Data/IPUMS/Synthetic Datasets/gmm_and_mnl_" + str(i) + ".csv", index=False)

# save
with open('../Results/IPUMS/Models/gmm_and_mnl.pkl','wb') as f:
    pickle.dump(GMM,f)

In [None]:
# full_sXs = [pd.read_csv("../Data/IPUMS/Synthetic Datasets/gmm_and_mnl_" + str(i) + ".csv") for i in range(20)]

# # load
# with open('../Results/IPUMS/Models/gmm_and_mnl.pkl', 'rb') as f:
#     GMM = pickle.load(f)

***

Now apply the attribute disclosure prevention algorithm.

We don't have a great baseline for the probability of being non-white, so we'll just use the proportion from the data.

In [None]:
# prior = np.mean(train_data.non_white == 1)

In [None]:
# prior

For our threshold, we select $c = 10$, i.e., we are allowing for a 10x increase in the probability of an adversary inferring the non-white status based on the synthetic data. This is a relatively large increase. For example, this means going from XXX under the prior to no more than XXX under the updated probability.

In [None]:
# c = 3

We provide a range of $\delta$ values over which to evaluate and prevent attribute disclosure.

In [None]:
# deltas = np.linspace(0.001, 0.1, 5)

In [None]:
# ad_sXs = [attribute_disclosure_reduction(original_data=train_data, 
#                                          synthetic_data=X,
#                                          continuous_vars=['incwage', 'years_of_educ', 'potential_experience'],
#                                          categorical_vars=['sex'],
#                                          sensitive_var='non_white',
#                                          num_mixture_components=190,#int(best_params['params']['num_components_']),
#                                          deltas=deltas, 
#                                          c=c, 
#                                          prior_prob=prior) for X in full_sXs]

***

In [None]:
# num_records = train_data.shape[0]

In [None]:
# np.mean([Z.shape[0] - num_records for Z in ad_sXs])

***

In [None]:
# for i, sX in enumerate(ad_sXs):
    
#     # normalize
#     sX.loc[:,['incwage', 'years_of_educ', 'potential_experience']] = StandardScaler().fit_transform(sX.loc[:,['incwage', 'years_of_educ', 'potential_experience']])
#     full_sXs[i].loc[:,['incwage', 'years_of_educ', 'potential_experience']] = StandardScaler().fit_transform(full_sXs[i].loc[:,['incwage', 'years_of_educ', 'potential_experience']])

#     # convert to original scale (un-normalize?)
#     H = train_data.sample(frac=1.0, replace=True, ignore_index=True).loc[:,['incwage', 'years_of_educ', 'potential_experience']]
#     scaler = StandardScaler().fit(H.loc[:, ['incwage', 'years_of_educ', 'potential_experience']])
#     sX.loc[:,['incwage', 'years_of_educ', 'potential_experience']] = scaler.inverse_transform(sX.loc[:,['incwage', 'years_of_educ', 'potential_experience']])
#     full_sXs[i].loc[:,['incwage', 'years_of_educ', 'potential_experience']] = scaler.inverse_transform(full_sXs[i].loc[:,['incwage', 'years_of_educ', 'potential_experience']])
#     sX.to_csv("../Data/IPUMS/Synthetic Datasets/ad_gmm_and_mnl_" + str(i) + ".csv", index=False)
#     full_sXs[i].to_csv("../Data/IPUMS/Synthetic Datasets/gmm_and_mnl_" + str(i) + ".csv", index=False)