In [None]:
import json
import pandas as pd
import statistics as stats
import re
import pprint

In [293]:
ALL_TABULAR_DATASETS = ["adult", "census", "child", "covtype", "credit", "insurance", "health_insurance", 
                       "intrusion", "drugs", "loan", "pums"]
ALL_TABULAR_MODELS = {
    "gretel": ["actgan"], 
    "sdv": ["ctgan", "tvae", "gaussian_copula"], 
    "syn": ["ctgan", "tvae", "goggle", "arf", "ddpm", "nflow", "rtvae"], 
    "llm": ["great"], 
    "betterdata": ["gan", "gan_dp"]
}

In [294]:
def get_cmds_configs(command_string):

    # Sample command string
    # command_string = "python3 run_model.py --m sequential --l sdv --s par --data nasdaq --o outputs --run_model_training --use_gpu"

    # Regular expression pattern to extract arguments for "--l", "--s", and "--data"
    pattern = r"--l (\S+).*?--s (\S+).*?--data (\S+)"

    # Use regex to find the match
    match = re.search(pattern, command_string)

    # Extract values from the match
    library = match.group(1) if match else None
    synthesizer = match.group(2) if match else None
    data = match.group(3) if match else None

    return (library, synthesizer, data)

In [295]:
sdg_jobs_df = pd.read_csv("../final_outs/sdg_jobs.csv")

if 'Unnamed: 0' in sdg_jobs_df.columns:
    sdg_jobs_df.drop(columns=['Unnamed: 0'], inplace=True)

In [296]:
# Iterate over rows and print 'Command Parameters' for specific 'Status' values
TABULAR_INCOMPLETE_JOBS = {
    "gretel": {}, 
    "sdv": {}, 
    "syn": {}
}
for index, row in sdg_jobs_df.iterrows():
    if row['Status'] in ['Failed', 'Canceled', 'Running']:
        (library, synthesizer, data) = get_cmds_configs(row['Command Parameters'])
        if library and synthesizer and data:
            # print(library, synthesizer, data)
            
            if library == "synthcity":
                library = "syn"
            
            if synthesizer in TABULAR_INCOMPLETE_JOBS[library]:
                TABULAR_INCOMPLETE_JOBS[library][synthesizer][data] = row['Status']
            else:
                TABULAR_INCOMPLETE_JOBS[library][synthesizer] = {}
                TABULAR_INCOMPLETE_JOBS[library][synthesizer][data] = row['Status']
                
                
pprint.pprint(TABULAR_INCOMPLETE_JOBS)

{'gretel': {'dgan': {'pums': 'Failed'}},
 'sdv': {'ctgan': {'insurance': 'Failed'}, 'par': {'pums': 'Failed'}},
 'syn': {'arf': {'pums': 'Failed'},
         'ctgan': {'covtype': 'Running',
                   'credit': 'Canceled',
                   'pums': 'Failed'},
         'ddpm': {'pums': 'Canceled'},
         'goggle': {'adult': 'Failed',
                    'covtype': 'Failed',
                    'intrusion': 'Failed',
                    'loan': 'Failed',
                    'pums': 'Running'},
         'nflow': {'adult': 'Failed',
                   'covtype': 'Failed',
                   'loan': 'Failed',
                   'pums': 'Failed'},
         'rtvae': {'credit': 'Canceled', 'pums': 'Failed'},
         'tvae': {'pums': 'Failed'}}}


In [308]:
ERROR_VAL  = -99
def get_scores_df(exp_dataset, config="tabular"):
    
    final_report = {
        "dataset": [], 
        "model": [],
        "domain_cov": [], 
        "stats_cov": [], 
        "outliers_cov": [],
        "ks_sim": [], 
        "tv_sim": [],
        "corr_sim": [], 
        "contin_sim": [], 
        "sdv_quality_report": [],
        "wass_dist": [], 
        "js_dist": [], 
        "new_row_synthesis": []
    }

    for lib, models in ALL_TABULAR_MODELS.items():
        
        if config == "hpo" and lib!= "syn":
            continue
        for model in models: 
                
            corr_file = f"../metrics_out/{config}/{model}_{lib}/{exp_dataset}/{exp_dataset}_{model}_correlation.csv"
            metrics_file = f"../metrics_out/{config}/{model}_{lib}/{exp_dataset}/{exp_dataset}_{model}_metrics.json"
            
            final_report["model"].append(f"{model}_{lib}")
            final_report["dataset"].append(exp_dataset)
            
            try:
                with open(metrics_file, 'r') as file:
                    metrics_scores = json.load(file)
                corr_df = pd.read_csv(corr_file)
            except Exception as e:
                # print(model, lib)
                status = None
                if lib in TABULAR_INCOMPLETE_JOBS and model in TABULAR_INCOMPLETE_JOBS[lib]:
                    if exp_dataset in TABULAR_INCOMPLETE_JOBS[lib][model]:
                        # print("Status: ",  TABULAR_INCOMPLETE_JOBS[lib][model][exp_dataset])
                        # print("-")
                        status = TABULAR_INCOMPLETE_JOBS[lib][model][exp_dataset]
                    else:
                        status = "Skipped"
                else:
                    status = "Skipped"
                    # print(e)
                    # print("Skipped")
                for k in final_report.keys():
                    if k not in ["model", "dataset"]: 
                        final_report[k].append(status)
                    # print(final_report)
                continue
                
            
            # print(final_report) 
            
            if metrics_scores["coverage"]["domain_coverage"].values():
                domain_coverage_mean = round(stats.mean(metrics_scores["coverage"]["domain_coverage"].values()), 2)
                final_report["domain_cov"].append(domain_coverage_mean)
            else:
                # Handle the case for empty list, e.g., append a default value or skip
                final_report["domain_cov"].append(ERROR_VAL)  # or any other default value
            
            
            if metrics_scores["coverage"]["outlier_coverage"].values():
                outlier_coverage_mean = round(stats.mean(metrics_scores["coverage"]["outlier_coverage"].values()), 2)
                final_report["outliers_cov"].append(outlier_coverage_mean)
            else:
                # Handle the case for empty list, e.g., append a default value or skip
                final_report["outliers_cov"].append(ERROR_VAL)  # or any other default value

            # final_report["outliers_cov"].append(round(stats.mean(metrics_scores["coverage"]["outlier_coverage"].values()), 2))

            # Initialize total and count variables
            total_average = 0
            count = 0
            # Calculate the average of mean, median, and std for each column
            for c, s in metrics_scores["similarity"]["statistic"].items():
                column_average = (s["mean"] + s["median"] + s["std"]) / 3
                total_average += column_average
                count += 1
            # Calculate the overall average
            overall_average = total_average / count if count > 0 else ERROR_VAL
            final_report["stats_cov"].append(round(overall_average, 2))


            ks_scores = []
            tv_scores = []
            for key, value in metrics_scores["sdv_quality_report"]["distribution"].items():
                if value["metric"] == "KSComplement":
                    ks_scores.append(value["score"])
                elif value["metric"] == "TVComplement":
                    tv_scores.append(value["score"])

            # Calculating averages
            ks_average = stats.mean(ks_scores) if ks_scores else ERROR_VAL
            tv_average = stats.mean(tv_scores) if tv_scores else ERROR_VAL
            final_report["ks_sim"].append(round(ks_average, 2))
            final_report["tv_sim"].append(round(tv_average, 2))


            # contingency_average, correlation_average
            contingency_average = corr_df[corr_df['Metric'] == 'ContingencySimilarity']["Score"].mean()
            correlation_average = corr_df[corr_df['Metric'] == 'CorrelationSimilarity']["Score"].mean()

            final_report["contin_sim"].append(round(contingency_average, 2))
            final_report["corr_sim"].append(round(correlation_average, 2))

            final_report["sdv_quality_report"].append(round(metrics_scores["sdv_quality_report"]["score"], 2))
            
            if metrics_scores["similarity"]["js_distance"].values():
                js_dist_mean = round(stats.mean(metrics_scores["similarity"]["js_distance"].values()), 2)
                final_report["js_dist"].append(js_dist_mean)
            else:
                # Handle the case for empty list, e.g., append a default value or skip
                final_report["js_dist"].append(ERROR_VAL)  # or any other default value
                
            if metrics_scores["similarity"]["wass_distance"].values():
                wass_dist_mean = round(stats.mean(metrics_scores["similarity"]["wass_distance"].values()), 2)
                final_report["wass_dist"].append(wass_dist_mean)
            else:
                # Handle the case for empty list, e.g., append a default value or skip
                final_report["wass_dist"].append(ERROR_VAL)  # or any other default value

            # # final_report["js_dist"].append(round(stats.mean(metrics_scores["similarity"]["js_distance"].values()), 2))
            # final_report["wass_dist"].append(round(stats.mean(metrics_scores["similarity"]["wass_distance"].values()), 2))
            
            if "score" in metrics_scores["privacy"]["new_row_synthesis"]:
                final_report["new_row_synthesis"].append(round(metrics_scores["privacy"]["new_row_synthesis"]["score"], 2))
            else:
                final_report["new_row_synthesis"].append(ERROR_VAL)
            
    return final_report

In [311]:
# # exp_dataset = "adult"
# ALL_TABULAR_DATASETS = ["adult", "loan"]

# for exp_dataset in ALL_TABULAR_DATASETS:
    
#     final_report =
    
#     try:
#         pd.DataFrame(final_report)
#     except Exception as e:
#         print(e)
#         print("~"*10, exp_dataset)
#         for k, v in final_report.items():
#             print(k, len(v))
            
pd.DataFrame( get_scores_df("adult", "hpo"))

Unnamed: 0,dataset,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,adult,ctgan_syn,0.94,0.99,0.59,0.8,0.83,0.98,0.76,0.81,0.01,0.27,1.0
1,adult,tvae_syn,0.92,0.99,0.57,0.9,0.85,0.99,0.8,0.85,0.01,0.19,1.0
2,adult,goggle_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
3,adult,arf_syn,0.98,1.0,0.81,0.9,0.92,0.99,0.87,0.9,0.01,0.16,1.0
4,adult,ddpm_syn,0.99,1.0,0.72,0.98,0.97,0.99,0.94,0.96,0.01,0.07,1.0
5,adult,nflow_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
6,adult,rtvae_syn,0.96,0.98,0.95,0.84,0.8,0.96,0.74,0.79,0.03,0.23,1.0


In [312]:
pd.DataFrame( get_scores_df("loan", "hpo"))

Unnamed: 0,dataset,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,loan,ctgan_syn,0.97,0.99,0.4,0.91,0.98,0.96,0.93,0.94,0.03,0.17,1.0
1,loan,tvae_syn,0.95,0.98,0.35,0.93,0.98,0.96,0.92,0.94,0.02,0.19,1.0
2,loan,goggle_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
3,loan,arf_syn,1.0,0.99,0.46,0.9,0.99,0.98,0.93,0.94,0.02,0.14,1.0
4,loan,ddpm_syn,1.0,0.94,0.45,0.95,0.97,0.98,0.88,0.93,0.03,0.1,1.0
5,loan,nflow_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
6,loan,rtvae_syn,0.87,0.97,0.35,0.87,0.98,0.94,0.81,0.88,0.04,0.47,1.0


In [298]:
# exp_dataset = "adult"
ALL_TABULAR_DATASETS = ["adult", "census", "child", "covtype", "credit", "insurance", "health_insurance", 
                       "intrusion", "drugs", "loan", "pums"]

for exp_dataset in ALL_TABULAR_DATASETS:
    
    final_report = get_scores_df(exp_dataset)
    
    try:
        pd.DataFrame(final_report)
    except Exception as e:
        print(e)
        print("~"*10, exp_dataset)
        for k, v in final_report.items():
            print(k, len(v))

In [299]:
pd.DataFrame(get_scores_df("adult"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,0.89,0.99,0.49,0.79,0.92,0.97,0.87,0.88,0.02,0.24,1.0
1,ctgan_sdv,0.87,0.99,0.79,0.87,0.91,0.98,0.82,0.87,0.02,0.22,1.0
2,tvae_sdv,0.79,0.99,0.68,0.88,0.94,0.97,0.88,0.91,0.02,0.13,1.0
3,gaussian_copula_sdv,0.92,0.95,0.41,0.7,0.8,0.99,0.73,0.76,0.07,0.29,1.0
4,ctgan_syn,0.91,0.99,0.49,0.76,0.79,0.98,0.74,0.78,0.02,0.34,1.0
5,tvae_syn,0.86,0.99,0.65,0.81,0.89,0.99,0.81,0.84,0.01,0.3,1.0
6,goggle_syn,0.33,0.95,0.27,0.62,0.67,0.97,0.49,0.59,0.06,0.54,1.0
7,arf_syn,0.99,1.0,0.81,0.88,0.91,0.99,0.87,0.9,0.01,0.16,1.0
8,ddpm_syn,0.99,1.0,0.68,0.98,0.97,0.98,0.95,0.97,0.01,0.06,1.0
9,nflow_syn,0.97,0.98,0.7,0.89,0.77,0.97,0.69,0.77,0.02,0.23,1.0


In [243]:
pd.DataFrame(get_scores_df("census"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,0.92,0.97,0.41,0.87,0.91,0.98,0.88,0.89,0.02,0.2,0.98
1,ctgan_sdv,,,,,,,,,,,
2,tvae_sdv,0.91,0.98,0.38,0.93,0.95,0.98,0.92,0.93,0.02,0.1,0.9
3,gaussian_copula_sdv,0.93,0.89,0.29,0.5,0.77,0.98,0.64,0.68,0.11,0.42,1.0
4,ctgan_syn,,,,,,,,,,,
5,tvae_syn,,,,,,,,,,,
6,goggle_syn,,,,,,,,,,,
7,arf_syn,,,,,,,,,,,
8,ddpm_syn,,,,,,,,,,,
9,nflow_syn,,,,,,,,,,,


In [244]:
pd.DataFrame(get_scores_df("child"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,1.0,-99,-99,-99,0.93,,0.89,0.91,-99,-99,0.79
1,ctgan_sdv,0.97,-99,-99,-99,0.9,,0.84,0.87,-99,-99,0.89
2,tvae_sdv,0.97,-99,-99,-99,0.95,,0.91,0.93,-99,-99,0.86
3,gaussian_copula_sdv,0.94,-99,-99,-99,0.85,,0.71,0.78,-99,-99,1.0
4,ctgan_syn,1.0,-99,-99,-99,0.94,,0.9,0.92,-99,-99,0.86
5,tvae_syn,1.0,-99,-99,-99,0.97,,0.94,0.95,-99,-99,0.91
6,goggle_syn,1.0,-99,-99,-99,0.75,,0.59,0.67,-99,-99,0.85
7,arf_syn,1.0,-99,-99,-99,0.96,,0.93,0.95,-99,-99,0.84
8,ddpm_syn,1.0,-99,-99,-99,0.96,,0.94,0.95,-99,-99,0.85
9,nflow_syn,1.0,-99,-99,-99,0.91,,0.86,0.89,-99,-99,0.92


In [245]:
pd.DataFrame(get_scores_df("covtype"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,0.98,0.99,0.8,0.94,0.98,0.97,0.95,0.96,0.02,0.11,1.0
1,ctgan_sdv,0.84,0.97,0.68,0.88,0.96,0.97,0.91,0.93,0.04,0.15,1.0
2,tvae_sdv,0.76,0.97,0.42,0.84,0.98,0.96,0.93,0.94,0.04,0.22,1.0
3,gaussian_copula_sdv,0.75,0.99,0.56,0.95,0.17,0.98,0.17,0.26,0.01,0.1,1.0
4,ctgan_syn,Running,Running,Running,Running,Running,Running,Running,Running,Running,Running,Running
5,tvae_syn,,,,,,,,,,,
6,goggle_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
7,arf_syn,1.0,1.0,0.79,0.97,1.0,0.99,0.99,0.99,0.01,0.06,1.0
8,ddpm_syn,0.99,1.0,0.88,0.99,1.0,0.99,0.98,0.99,0.0,0.06,1.0
9,nflow_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed


In [246]:
pd.DataFrame(get_scores_df("credit"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,0.22,1.0,0.77,0.9,0.42,0.96,0.28,0.9,0.01,0.12,0.96
1,ctgan_sdv,0.19,1.0,0.96,0.98,1.0,0.97,0.55,0.96,0.0,0.06,0.92
2,tvae_sdv,0.15,0.99,0.37,0.93,1.0,0.96,0.54,0.93,0.01,0.14,0.72
3,gaussian_copula_sdv,0.25,1.0,0.62,0.88,0.0,0.98,0.1,0.88,0.01,0.15,1.0
4,ctgan_syn,0.56,1.0,0.8,0.93,1.0,0.97,0.64,0.94,0.01,0.08,0.99
5,tvae_syn,0.47,1.0,0.63,0.93,1.0,0.98,0.62,0.94,0.01,0.08,0.98
6,goggle_syn,,,,,,,,,,,
7,arf_syn,0.75,1.0,0.8,0.96,1.0,0.98,0.64,0.96,0.0,0.04,0.91
8,ddpm_syn,,,,,,,,,,,
9,nflow_syn,,,,,,,,,,,


In [247]:
pd.DataFrame(get_scores_df("intrusion"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,0.74,0.89,0.36,0.83,0.91,0.96,0.78,0.84,0.07,0.18,0.98
1,ctgan_sdv,0.67,0.86,0.36,0.77,0.88,0.92,0.74,0.8,0.12,0.22,0.98
2,tvae_sdv,0.63,0.99,0.15,0.91,0.97,0.93,0.87,0.91,0.02,0.15,0.96
3,gaussian_copula_sdv,0.72,0.85,0.54,0.59,0.63,0.95,0.48,0.56,0.28,0.39,1.0
4,ctgan_syn,0.96,0.9,0.65,0.78,0.96,0.98,0.91,0.91,0.02,0.17,1.0
5,tvae_syn,0.76,0.95,0.35,0.81,0.95,0.97,0.87,0.89,0.02,0.18,0.99
6,goggle_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
7,arf_syn,0.98,0.91,0.69,0.8,0.93,0.98,0.88,0.89,0.06,0.14,0.97
8,ddpm_syn,,,,,,,,,,,
9,nflow_syn,,,,,,,,,,,


In [248]:
pd.DataFrame(get_scores_df("insurance"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,1.0,-99,-99,-99,0.93,,0.89,0.91,-99,-99,0.94
1,ctgan_sdv,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
2,tvae_sdv,0.93,-99,-99,-99,0.76,,0.59,0.67,-99,-99,1.0
3,gaussian_copula_sdv,0.98,-99,-99,-99,0.85,,0.73,0.79,-99,-99,1.0
4,ctgan_syn,1.0,-99,-99,-99,0.96,,0.93,0.94,-99,-99,0.91
5,tvae_syn,0.96,-99,-99,-99,0.96,,0.94,0.95,-99,-99,0.95
6,goggle_syn,0.62,-99,-99,-99,0.77,,0.62,0.7,-99,-99,0.93
7,arf_syn,0.99,-99,-99,-99,0.98,,0.96,0.97,-99,-99,0.87
8,ddpm_syn,1.0,-99,-99,-99,0.98,,0.96,0.97,-99,-99,0.85
9,nflow_syn,1.0,-99,-99,-99,0.92,,0.87,0.9,-99,-99,0.96


In [249]:
pd.DataFrame(get_scores_df("health_insurance"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,1.0,0.95,0.33,0.85,0.88,0.91,0.82,0.85,0.07,0.19,1.0
1,ctgan_sdv,0.98,0.89,0.35,0.75,0.88,0.89,0.81,0.82,0.13,0.24,1.0
2,tvae_sdv,0.91,0.95,0.67,0.81,0.82,0.98,0.74,0.8,0.07,0.2,1.0
3,gaussian_copula_sdv,0.93,0.95,0.11,0.85,0.87,0.94,0.8,0.84,0.07,0.18,1.0
4,ctgan_syn,0.92,0.94,0.32,0.86,0.88,0.98,0.8,0.85,0.07,0.21,1.0
5,tvae_syn,0.93,0.98,0.33,0.89,0.96,0.96,0.86,0.9,0.03,0.26,1.0
6,goggle_syn,0.56,0.9,0.0,0.63,0.73,0.92,0.51,0.63,0.1,0.55,1.0
7,arf_syn,1.0,0.97,0.57,0.9,0.94,0.95,0.88,0.91,0.04,0.12,0.96
8,ddpm_syn,1.0,0.95,0.67,0.89,0.91,0.92,0.83,0.87,0.07,0.21,1.0
9,nflow_syn,0.95,0.98,0.27,0.9,0.87,0.96,0.82,0.86,0.02,0.15,1.0


In [250]:
pd.DataFrame(get_scores_df("drugs"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,0.9,0.96,0.0,0.87,0.92,0.94,0.82,0.87,0.05,0.18,-99
1,ctgan_sdv,0.89,0.96,1,0.89,0.92,0.95,0.83,0.88,0.06,0.14,-99
2,tvae_sdv,0.77,0.93,0.0,0.83,,0.99,0.76,0.8,0.06,0.18,-99
3,gaussian_copula_sdv,0.87,1.0,0.0,0.92,0.88,0.97,0.73,0.8,0.01,0.08,-99
4,ctgan_syn,-99,-99,-99,0.89,0.85,0.93,0.14,0.5,-99,-99,-99
5,tvae_syn,-99,-99,-99,0.89,0.86,0.94,0.15,0.5,-99,-99,-99
6,goggle_syn,-99,-99,-99,0.85,0.74,0.95,0.08,0.41,-99,-99,-99
7,arf_syn,-99,-99,-99,0.91,0.86,0.94,0.15,0.51,-99,-99,-99
8,ddpm_syn,-99,-99,-99,0.93,0.9,0.93,0.18,0.54,-99,-99,-99
9,nflow_syn,-99,-99,-99,0.9,0.85,0.96,0.15,0.5,-99,-99,-99


In [251]:
pd.DataFrame(get_scores_df("loan"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,0.99,0.96,0.4,0.83,0.89,0.92,0.85,0.87,0.05,0.16,1.0
1,ctgan_sdv,0.97,0.97,0.37,0.85,0.93,0.91,0.88,0.89,0.04,0.15,1.0
2,tvae_sdv,0.91,0.95,0.2,0.82,0.85,0.96,0.79,0.83,0.07,0.25,1.0
3,gaussian_copula_sdv,0.94,0.98,0.39,0.89,0.97,0.97,0.91,0.93,0.03,0.19,1.0
4,ctgan_syn,0.96,0.96,0.49,0.87,0.96,0.98,0.85,0.9,0.05,0.21,1.0
5,tvae_syn,0.93,0.97,0.36,0.88,0.97,0.96,0.89,0.92,0.04,0.27,1.0
6,goggle_syn,0.75,0.89,0.19,0.58,0.77,0.91,0.58,0.66,0.13,0.55,1.0
7,arf_syn,1.0,0.99,0.33,0.89,0.98,0.97,0.91,0.93,0.02,0.15,1.0
8,ddpm_syn,1.0,0.94,0.44,0.96,0.97,0.96,0.89,0.93,0.03,0.09,1.0
9,nflow_syn,0.99,0.97,0.38,0.88,0.95,0.94,0.89,0.91,0.04,0.17,1.0


In [252]:
pd.DataFrame(get_scores_df("pums"))

Unnamed: 0,model,domain_cov,stats_cov,outliers_cov,ks_sim,tv_sim,corr_sim,contin_sim,sdv_quality_report,wass_dist,js_dist,new_row_synthesis
0,actgan_gretel,0.81,0.99,0.6,0.85,0.98,0.97,0.91,0.92,0.01,0.2,0.99
1,ctgan_sdv,0.82,0.99,0.67,0.88,0.99,0.99,0.94,0.95,0.01,0.19,1.0
2,tvae_sdv,0.8,0.97,0.63,0.81,0.92,0.95,0.82,0.86,0.04,0.29,1.0
3,gaussian_copula_sdv,0.77,0.96,0.52,0.73,0.73,0.97,0.62,0.69,0.05,0.35,1.0
4,ctgan_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
5,tvae_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
6,goggle_syn,Running,Running,Running,Running,Running,Running,Running,Running,Running,Running,Running
7,arf_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed
8,ddpm_syn,Canceled,Canceled,Canceled,Canceled,Canceled,Canceled,Canceled,Canceled,Canceled,Canceled,Canceled
9,nflow_syn,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed,Failed


In [281]:
def get_ml_metrics(ml_efficacy_stats):
    # ml_efficacy_stats = data_dict["ml_efficacy"]
    ml_metrics = {}
    for classifier, scores in ml_efficacy_stats.items():
        if classifier == "timing":
            continue            
        ml_metrics[classifier] = round(abs(scores["synthetic_f1"] - scores["real_f1"]), 2)
    return ml_metrics

ML_CLASSIFICATION_TASK_DATASETS = [
    "adult", "census", "credit", "covtype", "loan", "intrusion"]
ML_REGRESSION_TASK_DATASETS = ["health_insurance"]
# ML_CLASSIFICATION_MODELS = ["adaboost", "decision_tree", "logistic", "mlp"]
# ML_REGRESSION_MODELS = ["linear", "mlp"]

In [282]:
ERROR_VAL  = -99
def get_ml_scores_df(exp_dataset, final_report, metric):
    for lib, models in ALL_TABULAR_MODELS.items():
        for model in models: 
            # corr_file = f"../metrics_out/tabular/{model}_{lib}/{exp_dataset}/{exp_dataset}_{model}_correlation.csv"
            metrics_file = f"../metrics_out/tabular/{model}_{lib}/{exp_dataset}/{exp_dataset}_{model}_metrics.json"
            
            final_report["dataset"].append(exp_dataset)
            final_report["model"].append(f"{model}_{lib}")
            
            try:
                with open(metrics_file, 'r') as file:
                    metrics_scores = json.load(file)
            except Exception as e:
                # print(model, lib)
                status = None
                if lib in TABULAR_INCOMPLETE_JOBS and model in TABULAR_INCOMPLETE_JOBS[lib]:
                    if exp_dataset in TABULAR_INCOMPLETE_JOBS[lib][model]:
                        # print("Status: ",  TABULAR_INCOMPLETE_JOBS[lib][model][exp_dataset])
                        # print("-")
                        status = TABULAR_INCOMPLETE_JOBS[lib][model][exp_dataset]
                else:
                    status = "Skipped"
                    # print(e)
                    # print("Skipped")
                for k in final_report.keys():
                    if k == "model": continue
                    final_report[k].append(status)
                    # print(final_report)
                continue
                
            for classifier, scores in metrics_scores["ml_efficacy"].items():
                if classifier == "timing":
                    continue            
                final_report[classifier].append(round(abs(scores[f"synthetic_{metric}"] - scores[f"real_{metric}"]), 2))
                # final_report_ml_cls["model"]["adaboost_classifier"].append()
                
    return final_report


In [286]:
final_report_ml_cls = {
    "dataset": [],  
    "model": [],
    "adaboost_classification": [], 
    "decision_tree_classification": [], 
    "logistic_classification": [],
    "mlp_classification": [], 
    # "linear_regressor": [],
    # "mlp_regressor": []
}

ml_cls_metric = "f1"

final_report_ml_regress = {
    "dataset": [],  
    "model": [],
    "linear_regression": [], 
    "mlp_regression": []
}

ml_regress_metric = "r2"

In [287]:
final_report = get_ml_scores_df("adult", final_report_ml_cls, ml_cls_metric)
pd.DataFrame(final_report)

Unnamed: 0,dataset,model,adaboost_classification,decision_tree_classification,logistic_classification,mlp_classification
0,adult,actgan_gretel,0.03,0.01,0.05,0.02
1,adult,ctgan_sdv,0.01,0.03,0.05,0.01
2,adult,tvae_sdv,0.01,0.02,0.05,0.03
3,adult,gaussian_copula_sdv,0.02,0.11,0.06,0.03
4,adult,ctgan_syn,0.04,0.08,0.06,0.02
5,adult,tvae_syn,0.02,0.07,0.08,0.01
6,adult,goggle_syn,0.06,0.31,0.23,0.1
7,adult,arf_syn,0.01,0.02,0.03,0.02
8,adult,ddpm_syn,0.01,0.02,0.03,0.02
9,adult,nflow_syn,0.03,0.05,0.09,0.03


In [292]:
final_report = get_ml_scores_df("loan", final_report_ml_cls, ml_cls_metric)
pd.DataFrame(final_report)
#"adult", "census", "credit", "covtype", "loan", "intrusion"

ValueError: All arrays must be of the same length

In [288]:
final_report = get_ml_scores_df("health_insurance", final_report_ml_regress, ml_regress_metric)
pd.DataFrame(final_report)

Unnamed: 0,dataset,model,linear_regression,mlp_regression
0,health_insurance,actgan_gretel,0.55,0.01
1,health_insurance,ctgan_sdv,0.9,0.01
2,health_insurance,tvae_sdv,0.12,0.01
3,health_insurance,gaussian_copula_sdv,0.31,0.01
4,health_insurance,ctgan_syn,0.0,0.01
5,health_insurance,tvae_syn,0.05,0.01
6,health_insurance,goggle_syn,0.44,0.02
7,health_insurance,arf_syn,0.0,0.01
8,health_insurance,ddpm_syn,0.27,0.01
9,health_insurance,nflow_syn,0.1,0.01
