In [1]:
import pandas as pd
import csv
import os
import sys
import csv
import time
import numpy as np
import random
from scipy.io import arff

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

sys.path.insert(0, f"{os.path.dirname(os.getcwd())}/src")
from utils import evaluate_result
from data_imbalance_src.smote_oversampling import RandomOversampling, ADASYNOversampling, BorderlineSMOTEOversampling, SMOTEOversampling, SVMSMOTEOversampling
from data_imbalance_src.smote_oversampling import SMOTUNEDOversampling
from data_imbalance_src.dazzle import DAZZLEOversampling
from data_imbalance_src.dazzle1 import DAZZLE1Oversampling
from data_imbalance_src.dazzle2 import DAZZLE2Oversampling
from data_imbalance_src.Imbalance_Farou2022.data_generation import GANOversampling
from data_imbalance_src.random_projection import RandomProjectionOversampling

from diveplane.utilities import infer_feature_attributes
from diveplane.geminai import Geminai

from DataSynthesizer.DataDescriber import DataDescriber
from DataSynthesizer.DataGenerator import DataGenerator
from DataSynthesizer.ModelInspector import ModelInspector
from DataSynthesizer.lib.utils import read_json_file, display_bayesian_network

from sdv.metadata import SingleTableMetadata
from sdv.lite import SingleTablePreset
from sdv.single_table import CTGANSynthesizer
from sdv.single_table import GaussianCopulaSynthesizer

2023-10-06 01:44:44.870398: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-06 01:44:44.890315: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-06 01:44:45.128417: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-06 01:44:45.132203: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Draw the ten per-repeat random states from a dedicated, seeded generator so
# that a "Restart Kernel -> Run All" reproduces the same values (and therefore
# the same result file names) instead of a fresh sample on every run.
# A private Random instance is used so the global `random` state is untouched.
SEED = 42
rs_list = random.Random(SEED).sample(range(50, 500), 10)

In [3]:
# One experiment repeat per entry of rs_list; `rs` is recorded in the result
# file name. Iterating over rs_list directly keeps the repeat count and the
# list length in sync instead of relying on a hard-coded range(10).
for repeat, rs in enumerate(rs_list):
    print(f"----- in repeat {repeat+1} -----")
    write_file = f"Ambari_Vuln_res_r{repeat+1}_rn{rs}.csv"
    write_path = f"{os.path.dirname(os.getcwd())}/result/Ambari_Vuln/{write_file}"

    # open(..., "w") does not create missing parent directories, so make sure
    # the result folder exists before the header row is written.
    os.makedirs(os.path.dirname(write_path), exist_ok=True)

    # Start a fresh result file for this repeat; later sections append to it.
    with open(write_path, "w", newline="") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["oversampling_scheme", "runtime", "learner", "acc", "prec", "recall", "fpr", "f1", "auc", "g_score", "d2h"])
    
    # Locate the pre-split Ambari bug-report CSVs relative to the parent of the
    # current working directory and load both splits.
    project_root = os.path.dirname(os.getcwd())
    train_data_path = f"{project_root}/data/Bug_Reports/ambari-train.csv"
    test_data_path = f"{project_root}/data/Bug_Reports/ambari-test.csv"
    train_df, test_df = pd.read_csv(train_data_path), pd.read_csv(test_data_path)

    # The last column is used as the label; every column before it is a feature.
    X_train, y_train = train_df.iloc[:, :-1], train_df.iloc[:, -1]
    X_test, y_test = test_df.iloc[:, :-1], test_df.iloc[:, -1]

    # Report the class balance of each split. The ratio is the count of label 0
    # divided by the count of label 1, rounded to the nearest integer.
    train_counts = y_train.value_counts()
    print("--- y train classes count: \n" + f"{train_counts!s}")
    print("--- y train ratio: 1:" + f"{round(train_counts[0] / train_counts[1])!s}")
    print(" ")
    test_counts = y_test.value_counts()
    print("--- y test classes count: \n" + f"{test_counts!s}")
    print("--- y test ratio: 1:" + f"{round(test_counts[0] / test_counts[1])!s}")
    
    ########## normal run ##########
    # Baseline: every learner is trained on the original training split, with
    # no oversampling technique applied.
    print("----- normal -----")

    # Standardise the features; the scaler is fitted on the training split and
    # then reused, unchanged, on the test split.
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test), columns=X_test.columns, index=X_test.index
    )

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit all learners first, then predict with all of them, then write.
    for learner in learners.values():
        learner.fit(X_train_scale, y_train)
    learner_preds = {
        label: learner.predict(X_test_scale) for label, learner in learners.items()
    }

    # Append one row per learner; the runtime column is written as 0 for this
    # baseline.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["No", 0, label] + evaluate_result(y_pred, y_test))
    
    ########## random run ##########
    # Random oversampling: rebalance the training split with RandomOversampling,
    # then train and evaluate every learner on the result.
    print("----- random -----")

    rt, X_train_new, y_train_new = RandomOversampling(X_train=X_train, y_train=y_train)

    # Standardise using statistics of the oversampled training data only.
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new), columns=X_train_new.columns, index=X_train_new.index
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test), columns=X_test.columns, index=X_test.index
    )

    # Class balance after oversampling (label-0 count over label-1 count).
    new_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(new_counts[0] / new_counts[1])))

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit all learners first, then predict with all of them, then write.
    for learner in learners.values():
        learner.fit(X_train_scale, y_train_new)
    learner_preds = {
        label: learner.predict(X_test_scale) for label, learner in learners.items()
    }

    # Append one row per learner, tagged with the oversampler's reported runtime.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["Random", rt, label] + evaluate_result(y_pred, y_test))
    
    ########## ADASYN run ##########
    # ADASYN oversampling: rebalance the training split with ADASYNOversampling,
    # then train and evaluate every learner on the result.
    print("----- ADASYN ------")

    rt, X_train_new, y_train_new = ADASYNOversampling(X_train=X_train, y_train=y_train)

    # Standardise using statistics of the oversampled training data only.
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new), columns=X_train_new.columns, index=X_train_new.index
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test), columns=X_test.columns, index=X_test.index
    )

    # Class balance after oversampling (label-0 count over label-1 count).
    new_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(new_counts[0] / new_counts[1])))

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit all learners first, then predict with all of them, then write.
    for learner in learners.values():
        learner.fit(X_train_scale, y_train_new)
    learner_preds = {
        label: learner.predict(X_test_scale) for label, learner in learners.items()
    }

    # Append one row per learner, tagged with the oversampler's reported runtime.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["ADASYN", rt, label] + evaluate_result(y_pred, y_test))
    
    ########## BorderlineSMOTE run ##########
    # BorderlineSMOTE oversampling: rebalance the training split with
    # BorderlineSMOTEOversampling, then train and evaluate every learner.
    print("----- borderlineSMOTE -----")

    rt, X_train_new, y_train_new = BorderlineSMOTEOversampling(X_train=X_train, y_train=y_train)

    # Standardise using statistics of the oversampled training data only.
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new), columns=X_train_new.columns, index=X_train_new.index
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test), columns=X_test.columns, index=X_test.index
    )

    # Class balance after oversampling (label-0 count over label-1 count).
    new_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(new_counts[0] / new_counts[1])))

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit all learners first, then predict with all of them, then write.
    for learner in learners.values():
        learner.fit(X_train_scale, y_train_new)
    learner_preds = {
        label: learner.predict(X_test_scale) for label, learner in learners.items()
    }

    # Append one row per learner, tagged with the oversampler's reported runtime.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["BorderlineSMOTE", rt, label] + evaluate_result(y_pred, y_test))
    
    ########## SMOTE run ##########
    # SMOTE oversampling: rebalance the training split with SMOTEOversampling,
    # then train and evaluate every learner on the result.
    print("----- SMOTE -----")

    rt, X_train_new, y_train_new = SMOTEOversampling(X_train=X_train, y_train=y_train)

    # Standardise using statistics of the oversampled training data only.
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new), columns=X_train_new.columns, index=X_train_new.index
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test), columns=X_test.columns, index=X_test.index
    )

    # Class balance after oversampling (label-0 count over label-1 count).
    new_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(new_counts[0] / new_counts[1])))

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit all learners first, then predict with all of them, then write.
    for learner in learners.values():
        learner.fit(X_train_scale, y_train_new)
    learner_preds = {
        label: learner.predict(X_test_scale) for label, learner in learners.items()
    }

    # Append one row per learner, tagged with the oversampler's reported runtime.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["SMOTE", rt, label] + evaluate_result(y_pred, y_test))
    
    ########## SVMSMOTE run ##########
    # SVMSMOTE oversampling: rebalance the training split with
    # SVMSMOTEOversampling, then train and evaluate every learner on the result.
    print("----- SVMSMOTE -----")

    rt, X_train_new, y_train_new = SVMSMOTEOversampling(X_train=X_train, y_train=y_train)

    # Standardise using statistics of the oversampled training data only.
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new), columns=X_train_new.columns, index=X_train_new.index
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test), columns=X_test.columns, index=X_test.index
    )

    # Class balance after oversampling (label-0 count over label-1 count).
    new_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(new_counts[0] / new_counts[1])))

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit all learners first, then predict with all of them, then write.
    for learner in learners.values():
        learner.fit(X_train_scale, y_train_new)
    learner_preds = {
        label: learner.predict(X_test_scale) for label, learner in learners.items()
    }

    # Append one row per learner, tagged with the oversampler's reported runtime.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["SVMSMOTE", rt, label] + evaluate_result(y_pred, y_test))
    
    ########## SMOTUNED run ##########
    # SMOTUNED is invoked once per learner (via its `model` argument), so each
    # learner gets its own oversampled training set, scaler, and runtime.
    # NOTE(review): X_test / y_test are passed into SMOTUNEDOversampling; if the
    # helper tunes against them the reported scores are optimistic — confirm.
    print("----- SMOTUNED -----")

    learner_labels = ["SVM", "KNN", "LR", "DT", "RF", "LightGBM", "Adaboost", "GBDT"]
    smotuned_rt = {}
    smotuned_X_train = {}
    smotuned_y_train = {}
    smotuned_X_test = {}

    for label in learner_labels:
        learner_rt, X_tuned, y_tuned = SMOTUNEDOversampling(
            X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, model=label
        )

        # A fresh scaler per learner, fitted on that learner's oversampled data.
        # The scaled frames are built without an explicit index.
        scaler = StandardScaler()
        smotuned_X_train[label] = pd.DataFrame(scaler.fit_transform(X_tuned), columns=X_tuned.columns)
        smotuned_X_test[label] = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
        smotuned_y_train[label] = y_tuned
        smotuned_rt[label] = learner_rt

        # Class balance after oversampling (label-0 count over label-1 count).
        tuned_counts = y_tuned.value_counts()
        print(f"y train ratio of {label}: 1:" + str(round(tuned_counts[0] / tuned_counts[1])))

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Each learner is fitted and evaluated on the data tuned for it.
    for label, learner in learners.items():
        learner.fit(smotuned_X_train[label], smotuned_y_train[label])
    learner_preds = {
        label: learner.predict(smotuned_X_test[label]) for label, learner in learners.items()
    }

    # Append one row per learner with that learner's own SMOTUNED runtime.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["SMOTUNED", smotuned_rt[label], label] + evaluate_result(y_pred, y_test))
    
    ########## DAZZLE1 run ##########
    # DAZZLE1 oversampling: the helper receives copies of both splits and hands
    # back the oversampled training data together with the test features to
    # predict on, so no StandardScaler is applied in this section.
    print("----- DAZZLE1 -----")
    cols, tar = X_train.columns, y_train.name

    # Work on copies so the helper cannot modify the shared splits.
    X_train_GAN, y_train_GAN = X_train.copy(), y_train.copy()
    X_test_GAN, y_test_GAN = X_test.copy(), y_test.copy()
    rt, X_train_new, y_train_new, X_test_scale = DAZZLE1Oversampling(
        X_train=X_train_GAN,
        y_train=y_train_GAN,
        X_test=X_test_GAN,
        y_test=y_test_GAN,
    )

    # Re-wrap the helper's outputs with the original column names / target name.
    X_train_new = pd.DataFrame(X_train_new, columns=cols)
    y_train_new = pd.Series(y_train_new, name=tar)
    X_test_scale = pd.DataFrame(X_test_scale, columns=cols)

    # Class balance after oversampling (label-0 count over label-1 count).
    new_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(new_counts[0] / new_counts[1])))

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit all learners first, then predict with all of them, then write.
    for learner in learners.values():
        learner.fit(X_train_new, y_train_new)
    learner_preds = {
        label: learner.predict(X_test_scale) for label, learner in learners.items()
    }

    # Append one row per learner, tagged with the oversampler's reported runtime.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["DAZZLE1", rt, label] + evaluate_result(y_pred, y_test))
        
    ########## DAZZLE2 run ##########
    # DAZZLE2 oversampling: the helper receives copies of the training split and
    # the test features, and hands back the oversampled training data together
    # with the test features to predict on, so no StandardScaler is applied here.
    print("----- DAZZLE2 -----")
    cols, tar = X_train.columns, y_train.name

    # Work on copies so the helper cannot modify the shared splits.
    # y_test_GAN is copied as well but is not passed to DAZZLE2Oversampling.
    X_train_GAN, y_train_GAN = X_train.copy(), y_train.copy()
    X_test_GAN, y_test_GAN = X_test.copy(), y_test.copy()
    rt, X_train_new, y_train_new, X_test_scale = DAZZLE2Oversampling(
        X_train=X_train_GAN,
        y_train=y_train_GAN,
        X_test=X_test_GAN,
    )

    # Re-wrap the helper's outputs with the original column names / target name.
    X_train_new = pd.DataFrame(X_train_new, columns=cols)
    y_train_new = pd.Series(y_train_new, name=tar)
    X_test_scale = pd.DataFrame(X_test_scale, columns=cols)

    # Class balance after oversampling (label-0 count over label-1 count).
    new_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(new_counts[0] / new_counts[1])))

    # Learners keyed by the label written to the "learner" column.
    learners = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit all learners first, then predict with all of them, then write.
    for learner in learners.values():
        learner.fit(X_train_new, y_train_new)
    learner_preds = {
        label: learner.predict(X_test_scale) for label, learner in learners.items()
    }

    # Append one row per learner, tagged with the oversampler's reported runtime.
    with open(write_path, "a", newline="") as f:
        csv_writer = csv.writer(f)
        for label, y_pred in learner_preds.items():
            csv_writer.writerow(["DAZZLE2", rt, label] + evaluate_result(y_pred, y_test))
    
    ########## WGAN run ##########
    # WGAN oversampling run - WGAN oversampling technique
    # Flow: standardize X_train -> GANOversampling -> standardize the oversampled
    # set again -> fit the eight classifiers -> append one CSV row per classifier.
    print("----- WGAN -----")

    # First scaling pass: the GAN sampler is fed standardized training features.
    gan_scaler = StandardScaler()
    X_train_GAN = pd.DataFrame(gan_scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
    rt, X_train_new, y_train_new = GANOversampling(X_train=X_train_GAN, y_train=y_train)

    # The test set has to go through the same first pass. Feeding raw X_test to
    # the second scaler (fit on already-standardized training data) would leave
    # train and test features on different scales.
    # NOTE(review): assumes GANOversampling returns features in the standardized
    # space it was given - confirm against its implementation.
    X_test_GAN = pd.DataFrame(gan_scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

    # scale data (second pass, fit on the oversampled training set)
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(scaler.fit_transform(X_train_new), columns=X_train_new.columns, index=X_train_new.index)
    X_test_scale = pd.DataFrame(scaler.transform(X_test_GAN), columns=X_test.columns, index=X_test.index)

    class_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(class_counts[0] / class_counts[1])))

    # create models (insertion order fixes the fit / predict / report order)
    clf_by_name = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit every model first, then predict with every model, as two passes.
    for clf in clf_by_name.values():
        clf.fit(X_train_scale, y_train_new)
    y_pred_by_name = {clf_name: clf.predict(X_test_scale) for clf_name, clf in clf_by_name.items()}

    # Append one result row per classifier to the shared results file.
    with open(write_path, "a", newline="") as result_file:
        csv_writer = csv.writer(result_file)
        for clf_name, y_pred in y_pred_by_name.items():
            csv_writer.writerow(["WGAN", rt, clf_name] + evaluate_result(y_pred, y_test))


    ########## RandomProjection run ##########
    # Random projection oversampling run: resample copies of the training split
    # with RandomProjectionOversampling, standardize, train, and log results.
    print("----- Random Projection -----")
    X_train_RP, y_train_RP = X_train.copy(), y_train.copy()
    rt, X_train_new, y_train_new = RandomProjectionOversampling(X_train=X_train_RP, y_train=y_train_RP)

    # scale data: fit on the oversampled training set, reuse for the test set
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new),
        columns=X_train_new.columns,
        index=X_train_new.index,
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    class_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(class_counts[0] / class_counts[1])))

    # create models (insertion order fixes the fit / predict / report order)
    clf_by_name = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit every model first, then predict with every model, as two passes.
    for clf in clf_by_name.values():
        clf.fit(X_train_scale, y_train_new)
    y_pred_by_name = {clf_name: clf.predict(X_test_scale) for clf_name, clf in clf_by_name.items()}

    # Append one result row per classifier to the shared results file.
    with open(write_path, "a", newline="") as result_file:
        csv_writer = csv.writer(result_file)
        for clf_name, y_pred in y_pred_by_name.items():
            csv_writer.writerow(["RP", rt, clf_name] + evaluate_result(y_pred, y_test))
    
    ########## Diveplane 1 run ##########
    # Train Geminai on the labelled training frame and synthesize a whole new
    # training set, alternating the requested target between 1 and 0.
    print("----- diveplane 1 -----")
    tar = y_train.name
    # One {tar: 1}/{tar: 0} pair per two training rows.
    conditions = [{tar: 1}, {tar: 0}] * (len(X_train) // 2)

    # Training frame for the synthesizer: features plus the target appended as
    # the last column.
    X_train_diveplane_1 = X_train.copy()
    y_train_diveplane_1 = y_train.copy()
    X_train_diveplane_1[tar] = y_train_diveplane_1

    # Flag every feature inferred as nominal as non-sensitive.
    features = infer_feature_attributes(X_train_diveplane_1)
    for _, attrs in features.items():
        if attrs["type"] == "nominal":
            attrs["non_sensitive"] = True

    # rt covers synthesizer construction, training and case synthesis.
    start_time = time.time()
    g = Geminai()
    g.train(X_train_diveplane_1, features=features)

    gen_df = g.synthesize_cases(
        n_samples=len(conditions),
        case_context_values_maps=conditions,
        desired_conviction=5,
        generate_new_cases="no"
    )

    rt = time.time() - start_time

    # Positional split: everything but the last column is a feature, the last
    # column is taken as the target.
    X_train_new, y_train_new = gen_df.iloc[:, :-1], gen_df.iloc[:, -1]

    # scale data: fit on the synthesized training set, reuse for the test set
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new),
        columns=X_train_new.columns,
        index=X_train_new.index,
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    class_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(class_counts[0] / class_counts[1])))

    # create models (insertion order fixes the fit / predict / report order)
    clf_by_name = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit every model first, then predict with every model, as two passes.
    for clf in clf_by_name.values():
        clf.fit(X_train_scale, y_train_new)
    y_pred_by_name = {clf_name: clf.predict(X_test_scale) for clf_name, clf in clf_by_name.items()}

    # Append one result row per classifier to the shared results file.
    with open(write_path, "a", newline="") as result_file:
        csv_writer = csv.writer(result_file)
        for clf_name, y_pred in y_pred_by_name.items():
            csv_writer.writerow(["Diveplane1", rt, clf_name] + evaluate_result(y_pred, y_test))
    
    ########## Diveplane 2 run ##########
    # Train Geminai on the labelled training frame, synthesize only target == 1
    # cases (as many as the class-count gap) and append them to the real data.
    print("----- diveplane 2 -----")
    tar = y_train.name
    label_counts = y_train.value_counts()
    n_cases_diff = label_counts[0] - label_counts[1]
    conditions = [{tar: 1}] * n_cases_diff

    # Training frame for the synthesizer: features plus the target appended as
    # the last column.
    X_train_diveplane_2 = X_train.copy()
    y_train_diveplane_2 = y_train.copy()
    X_train_diveplane_2[tar] = y_train_diveplane_2

    # Flag every feature inferred as nominal as non-sensitive.
    features = infer_feature_attributes(X_train_diveplane_2)
    for _, attrs in features.items():
        if attrs["type"] == "nominal":
            attrs["non_sensitive"] = True

    # rt covers synthesizer construction, training and case synthesis.
    start_time = time.time()
    g = Geminai()
    g.train(X_train_diveplane_2, features=features)

    gen_df = g.synthesize_cases(
        n_samples=len(conditions),
        case_context_values_maps=conditions,
        desired_conviction=5,
        generate_new_cases="no"
    )

    rt = time.time() - start_time

    # Stack real and synthesized rows, then split positionally: the last
    # column is the target, the rest are features.
    combined_df = pd.concat([X_train_diveplane_2, gen_df], ignore_index=True, axis=0)
    y_train_new = combined_df.iloc[:, -1]
    X_train_new = combined_df.iloc[:, :-1]

    # scale data: fit on the augmented training set, reuse for the test set
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new),
        columns=X_train_new.columns,
        index=X_train_new.index,
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    class_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(class_counts[0] / class_counts[1])))

    # create models (insertion order fixes the fit / predict / report order)
    clf_by_name = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit every model first, then predict with every model, as two passes.
    for clf in clf_by_name.values():
        clf.fit(X_train_scale, y_train_new)
    y_pred_by_name = {clf_name: clf.predict(X_test_scale) for clf_name, clf in clf_by_name.items()}

    # Append one result row per classifier to the shared results file.
    with open(write_path, "a", newline="") as result_file:
        csv_writer = csv.writer(result_file)
        for clf_name, y_pred in y_pred_by_name.items():
            csv_writer.writerow(["Diveplane2", rt, clf_name] + evaluate_result(y_pred, y_test))
    
    ########## DS run ##########
    # DataSynthesizer run: describe the positive training rows in independent
    # attribute mode, generate enough synthetic rows to close the class-count
    # gap, label them 1 and stack them under the real training data.
    print("----- DS -----")
    mode = "independent_attribute_mode"

    col = X_train.columns
    tar = y_train.name

    # Positive rows only, with the target column (appended last) dropped again
    # before the rows are written to disk for the describer.
    X_train_DS = X_train.copy()
    y_train_DS = y_train.copy()
    X_train_DS[tar] = y_train_DS
    write_df = X_train_DS[X_train_DS[tar] == 1].iloc[:, :-1]
    pos_csv_path = f"{os.path.dirname(os.getcwd())}/extra/js_vuln_pos_df.csv"
    write_df.to_csv(pos_csv_path, index=False)

    threshold = 20
    label_counts = y_train.value_counts()
    num_tuples_to_generate = int(label_counts[0] - label_counts[1])

    # rt covers describing, generating and saving the synthetic rows.
    start_time = time.time()

    description_file = f"{os.path.dirname(os.getcwd())}/extra/js_vuln.json"
    describer = DataDescriber(category_threshold=threshold)
    describer.describe_dataset_in_independent_attribute_mode(
        dataset_file=pos_csv_path
    )
    describer.save_dataset_description_to_file(description_file)

    generator = DataGenerator()
    generator.generate_dataset_in_independent_mode(num_tuples_to_generate, description_file)
    syn_csv_path = f"{os.path.dirname(os.getcwd())}/extra/js_vuln_syn_df.csv"
    generator.save_synthetic_data(syn_csv_path)

    rt = time.time() - start_time

    # Read the synthetic rows back and stack them under the real rows; every
    # synthetic row gets label 1.
    synthetic_X = pd.read_csv(syn_csv_path).to_numpy()
    synthetic_y = np.ones(num_tuples_to_generate)
    X_train_new = pd.DataFrame(np.vstack((X_train.to_numpy(), synthetic_X)), columns=col)
    y_train_new = pd.Series(np.hstack((y_train.to_numpy(), synthetic_y)), name=tar)

    # scale data: fit on the augmented training set, reuse for the test set
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new),
        columns=X_train_new.columns,
        index=X_train_new.index,
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    class_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(class_counts[0] / class_counts[1])))

    # create models (insertion order fixes the fit / predict / report order)
    clf_by_name = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit every model first, then predict with every model, as two passes.
    for clf in clf_by_name.values():
        clf.fit(X_train_scale, y_train_new)
    y_pred_by_name = {clf_name: clf.predict(X_test_scale) for clf_name, clf in clf_by_name.items()}

    # Append one result row per classifier to the shared results file.
    with open(write_path, "a", newline="") as result_file:
        csv_writer = csv.writer(result_file)
        for clf_name, y_pred in y_pred_by_name.items():
            csv_writer.writerow(["DS", rt, clf_name] + evaluate_result(y_pred, y_test))
    
    ########## SDV FASTML run ##########
    # Fit an SDV SingleTablePreset ("FAST_ML") on the positive training rows,
    # sample enough rows to close the class-count gap, label them 1 and stack
    # them under the real training data.
    print("----- SDV FASTML -----")
    col = X_train.columns
    tar = y_train.name
    label_counts = y_train.value_counts()
    num_tuples_to_generate = int(label_counts[0] - label_counts[1])

    # Positive rows only, with the target column (appended last) dropped again.
    X_train_SDV = X_train.copy()
    y_train_SDV = y_train.copy()
    X_train_SDV[tar] = y_train_SDV
    pos_df = X_train_SDV[X_train_SDV[tar] == 1].iloc[:, :-1]

    metadata = SingleTableMetadata()
    metadata.detect_from_dataframe(data=pos_df)

    # rt covers synthesizer construction, fitting and sampling.
    start_time = time.time()
    syn1 = SingleTablePreset(metadata, name="FAST_ML")
    syn1.fit(data=pos_df)
    synthetic_X = syn1.sample(num_rows=num_tuples_to_generate).to_numpy()

    rt = time.time() - start_time

    # Real rows first, synthetic rows (all labelled 1) underneath.
    X_train_new = pd.DataFrame(np.vstack((X_train.to_numpy(), synthetic_X)), columns=col)
    synthetic_y = np.ones(num_tuples_to_generate)
    y_train_new = pd.Series(np.hstack((y_train.to_numpy(), synthetic_y)), name=tar)

    # scale data: fit on the augmented training set, reuse for the test set
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new),
        columns=X_train_new.columns,
        index=X_train_new.index,
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    class_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(class_counts[0] / class_counts[1])))

    # create models (insertion order fixes the fit / predict / report order)
    clf_by_name = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit every model first, then predict with every model, as two passes.
    for clf in clf_by_name.values():
        clf.fit(X_train_scale, y_train_new)
    y_pred_by_name = {clf_name: clf.predict(X_test_scale) for clf_name, clf in clf_by_name.items()}

    # Append one result row per classifier to the shared results file.
    with open(write_path, "a", newline="") as result_file:
        csv_writer = csv.writer(result_file)
        for clf_name, y_pred in y_pred_by_name.items():
            csv_writer.writerow(["SDV_FASTML", rt, clf_name] + evaluate_result(y_pred, y_test))
    
    ########## SDV GC ##########
    # Fit an SDV GaussianCopulaSynthesizer on the positive rows and use its
    # samples (labelled 1) to augment the real training data.
    # metadata, pos_df, num_tuples_to_generate, col and tar are not assigned in
    # this section; they must already exist when it runs.
    print("----- SDV GC -----")
    # rt covers synthesizer construction, fitting and sampling.
    start_time = time.time()
    syn2 = GaussianCopulaSynthesizer(metadata)
    syn2.fit(data=pos_df)
    synthetic_X = syn2.sample(num_rows=num_tuples_to_generate).to_numpy()

    rt = time.time() - start_time

    # Real rows first, synthetic rows (all labelled 1) underneath.
    X_train_new = pd.DataFrame(np.vstack((X_train.to_numpy(), synthetic_X)), columns=col)
    synthetic_y = np.ones(num_tuples_to_generate)
    y_train_new = pd.Series(np.hstack((y_train.to_numpy(), synthetic_y)), name=tar)

    # scale data: fit on the augmented training set, reuse for the test set
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new),
        columns=X_train_new.columns,
        index=X_train_new.index,
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    class_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(class_counts[0] / class_counts[1])))

    # create models (insertion order fixes the fit / predict / report order)
    clf_by_name = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit every model first, then predict with every model, as two passes.
    for clf in clf_by_name.values():
        clf.fit(X_train_scale, y_train_new)
    y_pred_by_name = {clf_name: clf.predict(X_test_scale) for clf_name, clf in clf_by_name.items()}

    # Append one result row per classifier to the shared results file.
    with open(write_path, "a", newline="") as result_file:
        csv_writer = csv.writer(result_file)
        for clf_name, y_pred in y_pred_by_name.items():
            csv_writer.writerow(["SDV_GC", rt, clf_name] + evaluate_result(y_pred, y_test))
    
    ########## SDV GAN ##########
    # Fit an SDV CTGANSynthesizer on the positive rows and use its samples
    # (labelled 1) to augment the real training data.
    # metadata, pos_df, num_tuples_to_generate, col and tar are not assigned in
    # this section; they must already exist when it runs.
    print("----- SDV GAN -----")
    # rt covers synthesizer construction, fitting and sampling.
    start_time = time.time()
    syn3 = CTGANSynthesizer(metadata)
    syn3.fit(data=pos_df)
    synthetic_X = syn3.sample(num_rows=num_tuples_to_generate).to_numpy()

    rt = time.time() - start_time

    # Real rows first, synthetic rows (all labelled 1) underneath.
    X_train_new = pd.DataFrame(np.vstack((X_train.to_numpy(), synthetic_X)), columns=col)
    synthetic_y = np.ones(num_tuples_to_generate)
    y_train_new = pd.Series(np.hstack((y_train.to_numpy(), synthetic_y)), name=tar)

    # scale data: fit on the augmented training set, reuse for the test set
    scaler = StandardScaler()
    X_train_scale = pd.DataFrame(
        scaler.fit_transform(X_train_new),
        columns=X_train_new.columns,
        index=X_train_new.index,
    )
    X_test_scale = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    class_counts = y_train_new.value_counts()
    print("y train ratio: 1:" + str(round(class_counts[0] / class_counts[1])))

    # create models (insertion order fixes the fit / predict / report order)
    clf_by_name = {
        "SVM": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
        "LR": LogisticRegression(random_state=42, solver="saga", max_iter=20000, n_jobs=-1),
        "DT": DecisionTreeClassifier(),
        "RF": RandomForestClassifier(random_state=42, n_jobs=-1),
        "LightGBM": LGBMClassifier(objective="binary", random_state=42, n_jobs=-1),
        "Adaboost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, random_state=42),
    }

    # Fit every model first, then predict with every model, as two passes.
    for clf in clf_by_name.values():
        clf.fit(X_train_scale, y_train_new)
    y_pred_by_name = {clf_name: clf.predict(X_test_scale) for clf_name, clf in clf_by_name.items()}

    # Append one result row per classifier to the shared results file.
    with open(write_path, "a", newline="") as result_file:
        csv_writer = csv.writer(result_file)
        for clf_name, y_pred in y_pred_by_name.items():
            csv_writer.writerow(["SDV_GAN", rt, clf_name] + evaluate_result(y_pred, y_test))

----- in repeat 1 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/1048] LG:0.039 LD:-0.246 D:0.127 GP:0.075 RMSEAVG:0.301 NUM:0.301 SynTraiAuc:0.021 RFAcc:1.000                   
[  500/1048] LG:1.076 LD:1.626 D:1.737 GP:0.022 RMSEAVG:0.301 NUM:0.301 SynTraiAuc:0.021 RFAcc:1.000                    
[ 1000/1048] LG:0.614 LD:0.716 D:0.744 GP:0.006 RMSEAVG:0.301 NUM

389                                                                                                                     
[    0/2058] LG:0.360 LD:-0.370 D:-0.021 GP:0.035 RMSEAVG:0.204 NUM:0.204 SynTraiAuc:0.976 RFAcc:1.000                  
[  500/2058] LG:0.912 LD:1.011 D:1.062 GP:0.005 RMSEAVG:0.204 NUM:0.204 SynTraiAuc:0.976 RFAcc:1.000                    
[ 1000/2058] LG:0.953 LD:0.864 D:0.882 GP:0.002 RMSEAVG:0.204 NUM:0.204 SynTraiAuc:0.976 RFAcc:1.000                    
[ 1500/2058] LG:1.004 LD:0.746 D:0.756 GP:0.001 RMSEAVG:0.073 NUM:0.073 SynTraiAuc:0.976 RFAcc:1.000                    
[ 2000/2058] LG:1.040 LD:0.674 D:0.700 GP:0.003 RMSEAVG:0.073 NUM:0.073 SynTraiAuc:0.976 RFAcc:1.000                    
[ 2058/2058] LG:1.057 LD:0.587 D:0.604 GP:0.002 RMSEAVG:0.073 NUM:0.073 SynTraiAuc:0.976 RFAcc:1.000                    
needed sample                                                                                                           
389                             

2023-10-06 02:17:07.847798: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-06 02:17:07.848705: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/150 completed. Gen loss: -0.01875651441514492. Desc loss_real: 0.03131449595093727. Desc loss_fake: 0.01875651441514492
Epoch 51/150 completed. Gen loss: -0.12709473073482513. Desc loss_real: 0.13733088970184326. Desc loss_fake: 0.12709473073482513
Epoch 101/150 completed. Gen loss: -0.03215299919247627. Desc loss_real: 0.032629549503326416. Desc loss_fake: 0.03215299919247627


  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 2 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/1824] LG:-0.059 LD:-1.984 D:0.144 GP:0.193 RMSEAVG:0.325 NUM:0.325 SynTraiAuc:0.667 RFAcc:1.000                  
[  500/1824] LG:0.323 LD:1.360 D:1.402 GP:0.004 RMSEAVG:0.325 NUM:0.325 SynTraiAuc:0.667 RFAcc:1.000                    
[ 1000/1824] LG:0.574 LD:1.886 D:2.073 GP:0.01

needed sample                                                                                                           
389                                                                                                                     
[    0/721] LG:-0.239 LD:-0.308 D:0.011 GP:0.029 RMSEAVG:0.175 NUM:0.175 SynTraiAuc:0.000 RFAcc:1.000                   
[  500/721] LG:0.089 LD:0.627 D:0.642 GP:0.001 RMSEAVG:0.175 NUM:0.175 SynTraiAuc:0.000 RFAcc:1.000                     
[  721/721] LG:-0.153 LD:0.333 D:0.338 GP:0.001 RMSEAVG:0.175 NUM:0.175 SynTraiAuc:0.000 RFAcc:1.000                    
needed sample                                                                                                           
389                                                                                                                     
[    0/4116] LG:-0.122 LD:-0.054 D:0.069 GP:0.015 RMSEAVG:0.179 NUM:0.179 SynTraiAuc:0.805 RFAcc:1.000                  
[  500/4116] LG:0.757 LD:0.954 D

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 3 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/5344] LG:-0.083 LD:-0.438 D:-0.285 GP:0.011 RMSEAVG:0.154 NUM:0.154 SynTraiAuc:0.771 RFAcc:1.000                 
[  500/5344] LG:0.202 LD:0.655 D:0.665 GP:0.001 RMSEAVG:0.154 NUM:0.154 SynTraiAuc:0.771 RFAcc:1.000                    
[ 1000/5344] LG:-0.162 LD:0.073 D:0.080 GP:0.0

[ 5000/7317] LG:-0.054 LD:0.332 D:0.371 GP:0.002 RMSEAVG:0.053 NUM:0.053 SynTraiAuc:0.659 RFAcc:1.000                   
[ 5500/7317] LG:-0.079 LD:0.377 D:0.451 GP:0.004 RMSEAVG:0.053 NUM:0.053 SynTraiAuc:0.659 RFAcc:1.000                   
[ 6000/7317] LG:-0.051 LD:0.324 D:0.345 GP:0.001 RMSEAVG:0.053 NUM:0.053 SynTraiAuc:0.659 RFAcc:1.000                   
[ 6500/7317] LG:-0.015 LD:0.332 D:0.383 GP:0.003 RMSEAVG:0.040 NUM:0.040 SynTraiAuc:0.659 RFAcc:1.000                   
[ 7000/7317] LG:0.007 LD:0.385 D:0.444 GP:0.003 RMSEAVG:0.040 NUM:0.040 SynTraiAuc:0.659 RFAcc:1.000                    
[ 7317/7317] LG:0.042 LD:0.251 D:0.300 GP:0.003 RMSEAVG:0.040 NUM:0.040 SynTraiAuc:0.659 RFAcc:1.000                    
needed sample                                                                                                           
389                                                                                                                     
[    0/2534] LG:-0.097 LD:-4.327

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 4 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/424] LG:-0.113 LD:-0.434 D:-0.217 GP:0.017 RMSEAVG:0.171 NUM:0.171 SynTraiAuc:0.990 RFAcc:1.000                  
[  424/424] LG:0.447 LD:0.914 D:0.923 GP:0.001 RMSEAVG:0.171 NUM:0.171 SynTraiAuc:0.990 RFAcc:1.000                     
needed sample                                 

[ 2500/6804] LG:-0.510 LD:0.180 D:0.219 GP:0.008 RMSEAVG:0.034 NUM:0.034 SynTraiAuc:0.463 RFAcc:1.000                   
[ 3000/6804] LG:-0.477 LD:0.330 D:0.363 GP:0.007 RMSEAVG:0.034 NUM:0.034 SynTraiAuc:0.463 RFAcc:1.000                   
[ 3500/6804] LG:-0.461 LD:0.187 D:0.228 GP:0.008 RMSEAVG:0.034 NUM:0.034 SynTraiAuc:0.463 RFAcc:1.000                   
[ 4000/6804] LG:-0.617 LD:0.327 D:0.375 GP:0.010 RMSEAVG:0.030 NUM:0.030 SynTraiAuc:0.463 RFAcc:1.000                   
[ 4500/6804] LG:-0.595 LD:0.208 D:0.243 GP:0.007 RMSEAVG:0.030 NUM:0.030 SynTraiAuc:0.463 RFAcc:1.000                   
[ 5000/6804] LG:-0.633 LD:0.247 D:0.316 GP:0.014 RMSEAVG:0.036 NUM:0.036 SynTraiAuc:0.841 RFAcc:1.000                   
[ 5500/6804] LG:-0.605 LD:0.407 D:0.486 GP:0.016 RMSEAVG:0.036 NUM:0.036 SynTraiAuc:0.841 RFAcc:1.000                   
[ 6000/6804] LG:-0.631 LD:0.221 D:0.264 GP:0.009 RMSEAVG:0.036 NUM:0.036 SynTraiAuc:0.841 RFAcc:1.000                   
[ 6500/6804] LG:-0.535 LD:0.327 

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 5 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/1824] LG:0.033 LD:-0.991 D:-0.121 GP:0.058 RMSEAVG:0.330 NUM:0.330 SynTraiAuc:0.375 RFAcc:1.000                  
[  500/1824] LG:1.254 LD:2.228 D:2.354 GP:0.008 RMSEAVG:0.330 NUM:0.330 SynTraiAuc:0.375 RFAcc:1.000                    
[ 1000/1824] LG:0.813 LD:1.283 D:1.360 GP:0.00

[ 3000/4886] LG:-0.368 LD:0.250 D:0.299 GP:0.003 RMSEAVG:0.047 NUM:0.047 SynTraiAuc:0.073 RFAcc:1.000                   
[ 3500/4886] LG:-0.304 LD:0.364 D:0.402 GP:0.002 RMSEAVG:0.047 NUM:0.047 SynTraiAuc:0.073 RFAcc:1.000                   
[ 4000/4886] LG:-0.224 LD:0.301 D:0.339 GP:0.003 RMSEAVG:0.046 NUM:0.046 SynTraiAuc:0.073 RFAcc:1.000                   
[ 4500/4886] LG:-0.176 LD:0.365 D:0.389 GP:0.002 RMSEAVG:0.046 NUM:0.046 SynTraiAuc:0.073 RFAcc:1.000                   
[ 4886/4886] LG:-0.161 LD:0.509 D:0.551 GP:0.003 RMSEAVG:0.046 NUM:0.046 SynTraiAuc:0.073 RFAcc:1.000                   
needed sample                                                                                                           
389                                                                                                                     
[    0/4886] LG:0.072 LD:-0.117 D:-0.035 GP:0.012 RMSEAVG:0.258 NUM:0.258 SynTraiAuc:0.780 RFAcc:1.000                  
[  500/4886] LG:0.451 LD:0.987 D

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 6 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/1488] LG:0.124 LD:0.091 D:0.206 GP:0.008 RMSEAVG:0.318 NUM:0.318 SynTraiAuc:0.042 RFAcc:1.000                    
[  500/1488] LG:0.790 LD:1.502 D:1.526 GP:0.002 RMSEAVG:0.318 NUM:0.318 SynTraiAuc:0.042 RFAcc:1.000                    
[ 1000/1488] LG:0.360 LD:0.675 D:0.682 GP:0.00

[  500/752] LG:0.003 LD:0.680 D:0.706 GP:0.002 RMSEAVG:0.217 NUM:0.217 SynTraiAuc:0.878 RFAcc:1.000                     
[  752/752] LG:-0.096 LD:0.427 D:0.450 GP:0.002 RMSEAVG:0.217 NUM:0.217 SynTraiAuc:0.878 RFAcc:1.000                    
needed sample                                                                                                           
389                                                                                                                     
[    0/4671] LG:-0.493 LD:-0.734 D:-0.517 GP:0.013 RMSEAVG:0.315 NUM:0.315 SynTraiAuc:0.415 RFAcc:1.000                 
[  500/4671] LG:1.313 LD:2.466 D:2.570 GP:0.006 RMSEAVG:0.315 NUM:0.315 SynTraiAuc:0.415 RFAcc:1.000                    
[ 1000/4671] LG:0.707 LD:1.361 D:1.371 GP:0.001 RMSEAVG:0.315 NUM:0.315 SynTraiAuc:0.415 RFAcc:1.000                    
[ 1500/4671] LG:0.410 LD:0.726 D:0.771 GP:0.003 RMSEAVG:0.097 NUM:0.097 SynTraiAuc:0.415 RFAcc:1.000                    
[ 2000/4671] LG:0.262 LD:0.518 D

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 7 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/904] LG:0.071 LD:-1.065 D:0.181 GP:0.178 RMSEAVG:0.275 NUM:0.275 SynTraiAuc:0.750 RFAcc:1.000                    
[  500/904] LG:0.487 LD:0.875 D:0.891 GP:0.002 RMSEAVG:0.275 NUM:0.275 SynTraiAuc:0.750 RFAcc:1.000                     
[  904/904] LG:0.322 LD:0.537 D:0.556 GP:0.003

[ 2000/2534] LG:0.655 LD:0.498 D:0.547 GP:0.003 RMSEAVG:0.054 NUM:0.054 SynTraiAuc:0.780 RFAcc:1.000                    
[ 2500/2534] LG:0.640 LD:0.395 D:0.425 GP:0.002 RMSEAVG:0.020 NUM:0.020 SynTraiAuc:0.780 RFAcc:1.000                    
[ 2534/2534] LG:0.646 LD:0.433 D:0.463 GP:0.002 RMSEAVG:0.020 NUM:0.020 SynTraiAuc:0.780 RFAcc:1.000                    
needed sample                                                                                                           
389                                                                                                                     
[    0/8262] LG:0.120 LD:-0.101 D:0.096 GP:0.015 RMSEAVG:0.214 NUM:0.214 SynTraiAuc:0.000 RFAcc:1.000                   
[  500/8262] LG:-0.194 LD:0.388 D:1.130 GP:0.057 RMSEAVG:0.214 NUM:0.214 SynTraiAuc:0.000 RFAcc:1.000                   
[ 1000/8262] LG:1.068 LD:2.628 D:3.051 GP:0.032 RMSEAVG:0.214 NUM:0.214 SynTraiAuc:0.000 RFAcc:1.000                    
[ 1500/8262] LG:-0.845 LD:0.492 

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 8 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/4848] LG:0.010 LD:-9.590 D:-0.033 GP:0.503 RMSEAVG:0.155 NUM:0.155 SynTraiAuc:0.906 RFAcc:1.000                  
[  500/4848] LG:-3.710 LD:-7.493 D:-3.245 GP:0.224 RMSEAVG:0.155 NUM:0.155 SynTraiAuc:0.906 RFAcc:1.000                 
[ 1000/4848] LG:-0.353 LD:-0.576 D:-0.252 GP:0

[ 1316/1316] LG:0.063 LD:0.448 D:0.472 GP:0.003 RMSEAVG:0.027 NUM:0.027 SynTraiAuc:0.448 RFAcc:0.996                    
needed sample                                                                                                           
456                                                                                                                     
100%|██████████████████████████████████████████████████████████████████| 5/5 [02:46<00:00, 33.36s/trial, best loss: 1.0]
[ 4848/4848] LG:0.425 LD:-0.419 D:-0.215 GP:0.011 RMSEAVG:0.179 NUM:0.179 SynTraiAuc:0.219 RFAcc:1.000   
needed sample 456
y train ratio: 1:1
----- DAZZLE2 -----
[    0/1127] LG:-0.096 LD:-0.820 D:0.051 GP:0.145 RMSEAVG:0.226 NUM:0.226 SynTraiAuc:0.463 RFAcc:1.000                  
[  500/1127] LG:0.382 LD:0.740 D:0.759 GP:0.003 RMSEAVG:0.226 NUM:0.226 SynTraiAuc:0.463 RFAcc:1.000                    
[ 1000/1127] LG:0.366 LD:0.480 D:0.491 GP:0.002 RMSEAVG:0.226 NUM:0.226 SynTraiAuc:0.463 RFAcc:1.000           

[ 2500/4023] LG:0.346 LD:0.251 D:0.307 GP:0.005 RMSEAVG:0.026 NUM:0.026 SynTraiAuc:0.500 RFAcc:1.000                    
[ 3000/4023] LG:0.298 LD:0.304 D:0.338 GP:0.003 RMSEAVG:0.026 NUM:0.026 SynTraiAuc:0.500 RFAcc:1.000                    
[ 3500/4023] LG:0.411 LD:0.321 D:0.385 GP:0.005 RMSEAVG:0.026 NUM:0.026 SynTraiAuc:0.500 RFAcc:1.000                    
[ 4000/4023] LG:0.360 LD:0.328 D:0.366 GP:0.003 RMSEAVG:0.021 NUM:0.021 SynTraiAuc:0.500 RFAcc:1.000                    
[ 4023/4023] LG:0.396 LD:0.148 D:0.266 GP:0.010 RMSEAVG:0.021 NUM:0.021 SynTraiAuc:0.500 RFAcc:1.000                    
needed sample                                                                                                           
389                                                                                                                     
100%|██████████████████████████████████████████████████████████████████| 5/5 [02:23<00:00, 28.71s/trial, best loss: 1.0]
[ 1288/1288] LG:0.439 LD:0.650 D

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 9 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/5728] LG:-0.163 LD:-0.257 D:-0.071 GP:0.023 RMSEAVG:0.183 NUM:0.183 SynTraiAuc:0.146 RFAcc:1.000                 
[  500/5728] LG:0.615 LD:0.968 D:0.994 GP:0.003 RMSEAVG:0.183 NUM:0.183 SynTraiAuc:0.146 RFAcc:1.000                    
[ 1000/5728] LG:0.650 LD:0.867 D:0.903 GP:0.00

needed sample                                                                                                           
389                                                                                                                     
[    0/1792] LG:-0.336 LD:-0.594 D:-0.132 GP:0.035 RMSEAVG:0.183 NUM:0.183 SynTraiAuc:0.988 RFAcc:1.000                 
[  500/1792] LG:0.597 LD:0.853 D:0.861 GP:0.001 RMSEAVG:0.183 NUM:0.183 SynTraiAuc:0.988 RFAcc:1.000                    
[ 1000/1792] LG:0.674 LD:0.755 D:0.771 GP:0.001 RMSEAVG:0.183 NUM:0.183 SynTraiAuc:0.988 RFAcc:1.000                    
[ 1500/1792] LG:0.734 LD:0.582 D:0.605 GP:0.002 RMSEAVG:0.056 NUM:0.056 SynTraiAuc:0.988 RFAcc:1.000                    
[ 1792/1792] LG:0.724 LD:0.469 D:0.510 GP:0.003 RMSEAVG:0.056 NUM:0.056 SynTraiAuc:0.988 RFAcc:1.000                    
needed sample                                                                                                           
389                             

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
----- in repeat 10 -----
--- y train classes count: 
0    478
1     22
Name: label, dtype: int64
--- y train ratio: 1:22
 
--- y test classes count: 
0    493
1      7
Name: label, dtype: int64
--- y test ratio: 1:70
----- normal -----
----- random -----
y train ratio: 1:1
----- ADASYN ------
y train ratio: 1:1
----- borderlineSMOTE -----
y train ratio: 1:1
----- SMOTE -----
y train ratio: 1:1
----- SVMSMOTE -----
y train ratio: 1:2
----- SMOTUNED -----
y train ratio of SVM: 1:1
y train ratio of KNN: 1:1
y train ratio of LR: 1:1
y train ratio of DT: 1:1
y train ratio of RF: 1:1
y train ratio of LightGBM: 1:1
y train ratio of Adaboost: 1:1
y train ratio of GBDT: 1:1
----- DAZZLE1 -----
[    0/5376] LG:0.069 LD:-7.337 D:0.000 GP:0.489 RMSEAVG:0.259 NUM:0.259 SynTraiAuc:0.495 RFAcc:1.000                   
[  500/5376] LG:0.311 LD:0.425 D:0.575 GP:0.010 RMSEAVG:0.259 NUM:0.259 SynTraiAuc:0.495 RFAcc:1.000                    
[ 1000/5376] LG:0.206 LD:0.392 D:0.605 GP:0.0

[ 2500/7641] LG:-0.046 LD:0.517 D:0.537 GP:0.002 RMSEAVG:0.059 NUM:0.059 SynTraiAuc:0.378 RFAcc:1.000                   
[ 3000/7641] LG:-0.090 LD:0.302 D:0.331 GP:0.003 RMSEAVG:0.059 NUM:0.059 SynTraiAuc:0.378 RFAcc:1.000                   
[ 3500/7641] LG:-0.162 LD:0.155 D:0.187 GP:0.003 RMSEAVG:0.059 NUM:0.059 SynTraiAuc:0.378 RFAcc:1.000                   
[ 4000/7641] LG:-0.226 LD:-0.119 D:-0.039 GP:0.008 RMSEAVG:0.029 NUM:0.029 SynTraiAuc:0.378 RFAcc:1.000                 
[ 4500/7641] LG:-0.440 LD:-0.373 D:-0.097 GP:0.028 RMSEAVG:0.029 NUM:0.029 SynTraiAuc:0.378 RFAcc:1.000                 
[ 5000/7641] LG:-0.334 LD:-0.239 D:-0.204 GP:0.004 RMSEAVG:0.040 NUM:0.040 SynTraiAuc:0.427 RFAcc:1.000                 
[ 5500/7641] LG:-0.720 LD:-0.147 D:-0.124 GP:0.002 RMSEAVG:0.040 NUM:0.040 SynTraiAuc:0.427 RFAcc:1.000                 
[ 6000/7641] LG:-0.356 LD:0.039 D:0.129 GP:0.009 RMSEAVG:0.040 NUM:0.040 SynTraiAuc:0.427 RFAcc:1.000                   
[ 6500/7641] LG:-0.317 LD:0.089 

  new_data = new_data.append(synthetic_data)
  X_sample = X_sample.append(X_train)
  X_sample = X_sample.drop(tar, 1)


y train ratio: 1:1
----- Random Projection -----
y train ratio: 1:1
----- diveplane 1 -----




y train ratio: 1:1
----- diveplane 2 -----




y train ratio: 1:1
----- DS -----
y train ratio: 1:1
----- SDV FASTML -----
y train ratio: 1:1
----- SDV GC -----
y train ratio: 1:1
----- SDV GAN -----








y train ratio: 1:1
