# Loading data

In [2]:
import os
import numpy as np
import pandas as pd
from time import process_time
from glob import glob

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

# settings to display all columns
pd.set_option("display.max_columns", None)

In [10]:
def precision_0_recall_1_inverse_weighted_fbeta(y_true, y_pred, beta=2.0):
    """Compute class-0 precision, class-1 recall and an inverse-weighted F-beta.

    The F-beta scores of the two classes are combined with *swapped* class
    frequencies: the class-0 score is weighted by the share of class-1 samples
    and vice versa, so the rarer class contributes the larger weight.

    Args:
        y_true: Ground-truth binary labels (0 or 1).
        y_pred: Predicted binary labels (0 or 1).
        beta: Beta passed to the F-beta computation (default 2.0).

    Returns:
        Tuple ``(precision_0, recall_1, inverse_weighted_fbeta_score)``, each
        rounded to 4 decimals.
    """
    # Pin the label set and order so index 0/1 always refer to class 0/1.
    # Without `labels`, sklearn only reports the classes that occur in the
    # data, so a single-class input would make `recalls[1]` and the two-value
    # unpacking below fail.
    precisions, recalls, fbeta_scores, supports = precision_recall_fscore_support(
        y_true, y_pred, beta=beta, labels=[0, 1], average=None
    )

    precision_0 = round(precisions[0], 4)
    recall_1 = round(recalls[1], 4)

    # Share of each class among the true labels.
    ratio_0, ratio_1 = supports / sum(supports)
    inverse_weighted_fbeta_score = round(fbeta_scores[0] * ratio_1 + fbeta_scores[1] * ratio_0, 4)

    return precision_0, recall_1, inverse_weighted_fbeta_score

In [None]:
# Get cut points of EWB for histogram data
def equal_width_cut_points(lower_bound, upper_bound, n_bins, hist_data):
    """Return equal-width cut points spanning the occupied part of a histogram.

    ``hist_data[i]`` is the count for the value ``lower_bound + i``. The range
    between the first and the last non-empty entry is divided into ``n_bins``
    equal-width intervals whose edges are rounded to integers. ``lower_bound``
    and ``upper_bound`` are added at the ends when they are not already edges.

    Rounding can produce repeated cut points when the occupied range is
    narrower than ``n_bins``.

    Args:
        lower_bound: Value represented by ``hist_data[0]``.
        upper_bound: Upper limit of the value domain.
        n_bins: Number of equal-width intervals over the occupied range.
        hist_data: Sequence of per-value counts.

    Returns:
        List of cut points in ascending order.
    """
    occupied = [offset for offset, count in enumerate(hist_data) if count != 0]

    if occupied:
        min_value = occupied[0] + lower_bound
        max_value = occupied[-1] + lower_bound
    else:
        # Empty or all-zero histogram: there is no occupied range, so fall back
        # to the full domain instead of failing on unbound min/max values.
        min_value, max_value = lower_bound, upper_bound

    bin_width = (max_value - min_value) / n_bins
    cut_points = [round(min_value + i * bin_width) for i in range(0, n_bins + 1)]

    if lower_bound not in cut_points:
        cut_points.insert(0, lower_bound)
    if upper_bound not in cut_points:
        cut_points.append(upper_bound)

    return cut_points

def equal_width_cut_points_naive(lower_bound, upper_bound, n_bins):
    """Return rounded equal-width cut points over ``[lower_bound, upper_bound]``.

    Unlike ``equal_width_cut_points`` this ignores the data and always splits
    the full domain into ``n_bins`` intervals.
    """
    step = (upper_bound - lower_bound) / n_bins

    edges = []
    for k in range(n_bins + 1):
        edges.append(round(lower_bound + k * step))

    # Rounding may move an edge off a non-integer bound, so make sure both
    # domain limits are present.
    if lower_bound not in edges:
        edges = [lower_bound] + edges
    if upper_bound not in edges:
        edges = edges + [upper_bound]

    return edges

# Get cut points of EFB for histogram data
def equal_freq_cut_points(lower_bound, upper_bound, n_bins, hist_data):
    """Return cut points that greedily split a histogram into equal-count bins.

    ``hist_data[i]`` is the count for the value ``lower_bound + i``. Counts are
    accumulated from the left. Whenever the running total reaches
    ``sum(hist_data) / n_bins`` a cut is placed just after the current entry
    and the running total restarts from zero (any excess is not carried over).
    At most ``n_bins - 1`` inner cuts are produced. ``lower_bound`` and
    ``upper_bound`` are added at the ends when missing.
    """
    target_per_bin = sum(hist_data) / n_bins
    max_inner_cuts = n_bins - 1

    edges = []
    running_total = 0
    for offset, count in enumerate(hist_data):
        running_total += count
        if running_total >= target_per_bin:
            edges.append(offset + 1 + lower_bound)
            running_total = 0
        if len(edges) == max_inner_cuts:
            break

    # Close the partition with the domain limits.
    if lower_bound not in edges:
        edges = [lower_bound] + edges
    if upper_bound not in edges:
        edges = edges + [upper_bound]

    return edges

In [1]:
def load_data(train_data_path, test_data_dir):
    """Load the training array and every ``.npy`` test array in a directory.

    Args:
        train_data_path: Path of the training ``.npy`` file.
        test_data_dir: Directory searched (non-recursively) for ``*.npy`` files.

    Returns:
        Tuple ``(train_data, test_data_all)`` where ``test_data_all`` is a list
        of arrays in the order returned by ``glob``.
    """
    train_data = np.load(train_data_path)
    test_data_all = list(map(np.load, glob(f"{test_data_dir}/*.npy")))
    return train_data, test_data_all

In [9]:
def preprocess_data(train_data, test_data_all):
    """Split raw arrays into scaled features and labels.

    For every array the last column holds the labels and the remaining columns
    the features. Features are divided by the total of the first row; labels
    skip the first row, so label ``i`` belongs to row ``i + 1``.

    Returns:
        Tuple ``(X_train, y_train, X_test_all, y_test_all)``; the two ``*_all``
        entries are lists with one element per test array.
    """
    def _scaled_features(data):
        features = data[:, :-1]
        # NOTE(review): every row is scaled by the first row's total, which
        # presumably assumes all rows have the same total count — confirm.
        return features / sum(features[0])

    def _labels(data):
        return data[1:, -1]

    X_train = _scaled_features(train_data)
    y_train = _labels(train_data)

    X_test_all = [_scaled_features(test_data) for test_data in test_data_all]
    y_test_all = [_labels(test_data) for test_data in test_data_all]

    return X_train, y_train, X_test_all, y_test_all

In [None]:
def _bin_daily_histogram(day_hist, bin_edges, lower_bound):
    """Aggregate one day's per-value histogram into the bins given by ``bin_edges``."""
    return np.array([
        np.sum(day_hist[left - lower_bound: right - lower_bound])
        for left, right in zip(bin_edges[:-1], bin_edges[1:])
    ])


def _consecutive_day_psi(X, bin_edges, lower_bound, epsilon):
    """Return the PSI between every day and its successor (``len(X) - 1`` values)."""
    # Bin each day once; every day is used both as "previous" and as "current".
    binned_days = [_bin_daily_histogram(day_hist, bin_edges, lower_bound) for day_hist in X]
    return [
        np.sum((previous - current) * np.log((previous + epsilon) / (current + epsilon)))
        for previous, current in zip(binned_days[:-1], binned_days[1:])
    ]


def _predict_drift(psi_values, threshold):
    """Label a day pair 1 when its PSI reaches ``threshold`` and 0 otherwise."""
    return [int(psi >= threshold) for psi in psi_values]


def solve(train_data_path, test_data_dir, method, lower_bound=300, upper_bound=850, thresholds=(0.1,)):
    """Evaluate PSI-based drift detection for 5 to 25 bins.

    For every bin count the cut points are computed from the summed training
    histogram, the PSI between consecutive days is thresholded into a 0/1
    prediction, and metrics are printed and collected for the training set and
    for every test set.

    Args:
        train_data_path: Training ``.npy`` file. Its name must consist of six
            ``_``-separated fields, of which the 1st, 2nd and 4th are reported
            as distribution, number of days and number of samples.
        test_data_dir: Directory containing the test ``.npy`` files.
        method: ``"ewb"`` (equal width) or ``"efb"`` (equal frequency).
        lower_bound: Value represented by the first histogram column.
        upper_bound: Upper limit of the value domain.
        thresholds: PSI thresholds to try; the one with the highest training
            inverse-weighted F2 is applied to the test sets.

    Returns:
        List with one row per bin count:
        ``[dist, num_days, num_samples, num_bin, solving_time, best_threshold,
        train_acc, train_precision_0, train_recall_1, train_inverse_weighted_f2]``
        followed by ``acc, precision_0, recall_1, inverse_weighted_f2`` for each
        test set, in the order returned by ``load_data``.

    Raises:
        NotImplementedError: If ``method`` is neither ``"ewb"`` nor ``"efb"``.
    """
    # Fail fast on an unknown method, before any data is loaded.
    if method == "ewb":
        find_cut_points = equal_width_cut_points
    elif method == "efb":
        find_cut_points = equal_freq_cut_points
    else:
        raise NotImplementedError("Not implemented method")

    _, file_name = os.path.split(train_data_path)
    dist, num_days, _, num_samples, _, _ = file_name.replace(".npy", "").split("_")

    # Load data
    train_data, test_data_all = load_data(train_data_path, test_data_dir)

    # Preprocess data
    X_train, y_train, X_test_all, y_test_all = preprocess_data(train_data, test_data_all)

    # Array for storing results
    results = []
    num_bins = range(5, 26)
    epsilon = 1e-8  # Smoothing term that keeps the PSI logarithm finite for empty bins

    for num_bin in num_bins:
        result = [dist, num_days, num_samples, num_bin]
        print(f"num_bin = {num_bin}")

        #########################
        ### Invoke the solver ###
        #########################
        start_time = process_time()
        final_bin_edges = find_cut_points(lower_bound, upper_bound, num_bin, np.sum(X_train, axis=0))
        end_time = process_time()
        solving_time = end_time - start_time
        result.append(solving_time)

        print(f"Time for solving: {solving_time} s")
        print("final_bin_edges =", final_bin_edges, "\n")

        ##################
        ### Evaluation ###
        ##################
        # The PSI values do not depend on the threshold, so compute them once.
        train_psi = _consecutive_day_psi(X_train, final_bin_edges, lower_bound, epsilon)

        # Training accuracy, precision, recall and F2 for the best threshold
        best_train_threshold = best_train_precision_0 = best_train_recall_1 = best_train_inverse_weighted_f2 = 0
        best_y_train_pred = [0] * (X_train.shape[0] - 1)
        train_acc = 0

        for threshold in thresholds:
            y_train_pred = _predict_drift(train_psi, threshold)

            train_precision_0, train_recall_1, train_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_train, y_train_pred, beta=2.0)
            if train_inverse_weighted_f2 > best_train_inverse_weighted_f2:
                best_train_inverse_weighted_f2 = train_inverse_weighted_f2
                best_train_threshold = threshold
                best_train_precision_0 = train_precision_0
                best_train_recall_1 = train_recall_1
                best_y_train_pred = y_train_pred
                train_acc = accuracy_score(y_train, y_train_pred)

        print("Best threshold:", best_train_threshold)
        result.append(best_train_threshold)

        print("Training Accuracy:", train_acc)
        result.append(train_acc)

        print("Best Training Precision 0:", best_train_precision_0)
        result.append(best_train_precision_0)

        print("Best Training Recall 1:", best_train_recall_1)
        result.append(best_train_recall_1)

        print("Best Training Inverse Weighted F2", best_train_inverse_weighted_f2)
        result.append(best_train_inverse_weighted_f2)

        print(confusion_matrix(y_train, best_y_train_pred))

        # Testing accuracy, precision, recall and F2 with the threshold chosen on the training set
        for X_test, y_test in zip(X_test_all, y_test_all):
            test_psi = _consecutive_day_psi(X_test, final_bin_edges, lower_bound, epsilon)
            y_test_pred = _predict_drift(test_psi, best_train_threshold)

            test_acc = accuracy_score(y_test, y_test_pred)
            print("Testing Accuracy:", test_acc)
            result.append(test_acc)

            test_precision_0, test_recall_1, test_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_test, y_test_pred, beta=2.0)

            print("Testing Precision 0:", test_precision_0)
            result.append(test_precision_0)

            print("Testing Recall 1:", test_recall_1)
            result.append(test_recall_1)

            print("Testing Inverse Weighted F2:", test_inverse_weighted_f2)
            result.append(test_inverse_weighted_f2)

            print(confusion_matrix(y_test, y_test_pred))

        results.append(result)

    return results

In [None]:
def save_results(results, test_data_dir, save_path):
    """Write the experiment rows to a CSV file with descriptive column names.

    The first ten columns describe the run and the training metrics. They are
    followed by four metric columns (``acc``, ``precision_0``, ``recall_1``,
    ``inverse_weighted_f2``) per ``.npy`` file found in ``test_data_dir``, each
    prefixed with the file name without extension.

    NOTE: the per-file columns follow the order returned by ``glob`` here, so
    the rows in ``results`` must list the test sets in that same order.

    Args:
        results: List of result rows, one per bin count.
        test_data_dir: Directory whose ``*.npy`` file names label the test columns.
        save_path: Destination CSV path; missing parent directories are created.
    """
    test_data_paths = glob(f"{test_data_dir}/*.npy")
    test_names = [os.path.split(path)[1].replace(".npy", "") for path in test_data_paths]

    df_columns = ["distribution", "num_days", "num_samples", "num_bin", "solving_time",
                  "best_threshold", "training_acc", "training_precision_0", "training_recall_1", "training_inverse_weighted_f2"]

    for test_name in test_names:
        df_columns.extend(
            f"{test_name}_{metric}"
            for metric in ("acc", "precision_0", "recall_1", "inverse_weighted_f2")
        )

    # to_csv does not create directories, so make sure the target folder exists.
    output_dir = os.path.dirname(save_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    results_df = pd.DataFrame(results, columns=df_columns)
    results_df.to_csv(save_path, index=False)

In [11]:
def main(train_data_path, test_data_dir, method, save_path):
    """Run the binning experiment and write its result table to ``save_path``."""
    save_results(
        solve(train_data_path, test_data_dir, method),
        test_data_dir,
        save_path,
    )

In [None]:
# Experiment configuration: training file, directory of test files, binning
# method ("ewb" or "efb", the two values solve() accepts) and output CSV path.
train_data_path = "../data/train/old_histogram/logistic_365_days_100000_samples_90.npy"
test_data_dir = "../data/test/logistic/old_histogram"
method = "ewb"
save_path = "../output/test/ewb_results.csv"

# Run the whole experiment and save its results when executed as the main module.
if __name__ == "__main__":
    main(train_data_path, test_data_dir, method, save_path)

## Training data

In [2]:
# Load the training array and show its shape as the cell output.
train_data_path = "../data/train/old_histogram/logistic_365_days_100000_samples_90.npy"
train_data = np.load(train_data_path)
train_data.shape

(365, 551)

In [3]:
# Display the raw training array (the notebook prints an abbreviated repr).
train_data

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

## Testing data

In [4]:
# Directory that holds the test .npy files.
test_data_dir = "../data/test/logistic/old_histogram"

In [5]:
# Collect every .npy file directly inside test_data_dir; the order is whatever glob returns.
test_data_paths = glob(f"{test_data_dir}/*.npy")

In [6]:
# Map each test file's position in test_data_paths to its file name without ".npy".
id2file = {}
for i in range(len(test_data_paths)):
    test_file = os.path.split(test_data_paths[i])[1].replace(".npy", "")
    id2file[i] = test_file
print(len(id2file))
# Last expression: show the mapping as the cell output.
id2file

4


{0: 'logistic_183_days_10000_samples_70',
 1: 'logistic_365_days_10000_samples_90',
 2: 'logistic_183_days_10000_samples_90',
 3: 'logistic_365_days_10000_samples_70'}

In [7]:
# Load every test array, keeping the order of test_data_paths.
test_data_all = [np.load(test_data_path) for test_data_path in test_data_paths]

In [8]:
# Shape of the first loaded test array.
test_data_all[0].shape

(183, 551)

# Preprocessing

## Training data

In [9]:
# Features: every column except the last one.
X_train = train_data[:, :-1]
# Scale all rows by the total of the first row.
# NOTE(review): presumably every row has the same total count, so this turns counts into proportions; confirm.
X_train = X_train / sum(X_train[0])
X_train.shape

(365, 550)

In [10]:
# Labels: last column, skipping the first row, so y_train[i] belongs to row i + 1 of train_data.
y_train = train_data[1:, -1]
y_train.shape

(364,)

In [11]:
# Class balance of the training labels.
print(f"0: {len(y_train[y_train == 0])}")
print(f"1: {len(y_train[y_train == 1])}")

0: 335
1: 29


## Testing data

In [12]:
# Same feature scaling as for the training data: each test array is divided by the total of its own first row.
X_test_all = [test_data[:, :-1] / sum(test_data[0, :-1]) for test_data in test_data_all]
print(X_test_all[0].shape)

(183, 550)


In [13]:
# Labels of each test array: last column, skipping the first row.
y_test_all = [test_data[1:, -1] for test_data in test_data_all]
y_test_all[0].shape

(182,)

In [14]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

In [15]:
def precision_0_recall_1_inverse_weighted_fbeta(y_true, y_pred, beta=2.0):
    """Compute class-0 precision, class-1 recall and an inverse-weighted F-beta.

    The F-beta scores of the two classes are combined with *swapped* class
    frequencies: the class-0 score is weighted by the share of class-1 samples
    and vice versa, so the rarer class contributes the larger weight.

    Args:
        y_true: Ground-truth binary labels (0 or 1).
        y_pred: Predicted binary labels (0 or 1).
        beta: Beta passed to the F-beta computation (default 2.0).

    Returns:
        Tuple ``(precision_0, recall_1, inverse_weighted_fbeta_score)``, each
        rounded to 4 decimals.
    """
    # Pin the label set and order so index 0/1 always refer to class 0/1.
    # Without `labels`, sklearn only reports the classes that occur in the
    # data, so a single-class input would make `recalls[1]` and the two-value
    # unpacking below fail.
    precisions, recalls, fbeta_scores, supports = precision_recall_fscore_support(
        y_true, y_pred, beta=beta, labels=[0, 1], average=None
    )

    precision_0 = round(precisions[0], 4)
    recall_1 = round(recalls[1], 4)

    # Share of each class among the true labels.
    ratio_0, ratio_1 = supports / sum(supports)
    inverse_weighted_fbeta_score = round(fbeta_scores[0] * ratio_1 + fbeta_scores[1] * ratio_0, 4)

    return precision_0, recall_1, inverse_weighted_fbeta_score

In [16]:
# Recover the experiment settings from the training file name. The unpacking
# expects exactly six "_"-separated fields; the 3rd and 5th are discarded.
train_dir_path, file_name = os.path.split(train_data_path)
dist, num_days, _, num_samples, _, ratio = file_name.replace(".npy", "").split("_")

print(dist)
print(num_days)
print(num_samples)
print(ratio)

logistic
365
100000
90


In [17]:
# Get cut points of EWB for histogram data
def equal_width_cut_points(lower_bound, upper_bound, n_bins, hist_data):
    """Return equal-width cut points spanning the occupied part of a histogram.

    ``hist_data[i]`` is the count for the value ``lower_bound + i``. The range
    between the first and the last non-empty entry is divided into ``n_bins``
    equal-width intervals whose edges are rounded to integers. ``lower_bound``
    and ``upper_bound`` are added at the ends when they are not already edges.

    Rounding can produce repeated cut points when the occupied range is
    narrower than ``n_bins``.

    Args:
        lower_bound: Value represented by ``hist_data[0]``.
        upper_bound: Upper limit of the value domain.
        n_bins: Number of equal-width intervals over the occupied range.
        hist_data: Sequence of per-value counts.

    Returns:
        List of cut points in ascending order.
    """
    occupied = [offset for offset, count in enumerate(hist_data) if count != 0]

    if occupied:
        min_value = occupied[0] + lower_bound
        max_value = occupied[-1] + lower_bound
    else:
        # Empty or all-zero histogram: there is no occupied range, so fall back
        # to the full domain instead of failing on unbound min/max values.
        min_value, max_value = lower_bound, upper_bound

    bin_width = (max_value - min_value) / n_bins
    cut_points = [round(min_value + i * bin_width) for i in range(0, n_bins + 1)]

    if lower_bound not in cut_points:
        cut_points.insert(0, lower_bound)
    if upper_bound not in cut_points:
        cut_points.append(upper_bound)

    return cut_points

def equal_width_cut_points_naive(lower_bound, upper_bound, n_bins):
    """Return rounded equal-width cut points over ``[lower_bound, upper_bound]``.

    Unlike ``equal_width_cut_points`` this ignores the data and always splits
    the full domain into ``n_bins`` intervals.
    """
    step = (upper_bound - lower_bound) / n_bins

    edges = []
    for k in range(n_bins + 1):
        edges.append(round(lower_bound + k * step))

    # Rounding may move an edge off a non-integer bound, so make sure both
    # domain limits are present.
    if lower_bound not in edges:
        edges = [lower_bound] + edges
    if upper_bound not in edges:
        edges = edges + [upper_bound]

    return edges

# Get cut points of EFB for histogram data
def equal_freq_cut_points(lower_bound, upper_bound, n_bins, hist_data):
    """Return cut points that greedily split a histogram into equal-count bins.

    ``hist_data[i]`` is the count for the value ``lower_bound + i``. Counts are
    accumulated from the left. Whenever the running total reaches
    ``sum(hist_data) / n_bins`` a cut is placed just after the current entry
    and the running total restarts from zero (any excess is not carried over).
    At most ``n_bins - 1`` inner cuts are produced. ``lower_bound`` and
    ``upper_bound`` are added at the ends when missing.
    """
    target_per_bin = sum(hist_data) / n_bins
    max_inner_cuts = n_bins - 1

    edges = []
    running_total = 0
    for offset, count in enumerate(hist_data):
        running_total += count
        if running_total >= target_per_bin:
            edges.append(offset + 1 + lower_bound)
            running_total = 0
        if len(edges) == max_inner_cuts:
            break

    # Close the partition with the domain limits.
    if lower_bound not in edges:
        edges = [lower_bound] + edges
    if upper_bound not in edges:
        edges = edges + [upper_bound]

    return edges

In [None]:
# # Array for storing results
# results = []
# num_bins = range(5, 26)
# epsilon = 1e-8 # Smoothing hyperparameters

# for num_bin in num_bins:
#     ########################
#     ### current solution ###
#     ########################
#     result = [dist, num_days, num_samples, num_bin]
#     print(f"num_bin = {num_bin}")

#     #########################
#     ### Invoke the solver ###
#     #########################
#     start_time = process_time()
#     final_bin_edges = equal_width_cut_points(300, 850, num_bin, np.sum(X_train, axis=0))
#     # final_bin_edges = equal_width_cut_points_naive(300, 850, num_bin)
#     end_time = process_time()
#     solving_time = end_time - start_time
#     result.append(solving_time)
    
#     print(f"Time for solving: {solving_time} s")
#     print("final_bin_edges =", final_bin_edges, "\n")


#     ###############
#     ### Evaluation ###
#     ###############
#     thresholds = np.arange(0.01, 1.01, 0.01)
#     thresholds = [round(threshold, 2) for threshold in thresholds]
#     # thresholds = [0.1]
              
#     # Training Acccuracy & F1 & F2
#     num_days_train = X_train.shape[0]
#     best_train_threshold = best_train_precision_0 = best_train_recall_1 = best_train_inverse_weighted_f2 = 0
#     best_y_train_pred = [0] * (num_days_train - 1)
#     train_acc = train_f1 = 0
    
#     for threshold in thresholds:
#         y_train_pred = []
        
#         for i in range(num_days_train - 1):
#             hist_1 = []
#             for j in range(len(final_bin_edges) - 1):
#                 hist_1.append(np.sum(X_train[i, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
#             hist_1 = np.array(hist_1)

#             hist_2 = []
#             for j in range(len(final_bin_edges) - 1):
#                 hist_2.append(np.sum(X_train[i + 1, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
#             hist_2 = np.array(hist_2)

#             psis = (hist_1 - hist_2) * np.log((hist_1 + epsilon) / (hist_2 + epsilon))
#             psi = np.sum(psis)
      
#             if (y_train[i] == 0 and psi < threshold) or (y_train[i] == 1 and psi >= threshold):
#                 y_train_pred.append(y_train[i])
#             else:
#                 y_train_pred.append(1 - y_train[i])
        
#         train_precision_0, train_recall_1, train_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_train, y_train_pred, beta=2.0)
#         if train_inverse_weighted_f2 > best_train_inverse_weighted_f2:
#             best_train_inverse_weighted_f2 = train_inverse_weighted_f2
#             best_train_threshold = threshold
#             best_train_precision_0 = train_precision_0
#             best_train_recall_1 = train_recall_1
#             best_y_train_pred = y_train_pred
#             train_acc = accuracy_score(y_train, y_train_pred)

#     print("Best threshold:", best_train_threshold)
#     result.append(best_train_threshold)

#     print("Training Accuracy:", train_acc)
#     result.append(train_acc)

#     print("Best Training Precision 0:", best_train_precision_0)
#     result.append(best_train_precision_0)   

#     print("Best Training Recall 1:", best_train_recall_1)
#     result.append(best_train_recall_1)

#     print("Best Training Inverse Weighted F2", best_train_inverse_weighted_f2)
#     result.append(best_train_inverse_weighted_f2) 

#     print(confusion_matrix(y_train, best_y_train_pred))
              
#     # Testing Acccuracy & F1 & F2
#     for i in range(len(test_data_paths)):
#         X_test, y_test = X_test_all[i], y_test_all[i]
#         num_days_test = X_test.shape[0]
#         y_test_pred = []

#         for i in range(num_days_test - 1):
#             hist_1 = []
#             for j in range(len(final_bin_edges) - 1):
#                 hist_1.append(np.sum(X_test[i, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
#             hist_1 = np.array(hist_1)

#             hist_2 = []
#             for j in range(len(final_bin_edges) - 1):
#                 hist_2.append(np.sum(X_test[i + 1, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
#             hist_2 = np.array(hist_2)

#             psis = (hist_1 - hist_2) * np.log((hist_1 + epsilon) / (hist_2 + epsilon))
#             psi = np.sum(psis)

#             if (y_test[i] == 0 and psi < best_train_threshold) or (y_test[i] == 1 and psi >= best_train_threshold):
#                 y_test_pred.append(y_test[i])
#             else:
#                 y_test_pred.append(1 - y_test[i])

#         test_acc = accuracy_score(y_test, y_test_pred)
#         print("Testing Accuracy:", test_acc)
#         result.append(test_acc)
        
#         test_precision_0, test_recall_1, test_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_test, y_test_pred, beta=2.0)

#         print("Testing Precision 0:", test_precision_0)
#         result.append(test_precision_0)   

#         print("Testing Recall 1:", test_recall_1)
#         result.append(test_recall_1)

#         print("Testing Inverse Weighted F2:", test_inverse_weighted_f2)
#         result.append(test_inverse_weighted_f2)

#         print(confusion_matrix(y_test, y_test_pred))

#     results.append(result)

# Saving the results

In [19]:
# Column names of the result table: ten run/training columns first ...
df_columns = ["distribution", "num_days", "num_samples", "num_bin", "solving_time", 
           "best_threshold", "training_acc", "training_precision_0", "training_recall_1", "training_inverse_weighted_f2"]

# ... followed by four metric columns per test file, prefixed with the file name from id2file.
for i in range(len(test_data_paths)):
    df_columns.append(f"{id2file[i]}_acc")
    df_columns.append(f"{id2file[i]}_precision_0")
    df_columns.append(f"{id2file[i]}_recall_1")
    df_columns.append(f"{id2file[i]}_inverse_weighted_f2")

# Number of columns (10 + 4 per test file), shown as the cell output.
len(df_columns)

26

In [20]:
# results_df = pd.DataFrame(results, columns=df_columns)
# results_df

Unnamed: 0,distribution,num_days,num_samples,num_bin,solving_time,best_threshold,training_acc,training_precision_0,training_recall_1,training_inverse_weighted_f2,logistic_183_days_10000_samples_70_acc,logistic_183_days_10000_samples_70_precision_0,logistic_183_days_10000_samples_70_recall_1,logistic_183_days_10000_samples_70_inverse_weighted_f2,logistic_365_days_10000_samples_90_acc,logistic_365_days_10000_samples_90_precision_0,logistic_365_days_10000_samples_90_recall_1,logistic_365_days_10000_samples_90_inverse_weighted_f2,logistic_183_days_10000_samples_90_acc,logistic_183_days_10000_samples_90_precision_0,logistic_183_days_10000_samples_90_recall_1,logistic_183_days_10000_samples_90_inverse_weighted_f2,logistic_365_days_10000_samples_70_acc,logistic_365_days_10000_samples_70_precision_0,logistic_365_days_10000_samples_70_recall_1,logistic_365_days_10000_samples_70_inverse_weighted_f2
0,logistic,365,100000,5,0.000549,0.03,0.961538,0.9938,0.931,0.8789,0.879121,0.9603,0.8864,0.853,0.928571,0.9746,0.7949,0.7769,0.912088,0.9735,0.8261,0.7928,0.923077,0.9417,0.8871,0.9057
1,logistic,365,100000,6,0.000148,0.03,0.96978,0.9969,0.9655,0.9144,0.928571,0.9699,0.9091,0.9016,0.961538,0.9905,0.9231,0.8957,0.934066,0.9933,0.9565,0.8879,0.950549,0.9784,0.9597,0.9491
2,logistic,365,100000,7,0.000207,0.04,0.997253,0.997,0.9655,0.9744,0.923077,0.9697,0.9091,0.8972,0.983516,0.9878,0.8974,0.9159,0.983516,0.9875,0.913,0.9301,0.967033,0.9672,0.9355,0.9547
3,logistic,365,100000,8,0.000192,0.05,0.989011,1.0,1.0,0.9745,0.978022,0.9855,0.9545,0.962,0.980769,0.9969,0.9744,0.9537,0.978022,0.9936,0.9565,0.9457,0.964286,0.9749,0.9516,0.9574
4,logistic,365,100000,9,0.00021,0.04,0.991758,1.0,1.0,0.9808,0.934066,0.9922,0.9773,0.9346,0.980769,1.0,1.0,0.9672,0.978022,1.0,1.0,0.9681,0.96978,1.0,1.0,0.9759
5,logistic,365,100000,10,0.000207,0.06,1.0,1.0,1.0,1.0,0.989011,0.9857,0.9545,0.9715,0.991758,0.9909,0.9231,0.944,0.994505,0.9938,0.9565,0.9692,0.978022,0.9715,0.9435,0.9658
6,logistic,365,100000,11,0.000266,0.07,0.997253,0.997,0.9655,0.9744,0.983516,0.9787,0.9318,0.957,0.983516,0.9819,0.8462,0.8862,1.0,1.0,1.0,1.0,0.978022,0.9677,0.9355,0.9633
7,logistic,365,100000,12,0.000168,0.06,1.0,1.0,1.0,1.0,0.994505,0.9928,0.9773,0.9858,0.991758,0.9969,0.9744,0.9721,1.0,1.0,1.0,1.0,0.991758,0.9917,0.9839,0.9887
8,logistic,365,100000,13,0.000316,0.06,1.0,1.0,1.0,1.0,0.994505,1.0,1.0,0.9952,0.994505,1.0,1.0,0.9904,1.0,1.0,1.0,1.0,0.991758,0.9958,0.9919,0.9911
9,logistic,365,100000,14,0.000152,0.07,1.0,1.0,1.0,1.0,0.989011,0.9857,0.9545,0.9715,0.991758,0.9909,0.9231,0.944,1.0,1.0,1.0,1.0,0.989011,0.9836,0.9677,0.9817


# Default threshold 0.1

In [21]:
# Array for storing results
# Evaluate PSI-based drift detection with equal-width bins for 5 to 25 bins,
# using the single threshold 0.1. One row per bin count is appended to `results`.
# Relies on names defined in earlier cells: dist, num_days, num_samples,
# X_train, y_train, X_test_all, y_test_all and test_data_paths.
results = []
num_bins = range(5, 26)
epsilon = 1e-8 # Smoothing term that keeps the PSI logarithm finite for empty bins

for num_bin in num_bins:
    ########################
    ### current solution ###
    ########################
    result = [dist, num_days, num_samples, num_bin]
    print(f"num_bin = {num_bin}")

    #########################
    ### Invoke the solver ###
    #########################
    # Only the cut-point computation is timed (CPU time via process_time).
    start_time = process_time()
    final_bin_edges = equal_width_cut_points(300, 850, num_bin, np.sum(X_train, axis=0))
    # final_bin_edges = equal_width_cut_points_naive(300, 850, num_bin)
    end_time = process_time()
    solving_time = end_time - start_time
    result.append(solving_time)

    print(f"Time for solving: {solving_time} s")
    print("final_bin_edges =", final_bin_edges, "\n")


    ##################
    ### Evaluation ###
    ##################
    # thresholds = np.arange(0.01, 1.01, 0.01)
    # thresholds = [round(threshold, 2) for threshold in thresholds]
    thresholds = [0.1]

    # Training accuracy, precision, recall and F2
    num_days_train = X_train.shape[0]
    best_train_threshold = best_train_precision_0 = best_train_recall_1 = best_train_inverse_weighted_f2 = 0
    best_y_train_pred = [0] * (num_days_train - 1)
    # train_f1 is initialised here but not used anywhere else in this cell.
    train_acc = train_f1 = 0

    for threshold in thresholds:
        y_train_pred = []

        for i in range(num_days_train - 1):
            # Re-bin day i: cut points are values, so subtracting 300 turns them into column indices.
            hist_1 = []
            for j in range(len(final_bin_edges) - 1):
                hist_1.append(np.sum(X_train[i, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
            hist_1 = np.array(hist_1)

            # Re-bin day i + 1 with the same cut points.
            hist_2 = []
            for j in range(len(final_bin_edges) - 1):
                hist_2.append(np.sum(X_train[i + 1, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
            hist_2 = np.array(hist_2)

            # PSI between the two consecutive days.
            psis = (hist_1 - hist_2) * np.log((hist_1 + epsilon) / (hist_2 + epsilon))
            psi = np.sum(psis)

            # Keep the true label when the PSI lies on the matching side of the threshold and flip it
            # otherwise; for 0/1 labels the prediction is therefore 1 exactly when psi >= threshold.
            if (y_train[i] == 0 and psi < threshold) or (y_train[i] == 1 and psi >= threshold):
                y_train_pred.append(y_train[i])
            else:
                y_train_pred.append(1 - y_train[i])

        train_precision_0, train_recall_1, train_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_train, y_train_pred, beta=2.0)
        # Keep the threshold with the highest inverse-weighted F2 on the training set.
        if train_inverse_weighted_f2 > best_train_inverse_weighted_f2:
            best_train_inverse_weighted_f2 = train_inverse_weighted_f2
            best_train_threshold = threshold
            best_train_precision_0 = train_precision_0
            best_train_recall_1 = train_recall_1
            best_y_train_pred = y_train_pred
            train_acc = accuracy_score(y_train, y_train_pred)

    print("Best threshold:", best_train_threshold)
    result.append(best_train_threshold)

    print("Training Accuracy:", train_acc)
    result.append(train_acc)

    print("Best Training Precision 0:", best_train_precision_0)
    result.append(best_train_precision_0)

    print("Best Training Recall 1:", best_train_recall_1)
    result.append(best_train_recall_1)

    print("Best Training Inverse Weighted F2", best_train_inverse_weighted_f2)
    result.append(best_train_inverse_weighted_f2)

    print(confusion_matrix(y_train, best_y_train_pred))

    # Testing accuracy, precision, recall and F2 with the threshold chosen on the training set
    for i in range(len(test_data_paths)):
        X_test, y_test = X_test_all[i], y_test_all[i]
        num_days_test = X_test.shape[0]
        y_test_pred = []

        # NOTE(review): this inner loop reuses the name `i` of the enclosing loop. The outer
        # iteration is unaffected because X_test/y_test were already selected above and the
        # outer `range` supplies its own next value, but the shadowing is easy to misread.
        for i in range(num_days_test - 1):
            hist_1 = []
            for j in range(len(final_bin_edges) - 1):
                hist_1.append(np.sum(X_test[i, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
            hist_1 = np.array(hist_1)

            hist_2 = []
            for j in range(len(final_bin_edges) - 1):
                hist_2.append(np.sum(X_test[i + 1, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
            hist_2 = np.array(hist_2)

            psis = (hist_1 - hist_2) * np.log((hist_1 + epsilon) / (hist_2 + epsilon))
            psi = np.sum(psis)

            # Same prediction rule as for the training data, using the best training threshold.
            if (y_test[i] == 0 and psi < best_train_threshold) or (y_test[i] == 1 and psi >= best_train_threshold):
                y_test_pred.append(y_test[i])
            else:
                y_test_pred.append(1 - y_test[i])

        test_acc = accuracy_score(y_test, y_test_pred)
        print("Testing Accuracy:", test_acc)
        result.append(test_acc)

        test_precision_0, test_recall_1, test_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_test, y_test_pred, beta=2.0)

        print("Testing Precision 0:", test_precision_0)
        result.append(test_precision_0)

        print("Testing Recall 1:", test_recall_1)
        result.append(test_recall_1)

        print("Testing Inverse Weighted F2:", test_inverse_weighted_f2)
        result.append(test_inverse_weighted_f2)

        print(confusion_matrix(y_test, y_test_pred))

    results.append(result)

num_bin = 5
Time for solving: 0.0010193550000110463 s
final_bin_edges = [300, 407, 495, 584, 672, 761, 849, 850] 

Best threshold: 0.1
Training Accuracy: 0.9478021978021978
Best Training Precision 0: 0.9463
Best Training Recall 1: 0.3448
Best Training Inverse Weighted F2 0.444
[[335   0]
 [ 19  10]]
Testing Accuracy: 0.8296703296703297
Testing Precision 0: 0.8166
Testing Recall 1: 0.2955
Testing Inverse Weighted F2: 0.4921
[[138   0]
 [ 31  13]]
Testing Accuracy: 0.9230769230769231
Testing Precision 0: 0.9207
Testing Recall 1: 0.2821
Testing Inverse Weighted F2: 0.3994
[[325   0]
 [ 28  11]]
Testing Accuracy: 0.9340659340659341
Testing Precision 0: 0.9298
Testing Recall 1: 0.4783
Testing Inverse Weighted F2: 0.591
[[159   0]
 [ 12  11]]
Testing Accuracy: 0.7554945054945055
Testing Precision 0: 0.7295
Testing Recall 1: 0.2823
Testing Inverse Weighted F2: 0.5344
[[240   0]
 [ 89  35]]
num_bin = 6
Time for solving: 0.00020908199999780663 s
final_bin_edges = [300, 407, 481, 554, 628, 702, 

In [22]:
# Turn the collected rows into a table; each row must have as many entries as df_columns.
results_df = pd.DataFrame(results, columns=df_columns)
results_df

Unnamed: 0,distribution,num_days,num_samples,num_bin,solving_time,best_threshold,training_acc,training_precision_0,training_recall_1,training_inverse_weighted_f2,logistic_183_days_10000_samples_70_acc,logistic_183_days_10000_samples_70_precision_0,logistic_183_days_10000_samples_70_recall_1,logistic_183_days_10000_samples_70_inverse_weighted_f2,logistic_365_days_10000_samples_90_acc,logistic_365_days_10000_samples_90_precision_0,logistic_365_days_10000_samples_90_recall_1,logistic_365_days_10000_samples_90_inverse_weighted_f2,logistic_183_days_10000_samples_90_acc,logistic_183_days_10000_samples_90_precision_0,logistic_183_days_10000_samples_90_recall_1,logistic_183_days_10000_samples_90_inverse_weighted_f2,logistic_365_days_10000_samples_70_acc,logistic_365_days_10000_samples_70_precision_0,logistic_365_days_10000_samples_70_recall_1,logistic_365_days_10000_samples_70_inverse_weighted_f2
0,logistic,365,100000,5,0.001019,0.1,0.947802,0.9463,0.3448,0.444,0.82967,0.8166,0.2955,0.4921,0.923077,0.9207,0.2821,0.3994,0.934066,0.9298,0.4783,0.591,0.755495,0.7295,0.2823,0.5344
1,logistic,365,100000,6,0.000209,0.1,0.947802,0.9463,0.3448,0.444,0.868132,0.8519,0.4545,0.6205,0.934066,0.9312,0.3846,0.4972,0.934066,0.9298,0.4783,0.591,0.782967,0.7524,0.3629,0.5938
2,logistic,365,100000,7,0.00018,0.1,0.964286,0.9626,0.5517,0.6368,0.901099,0.8846,0.5909,0.7236,0.945055,0.942,0.4872,0.5905,0.961538,0.9578,0.6957,0.7724,0.807692,0.7742,0.4355,0.6456
3,logistic,365,100000,8,0.000198,0.1,0.96978,0.9682,0.6207,0.6973,0.928571,0.9139,0.7045,0.8051,0.958791,0.9559,0.6154,0.7014,0.978022,0.9755,0.8261,0.8734,0.85989,0.8247,0.5887,0.7497
4,logistic,365,100000,9,0.000178,0.1,0.972527,0.971,0.6552,0.7268,0.923077,0.9079,0.6818,0.7891,0.953297,0.9503,0.5641,0.6578,0.961538,0.9578,0.6957,0.7724,0.857143,0.8219,0.5806,0.7444
5,logistic,365,100000,10,0.000192,0.1,0.975275,0.9738,0.6897,0.756,0.93956,0.9262,0.75,0.8366,0.964286,0.9615,0.6667,0.744,0.978022,0.9755,0.8261,0.8734,0.881868,0.8481,0.6532,0.7917
6,logistic,365,100000,11,0.000195,0.1,0.986264,0.9853,0.8276,0.8683,0.950549,0.9388,0.7955,0.8675,0.96978,0.9673,0.7179,0.7858,0.989011,0.9876,0.913,0.9378,0.89011,0.8571,0.6774,0.8071
7,logistic,365,100000,12,0.000181,0.1,0.980769,0.9795,0.7586,0.8129,0.945055,0.9324,0.7727,0.8521,0.96978,0.9673,0.7179,0.7858,0.983516,0.9815,0.8696,0.9059,0.898352,0.8664,0.7016,0.8224
8,logistic,365,100000,13,0.00025,0.1,0.989011,0.9882,0.8621,0.8954,0.950549,0.9388,0.7955,0.8675,0.975275,0.9731,0.7692,0.8266,0.994505,0.9938,0.9565,0.9692,0.912088,0.8824,0.7419,0.8476
9,logistic,365,100000,14,0.000223,0.1,0.991758,0.9911,0.8966,0.9221,0.961538,0.9517,0.8409,0.8979,0.978022,0.976,0.7949,0.8467,0.994505,0.9938,0.9565,0.9692,0.909341,0.8791,0.7339,0.8426


# Dynamic cut points using two consecutive days

## Tune thresholds

In [23]:
# # Array for storing results
# results = []
# num_bins = range(5, 26)
# epsilon = 1e-8 # Smoothing constant added to both bin counts so the log ratio stays finite when a bin is empty

# for num_bin in num_bins:
#     ########################
#     ### current solution ###
#     ########################
#     result = [dist, num_days, num_samples, num_bin]
#     print(f"num_bin = {num_bin}")

#     #########################
#     ### Invoke the solver ###
#     #########################
#     # start_time = process_time()
#     # final_bin_edges = equal_width_cut_points(300, 850, num_bin, np.sum(X_train, axis=0))
#     # # final_bin_edges = equal_width_cut_points_naive(300, 850, num_bin)
#     # end_time = process_time()
#     # solving_time = end_time - start_time
#     # result.append(solving_time)
    
#     # print(f"Time for solving: {solving_time} s")
#     # print("final_bin_edges =", final_bin_edges, "\n")


#     ##################
#     ### Evaluation ###
#     ##################
#     thresholds = np.arange(0.01, 1.01, 0.01)
#     thresholds = [round(threshold, 2) for threshold in thresholds]
#     # thresholds = [0.1]
              
#     # Training accuracy, precision (class 0), recall (class 1) & inverse-weighted F2
#     num_days_train = X_train.shape[0]
#     best_train_threshold = best_train_precision_0 = best_train_recall_1 = best_train_inverse_weighted_f2 = 0
#     best_y_train_pred = [0] * (num_days_train - 1)
#     train_acc = train_f1 = 0
#     best_solving_time = 0
    
#     for threshold in thresholds:
#         y_train_pred = []
#         solving_time = 0
        
#         for i in range(num_days_train - 1):
#             start_time = process_time()
#             final_bin_edges = equal_width_cut_points(300, 850, num_bin, X_train[i] + X_train[i + 1])
#             end_time = process_time()
#             solving_time += end_time - start_time

#             hist_1 = []
#             for j in range(len(final_bin_edges) - 1):
#                 hist_1.append(np.sum(X_train[i, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
#             hist_1 = np.array(hist_1)

#             hist_2 = []
#             for j in range(len(final_bin_edges) - 1):
#                 hist_2.append(np.sum(X_train[i + 1, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
#             hist_2 = np.array(hist_2)

#             psis = (hist_1 - hist_2) * np.log((hist_1 + epsilon) / (hist_2 + epsilon))
#             psi = np.sum(psis)
      
#             if (y_train[i] == 0 and psi < threshold) or (y_train[i] == 1 and psi >= threshold):
#                 y_train_pred.append(y_train[i])
#             else:
#                 y_train_pred.append(1 - y_train[i])
        
#         train_precision_0, train_recall_1, train_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_train, y_train_pred, beta=2.0)
#         if train_inverse_weighted_f2 > best_train_inverse_weighted_f2:
#             best_train_inverse_weighted_f2 = train_inverse_weighted_f2
#             best_train_threshold = threshold
#             best_train_precision_0 = train_precision_0
#             best_train_recall_1 = train_recall_1
#             best_y_train_pred = y_train_pred
#             best_solving_time = solving_time
#             train_acc = accuracy_score(y_train, y_train_pred)

#     print(f"Time for solving: {best_solving_time} s")
#     result.append(best_solving_time)

#     print("Best threshold:", best_train_threshold)
#     result.append(best_train_threshold)

#     print("Training Accuracy:", train_acc)
#     result.append(train_acc)

#     print("Best Training Precision 0:", best_train_precision_0)
#     result.append(best_train_precision_0)   

#     print("Best Training Recall 1:", best_train_recall_1)
#     result.append(best_train_recall_1)

#     print("Best Training Inverse Weighted F2", best_train_inverse_weighted_f2)
#     result.append(best_train_inverse_weighted_f2) 

#     print(confusion_matrix(y_train, best_y_train_pred))
              
#     # Testing accuracy, precision (class 0), recall (class 1) & inverse-weighted F2
#     for i in range(len(test_data_paths)):
#         X_test, y_test = X_test_all[i], y_test_all[i]
#         num_days_test = X_test.shape[0]
#         y_test_pred = []

#         for i in range(num_days_test - 1):
#             final_bin_edges = equal_width_cut_points(300, 850, num_bin, X_test[i] + X_test[i + 1])

#             hist_1 = []
#             for j in range(len(final_bin_edges) - 1):
#                 hist_1.append(np.sum(X_test[i, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
#             hist_1 = np.array(hist_1)

#             hist_2 = []
#             for j in range(len(final_bin_edges) - 1):
#                 hist_2.append(np.sum(X_test[i + 1, final_bin_edges[j] - 300: final_bin_edges[j + 1] - 300]))
#             hist_2 = np.array(hist_2)

#             psis = (hist_1 - hist_2) * np.log((hist_1 + epsilon) / (hist_2 + epsilon))
#             psi = np.sum(psis)

#             if (y_test[i] == 0 and psi < best_train_threshold) or (y_test[i] == 1 and psi >= best_train_threshold):
#                 y_test_pred.append(y_test[i])
#             else:
#                 y_test_pred.append(1 - y_test[i])

#         test_acc = accuracy_score(y_test, y_test_pred)
#         print("Testing Accuracy:", test_acc)
#         result.append(test_acc)
        
#         test_precision_0, test_recall_1, test_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_test, y_test_pred, beta=2.0)

#         print("Testing Precision 0:", test_precision_0)
#         result.append(test_precision_0)   

#         print("Testing Recall 1:", test_recall_1)
#         result.append(test_recall_1)

#         print("Testing Inverse Weighted F2:", test_inverse_weighted_f2)
#         result.append(test_inverse_weighted_f2)

#         print(confusion_matrix(y_test, y_test_pred))

#     results.append(result)

num_bin = 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.013569970000418152 s
Best threshold: 0.04
Training Accuracy: 0.9862637362637363
Best Training Precision 0: 0.997
Best Training Recall 1: 0.9655
Best Training Inverse Weighted F2 0.9494
[[331   4]
 [  1  28]]
Testing Accuracy: 0.945054945054945
Testing Precision 0: 0.9923
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9437
[[129   9]
 [  1  43]]
Testing Accuracy: 0.9752747252747253
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9582
[[316   9]
 [  0  39]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 0.9937
Testing Recall 1: 0.9565
Testing Inverse Weighted F2: 0.9612
[[158   1]
 [  1  22]]
Testing Accuracy: 0.9560439560439561
Testing Precision 0: 0.987
Testing Recall 1: 0.9758
Testing Inverse Weighted F2: 0.9581
[[227  13]
 [  3 121]]
num_bin = 6


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.013642969000215999 s
Best threshold: 0.04
Training Accuracy: 0.978021978021978
Best Training Precision 0: 0.994
Best Training Recall 1: 0.931
Best Training Inverse Weighted F2 0.9123
[[329   6]
 [  2  27]]
Testing Accuracy: 0.9010989010989011
Testing Precision 0: 0.9918
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9078
[[121  17]
 [  1  43]]
Testing Accuracy: 0.9725274725274725
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9538
[[315  10]
 [  0  39]]
Testing Accuracy: 0.9560439560439561
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.938
[[151   8]
 [  0  23]]
Testing Accuracy: 0.9642857142857143
Testing Precision 0: 0.9956
Testing Recall 1: 0.9919
Testing Inverse Weighted F2: 0.9692
[[228  12]
 [  1 123]]
num_bin = 7


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.01413858099999743 s
Best threshold: 0.05
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.967032967032967
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9714
[[132   6]
 [  0  44]]
Testing Accuracy: 0.9862637362637363
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9764
[[320   5]
 [  0  39]]
Testing Accuracy: 0.978021978021978
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9681
[[155   4]
 [  0  23]]
Testing Accuracy: 0.9697802197802198
Testing Precision 0: 0.9957
Testing Recall 1: 0.9919
Testing Inverse Weighted F2: 0.9736
[[230  10]
 [  1 123]]
num_bin = 8


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014272006999846099 s
Best threshold: 0.05
Training Accuracy: 0.9917582417582418
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 0.9808
[[332   3]
 [  0  29]]
Testing Accuracy: 0.9395604395604396
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9482
[[127  11]
 [  0  44]]
Testing Accuracy: 0.9807692307692307
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9672
[[318   7]
 [  0  39]]
Testing Accuracy: 0.9725274725274725
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9604
[[154   5]
 [  0  23]]
Testing Accuracy: 0.978021978021978
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9825
[[232   8]
 [  0 124]]
num_bin = 9


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014139732999836951 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9928
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9858
[[138   0]
 [  1  43]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 0.9836
Testing Recall 1: 0.9677
Testing Inverse Weighted F2: 0.9817
[[240   0]
 [  4 120]]
num_bin = 10


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.01436573500049576 s
Best threshold: 0.06
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.9835164835164835
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9856
[[135   3]
 [  0  44]]
Testing Accuracy: 0.9917582417582418
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9857
[[322   3]
 [  0  39]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9918
[[158   1]
 [  0  23]]
Testing Accuracy: 0.9917582417582418
Testing Precision 0: 0.9958
Testing Recall 1: 0.9919
Testing Inverse Weighted F2: 0.9911
[[238   2]
 [  1 123]]
num_bin = 11


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.013989013999946565 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 0.9928
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.981
[[137   1]
 [  1  43]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9917
Testing Recall 1: 0.9839
Testing Inverse Weighted F2: 0.9909
[[240   0]
 [  2 122]]
num_bin = 12


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014083601999686834 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[138   0]
 [  0  44]]
Testing Accuracy: 0.9917582417582418
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9582
[[324   1]
 [  2  37]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9918
[[158   1]
 [  0  23]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9917
Testing Recall 1: 0.9839
Testing Inverse Weighted F2: 0.9909
[[240   0]
 [  2 122]]
num_bin = 13


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014123399000141035 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[138   0]
 [  0  44]]
Testing Accuracy: 0.9972527472527473
Testing Precision 0: 0.9969
Testing Recall 1: 0.9744
Testing Inverse Weighted F2: 0.9815
[[325   0]
 [  1  38]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9918
[[158   1]
 [  0  23]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9958
Testing Recall 1: 0.9919
Testing Inverse Weighted F2: 0.9933
[[239   1]
 [  1 123]]
num_bin = 14


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.015142692999972951 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[138   0]
 [  0  44]]
Testing Accuracy: 0.9917582417582418
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9582
[[324   1]
 [  2  37]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9918
[[158   1]
 [  0  23]]
Testing Accuracy: 0.9917582417582418
Testing Precision 0: 0.9917
Testing Recall 1: 0.9839
Testing Inverse Weighted F2: 0.9887
[[239   1]
 [  2 122]]
num_bin = 15


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014240301999819849 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9904
[[136   2]
 [  0  44]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9969
Testing Recall 1: 0.9744
Testing Inverse Weighted F2: 0.9768
[[324   1]
 [  1  38]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9918
[[158   1]
 [  0  23]]
Testing Accuracy: 0.9972527472527473
Testing Precision 0: 0.9959
Testing Recall 1: 0.9919
Testing Inverse Weighted F2: 0.9955
[[240   0]
 [  1 123]]
num_bin = 16


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014191144999983862 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[138   0]
 [  0  44]]
Testing Accuracy: 0.9972527472527473
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9952
[[324   1]
 [  0  39]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9918
[[158   1]
 [  0  23]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9958
Testing Recall 1: 0.9919
Testing Inverse Weighted F2: 0.9933
[[239   1]
 [  1 123]]
num_bin = 17


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014480820000443373 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[138   0]
 [  0  44]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9969
Testing Recall 1: 0.9744
Testing Inverse Weighted F2: 0.9768
[[324   1]
 [  1  38]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9918
[[158   1]
 [  0  23]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9958
Testing Recall 1: 0.9919
Testing Inverse Weighted F2: 0.9933
[[239   1]
 [  1 123]]
num_bin = 18


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014429769000230408 s
Best threshold: 0.07
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9904
[[136   2]
 [  0  44]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9969
Testing Recall 1: 0.9744
Testing Inverse Weighted F2: 0.9768
[[324   1]
 [  1  38]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 0.9838
[[157   2]
 [  0  23]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9958
Testing Recall 1: 0.9919
Testing Inverse Weighted F2: 0.9933
[[239   1]
 [  1 123]]
num_bin = 19


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.015019716999916 s
Best threshold: 0.08
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9928
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9858
[[138   0]
 [  1  43]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 0.9836
Testing Recall 1: 0.9677
Testing Inverse Weighted F2: 0.9817
[[240   0]
 [  4 120]]
num_bin = 20


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.014587892999884389 s
Best threshold: 0.08
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9928
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9858
[[138   0]
 [  1  43]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 0.9836
Testing Recall 1: 0.9677
Testing Inverse Weighted F2: 0.9817
[[240   0]
 [  4 120]]
num_bin = 21


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.01465569899971797 s
Best threshold: 0.08
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9928
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9858
[[138   0]
 [  1  43]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.9917582417582418
Testing Precision 0: 0.9877
Testing Recall 1: 0.9758
Testing Inverse Weighted F2: 0.9863
[[240   0]
 [  3 121]]
num_bin = 22


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.01465827600020475 s
Best threshold: 0.08
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 0.9857
Testing Recall 1: 0.9545
Testing Inverse Weighted F2: 0.9715
[[138   0]
 [  2  42]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.989010989010989
Testing Precision 0: 0.9836
Testing Recall 1: 0.9677
Testing Inverse Weighted F2: 0.9817
[[240   0]
 [  4 120]]
num_bin = 23


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.015366928999526408 s
Best threshold: 0.08
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9928
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9858
[[138   0]
 [  1  43]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.9862637362637363
Testing Precision 0: 0.9796
Testing Recall 1: 0.9597
Testing Inverse Weighted F2: 0.9771
[[240   0]
 [  5 119]]
num_bin = 24


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.01494328700061942 s
Best threshold: 0.08
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9928
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9858
[[138   0]
 [  1  43]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9917
Testing Recall 1: 0.9839
Testing Inverse Weighted F2: 0.9909
[[240   0]
 [  2 122]]
num_bin = 25


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Time for solving: 0.015043877000266548 s
Best threshold: 0.08
Training Accuracy: 1.0
Best Training Precision 0: 1.0
Best Training Recall 1: 1.0
Best Training Inverse Weighted F2 1.0
[[335   0]
 [  0  29]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9928
Testing Recall 1: 0.9773
Testing Inverse Weighted F2: 0.9858
[[138   0]
 [  1  43]]
Testing Accuracy: 0.9945054945054945
Testing Precision 0: 0.9939
Testing Recall 1: 0.9487
Testing Inverse Weighted F2: 0.9629
[[325   0]
 [  2  37]]
Testing Accuracy: 1.0
Testing Precision 0: 1.0
Testing Recall 1: 1.0
Testing Inverse Weighted F2: 1.0
[[159   0]
 [  0  23]]
Testing Accuracy: 0.9917582417582418
Testing Precision 0: 0.9877
Testing Recall 1: 0.9758
Testing Inverse Weighted F2: 0.9863
[[240   0]
 [  3 121]]


In [24]:
# results_df = pd.DataFrame(results, columns=df_columns)
# results_df

Unnamed: 0,distribution,num_days,num_samples,num_bin,solving_time,best_threshold,training_acc,training_precision_0,training_recall_1,training_inverse_weighted_f2,logistic_183_days_10000_samples_70_acc,logistic_183_days_10000_samples_70_precision_0,logistic_183_days_10000_samples_70_recall_1,logistic_183_days_10000_samples_70_inverse_weighted_f2,logistic_365_days_10000_samples_90_acc,logistic_365_days_10000_samples_90_precision_0,logistic_365_days_10000_samples_90_recall_1,logistic_365_days_10000_samples_90_inverse_weighted_f2,logistic_183_days_10000_samples_90_acc,logistic_183_days_10000_samples_90_precision_0,logistic_183_days_10000_samples_90_recall_1,logistic_183_days_10000_samples_90_inverse_weighted_f2,logistic_365_days_10000_samples_70_acc,logistic_365_days_10000_samples_70_precision_0,logistic_365_days_10000_samples_70_recall_1,logistic_365_days_10000_samples_70_inverse_weighted_f2
0,logistic,365,100000,5,0.01357,0.04,0.986264,0.997,0.9655,0.9494,0.945055,0.9923,0.9773,0.9437,0.975275,1.0,1.0,0.9582,0.989011,0.9937,0.9565,0.9612,0.956044,0.987,0.9758,0.9581
1,logistic,365,100000,6,0.013643,0.04,0.978022,0.994,0.931,0.9123,0.901099,0.9918,0.9773,0.9078,0.972527,1.0,1.0,0.9538,0.956044,1.0,1.0,0.938,0.964286,0.9956,0.9919,0.9692
2,logistic,365,100000,7,0.014139,0.05,1.0,1.0,1.0,1.0,0.967033,1.0,1.0,0.9714,0.986264,1.0,1.0,0.9764,0.978022,1.0,1.0,0.9681,0.96978,0.9957,0.9919,0.9736
3,logistic,365,100000,8,0.014272,0.05,0.991758,1.0,1.0,0.9808,0.93956,1.0,1.0,0.9482,0.980769,1.0,1.0,0.9672,0.972527,1.0,1.0,0.9604,0.978022,1.0,1.0,0.9825
4,logistic,365,100000,9,0.01414,0.07,1.0,1.0,1.0,1.0,0.994505,0.9928,0.9773,0.9858,0.994505,0.9939,0.9487,0.9629,1.0,1.0,1.0,1.0,0.989011,0.9836,0.9677,0.9817
5,logistic,365,100000,10,0.014366,0.06,1.0,1.0,1.0,1.0,0.983516,1.0,1.0,0.9856,0.991758,1.0,1.0,0.9857,0.994505,1.0,1.0,0.9918,0.991758,0.9958,0.9919,0.9911
6,logistic,365,100000,11,0.013989,0.07,1.0,1.0,1.0,1.0,0.989011,0.9928,0.9773,0.981,0.994505,0.9939,0.9487,0.9629,1.0,1.0,1.0,1.0,0.994505,0.9917,0.9839,0.9909
7,logistic,365,100000,12,0.014084,0.07,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.991758,0.9939,0.9487,0.9582,0.994505,1.0,1.0,0.9918,0.994505,0.9917,0.9839,0.9909
8,logistic,365,100000,13,0.014123,0.07,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.997253,0.9969,0.9744,0.9815,0.994505,1.0,1.0,0.9918,0.994505,0.9958,0.9919,0.9933
9,logistic,365,100000,14,0.015143,0.07,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.991758,0.9939,0.9487,0.9582,0.994505,1.0,1.0,0.9918,0.991758,0.9917,0.9839,0.9887


## Default threshold 0.1

In [25]:
# Array for storing results (one row per bin count, in the order expected by `df_columns`)
results = []
num_bins = range(5, 26)
epsilon = 1e-8  # Smoothing term added to both histograms so the log ratio stays finite for empty bins

# Only the default threshold is evaluated in this section. To sweep instead, use e.g.
# thresholds = [round(t, 2) for t in np.arange(0.01, 1.01, 0.01)]
thresholds = [0.1]


def _rebin_histogram(hist, bin_edges, lower_bound):
    """Sum the unit-width histogram `hist` into the bins delimited by `bin_edges`.

    `hist[k]` is taken to hold the count for value `lower_bound + k` (the same
    convention `equal_width_cut_points` uses). Each bin is the half-open slice
    [bin_edges[j], bin_edges[j + 1]), so an entry stored exactly at the last edge
    is not counted.
    NOTE(review): if the histograms have an entry for the upper bound itself,
    that entry is dropped here - confirm against the array width.
    """
    return np.array([
        np.sum(hist[left - lower_bound: right - lower_bound])
        for left, right in zip(bin_edges[:-1], bin_edges[1:])
    ])


def _consecutive_day_psis(X, num_bin, smoothing, lower_bound=300, upper_bound=850):
    """Compute the PSI between every pair of consecutive rows of `X`.

    For each pair (X[d], X[d + 1]) the cut points are derived from the summed
    histogram with `equal_width_cut_points`, both rows are re-binned on those
    cut points, and `sum((h1 - h2) * log((h1 + smoothing) / (h2 + smoothing)))`
    is evaluated.

    Returns:
        (psis, solving_time): a list with `X.shape[0] - 1` PSI values and the
        accumulated CPU time spent in `equal_width_cut_points` only.
    """
    psis = []
    solving_time = 0
    for day in range(X.shape[0] - 1):
        # Only the cut-point search is timed, not the re-binning or the PSI itself.
        start_time = process_time()
        bin_edges = equal_width_cut_points(lower_bound, upper_bound, num_bin, X[day] + X[day + 1])
        end_time = process_time()
        solving_time += end_time - start_time

        hist_1 = _rebin_histogram(X[day], bin_edges, lower_bound)
        hist_2 = _rebin_histogram(X[day + 1], bin_edges, lower_bound)
        psis.append(np.sum((hist_1 - hist_2) * np.log((hist_1 + smoothing) / (hist_2 + smoothing))))
    return psis, solving_time


for num_bin in num_bins:
    ########################
    ### current solution ###
    ########################
    result = [dist, num_days, num_samples, num_bin]
    print(f"num_bin = {num_bin}")

    ##################
    ### Evaluation ###
    ##################
    # Training accuracy, precision (class 0), recall (class 1) and inverse-weighted F2.
    # The PSI values do not depend on the threshold, so they are computed once per bin count.
    train_psis, solving_time = _consecutive_day_psis(X_train, num_bin, epsilon)

    # `None` marks "no candidate recorded yet", so the first threshold is always kept
    # even when its score is 0 (a 0 start with a strict `>` would silently keep threshold 0).
    best_train_inverse_weighted_f2 = None
    best_train_threshold = best_train_precision_0 = best_train_recall_1 = 0
    best_y_train_pred = [0] * len(train_psis)
    train_acc = 0
    best_solving_time = 0

    for threshold in thresholds:
        # Predict class 1 whenever the PSI reaches the threshold; labels are not consulted.
        y_train_pred = [int(psi >= threshold) for psi in train_psis]

        train_precision_0, train_recall_1, train_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_train, y_train_pred, beta=2.0)
        if best_train_inverse_weighted_f2 is None or train_inverse_weighted_f2 > best_train_inverse_weighted_f2:
            best_train_inverse_weighted_f2 = train_inverse_weighted_f2
            best_train_threshold = threshold
            best_train_precision_0 = train_precision_0
            best_train_recall_1 = train_recall_1
            best_y_train_pred = y_train_pred
            best_solving_time = solving_time
            train_acc = accuracy_score(y_train, y_train_pred)

    print(f"Time for solving: {best_solving_time} s")
    result.append(best_solving_time)

    print("Best threshold:", best_train_threshold)
    result.append(best_train_threshold)

    print("Training Accuracy:", train_acc)
    result.append(train_acc)

    print("Best Training Precision 0:", best_train_precision_0)
    result.append(best_train_precision_0)

    print("Best Training Recall 1:", best_train_recall_1)
    result.append(best_train_recall_1)

    print("Best Training Inverse Weighted F2", best_train_inverse_weighted_f2)
    result.append(best_train_inverse_weighted_f2)

    print(confusion_matrix(y_train, best_y_train_pred))

    # Testing accuracy, precision (class 0), recall (class 1) and inverse-weighted F2,
    # using the threshold selected on the training data.
    for test_idx in range(len(test_data_paths)):
        X_test, y_test = X_test_all[test_idx], y_test_all[test_idx]

        test_psis = _consecutive_day_psis(X_test, num_bin, epsilon)[0]
        y_test_pred = [int(psi >= best_train_threshold) for psi in test_psis]

        test_acc = accuracy_score(y_test, y_test_pred)
        print("Testing Accuracy:", test_acc)
        result.append(test_acc)

        test_precision_0, test_recall_1, test_inverse_weighted_f2 = precision_0_recall_1_inverse_weighted_fbeta(y_test, y_test_pred, beta=2.0)

        print("Testing Precision 0:", test_precision_0)
        result.append(test_precision_0)

        print("Testing Recall 1:", test_recall_1)
        result.append(test_recall_1)

        print("Testing Inverse Weighted F2:", test_inverse_weighted_f2)
        result.append(test_inverse_weighted_f2)

        print(confusion_matrix(y_test, y_test_pred))

    results.append(result)

num_bin = 5
Time for solving: 0.01809734899995874 s
Best threshold: 0.1
Training Accuracy: 0.9560439560439561
Best Training Precision 0: 0.9544
Best Training Recall 1: 0.4483
Best Training Inverse Weighted F2 0.5426
[[335   0]
 [ 16  13]]
Testing Accuracy: 0.9120879120879121
Testing Precision 0: 0.8961
Testing Recall 1: 0.6364
Testing Inverse Weighted F2: 0.7566
[[138   0]
 [ 16  28]]
Testing Accuracy: 0.9478021978021978
Testing Precision 0: 0.9448
Testing Recall 1: 0.5128
Testing Inverse Weighted F2: 0.6132
[[325   0]
 [ 19  20]]
Testing Accuracy: 0.945054945054945
Testing Precision 0: 0.9408
Testing Recall 1: 0.5652
Testing Inverse Weighted F2: 0.6656
[[159   0]
 [ 10  13]]
Testing Accuracy: 0.8434065934065934
Testing Precision 0: 0.8081
Testing Recall 1: 0.5403
Testing Inverse Weighted F2: 0.7175
[[240   0]
 [ 57  67]]
num_bin = 6
Time for solving: 0.013898198999925171 s
Best threshold: 0.1
Training Accuracy: 0.9752747252747253
Best Training Precision 0: 0.9738
Best Training Recall 

In [26]:
# Turn the collected result rows into a table; column names come from `df_columns`.
results_df = pd.DataFrame(data=results, columns=df_columns)
results_df  # bare last expression so the notebook renders the frame

Unnamed: 0,distribution,num_days,num_samples,num_bin,solving_time,best_threshold,training_acc,training_precision_0,training_recall_1,training_inverse_weighted_f2,logistic_183_days_10000_samples_70_acc,logistic_183_days_10000_samples_70_precision_0,logistic_183_days_10000_samples_70_recall_1,logistic_183_days_10000_samples_70_inverse_weighted_f2,logistic_365_days_10000_samples_90_acc,logistic_365_days_10000_samples_90_precision_0,logistic_365_days_10000_samples_90_recall_1,logistic_365_days_10000_samples_90_inverse_weighted_f2,logistic_183_days_10000_samples_90_acc,logistic_183_days_10000_samples_90_precision_0,logistic_183_days_10000_samples_90_recall_1,logistic_183_days_10000_samples_90_inverse_weighted_f2,logistic_365_days_10000_samples_70_acc,logistic_365_days_10000_samples_70_precision_0,logistic_365_days_10000_samples_70_recall_1,logistic_365_days_10000_samples_70_inverse_weighted_f2
0,logistic,365,100000,5,0.018097,0.1,0.956044,0.9544,0.4483,0.5426,0.912088,0.8961,0.6364,0.7566,0.947802,0.9448,0.5128,0.6132,0.945055,0.9408,0.5652,0.6656,0.843407,0.8081,0.5403,0.7175
1,logistic,365,100000,6,0.013898,0.1,0.975275,0.9738,0.6897,0.756,0.934066,0.92,0.7273,0.8209,0.958791,0.9559,0.6154,0.7014,0.983516,0.9815,0.8696,0.9059,0.887363,0.8541,0.6694,0.802
2,logistic,365,100000,7,0.013477,0.1,0.967033,0.9654,0.5862,0.6673,0.93956,0.9262,0.75,0.8366,0.96978,0.9673,0.7179,0.7858,0.972527,0.9695,0.7826,0.8404,0.903846,0.8727,0.7177,0.8326
3,logistic,365,100000,8,0.013578,0.1,0.975275,0.9738,0.6897,0.756,0.961538,0.9517,0.8409,0.8979,0.96978,0.9673,0.7179,0.7858,0.994505,0.9938,0.9565,0.9692,0.917582,0.8889,0.7581,0.8576
4,logistic,365,100000,9,0.013615,0.1,0.980769,0.9795,0.7586,0.8129,0.956044,0.9452,0.8182,0.8828,0.978022,0.976,0.7949,0.8467,0.994505,0.9938,0.9565,0.9692,0.925824,0.8989,0.7823,0.8724
5,logistic,365,100000,10,0.013739,0.1,0.986264,0.9853,0.8276,0.8683,0.961538,0.9517,0.8409,0.8979,0.975275,0.9731,0.7692,0.8266,0.994505,0.9938,0.9565,0.9692,0.931319,0.9057,0.7984,0.8822
6,logistic,365,100000,11,0.013785,0.1,0.986264,0.9853,0.8276,0.8683,0.961538,0.9517,0.8409,0.8979,0.978022,0.976,0.7949,0.8467,0.994505,0.9938,0.9565,0.9692,0.936813,0.9125,0.8145,0.892
7,logistic,365,100000,12,0.013966,0.1,0.991758,0.9911,0.8966,0.9221,0.961538,0.9517,0.8409,0.8979,0.978022,0.976,0.7949,0.8467,0.994505,0.9938,0.9565,0.9692,0.934066,0.9091,0.8065,0.8871
8,logistic,365,100000,13,0.015586,0.1,0.991758,0.9911,0.8966,0.9221,0.967033,0.9583,0.8636,0.9129,0.978022,0.976,0.7949,0.8467,0.994505,0.9938,0.9565,0.9692,0.936813,0.9125,0.8145,0.892
9,logistic,365,100000,14,0.014066,0.1,0.997253,0.997,0.9655,0.9744,0.961538,0.9517,0.8409,0.8979,0.980769,0.9789,0.8205,0.8666,0.994505,0.9938,0.9565,0.9692,0.93956,0.916,0.8226,0.8968
