In [1]:
import random
import os
import re
import pickle
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, GridSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import MinMaxScaler
from scipy.signal import find_peaks
import seaborn as sns
import pickle
import torch

pd.set_option('display.expand_frame_repr', False)  # DataFrameを改行せずに表示
pd.set_option('display.max_columns', None)  # すべての列を表示

plt.rcParams["font.size"]=5
plt.rcParams["figure.figsize"]=(2.0, 1.0)
plt.rcParams["figure.dpi"]= 300

In [2]:
def extract_finalQP(filename):
    match = re.search(r'2ndQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None


def extract_1stQP(filename):
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None

    
def is_double_compressed(mean_difference, final_QP, threshold):
    mean_difference = mean_difference[0]
    final_QP = final_QP[0]
    clamped_mean_difference = np.maximum(mean_difference, -0.01)
    
    #全体のエネルギーを計算
    energy = np.sum(np.square(clamped_mean_difference))
    # energy = np.sum(np.square(mean_difference))
    
    #QP2より右側のエネルギーを計算
    right_energy = np.sum(np.square(clamped_mean_difference[final_QP+1:52]))
    # right_energy = np.sum(np.square(mean_difference[final_QP+1:52]))
    
    # print('energy: ', energy)
    # print('R-energy: ', right_energy)
    # print('Ratio: ', right_energy / energy)
    
    
    # エネルギー比を計算して閾値と比較
    if energy <= 0:
        return -1
    
    # else:
    #     if (right_energy / energy) > threshold:
    #         return True
    #     else:
    #         return False
        
        
    elif (right_energy / energy) != 0 and (right_energy / energy) > threshold:
        return True
    
    elif (right_energy / energy) != 0 and (right_energy / energy) <= threshold:
        return False
    
    else:
        return -1

def calculate_mae(file_path):
    try:
        with open(file_path, 'rb') as file:
            loaded_data, loaded_data_shifted = pickle.load(file)
    except Exception as e:
        print(f"Error occurred while loading {file_path}: {e}")
        return None

    # タプル内のリストを抽出
    original_mae = np.array(loaded_data)
    shifted_mae = np.array(loaded_data_shifted)

    # Coding ghostを計算してリストに格納する
    mae_difference = shifted_mae - original_mae
    
    # mae_differenceの各要素においてマイナスの値を0に変換
    mae_difference_positive = np.maximum(mae_difference, 0)
    
    return mae_difference, mae_difference_positive

In [3]:
rootpath_csv = "/Prove/Yoshihisa/HEIF_ghost/HEIF_IMAGES_CSV/"

single_path1 = os.path.join(rootpath_csv, 'HEIF_images_single_csv')
single_path2 = os.path.join(rootpath_csv, 'HEIF_images_second_sameQP_csv')
single_list1 = [os.path.join(single_path1, file) for file in sorted(os.listdir(single_path1))]
single_list2 = [os.path.join(single_path2, file) for file in sorted(os.listdir(single_path2))]

second_largeQP1_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_csv')
second_largeQP1_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_csv')
second_largeQP1_list1 = [os.path.join(second_largeQP1_path1, file) for file in sorted(os.listdir(second_largeQP1_path1))]
second_largeQP1_list2 = [os.path.join(second_largeQP1_path2, file) for file in sorted(os.listdir(second_largeQP1_path2))]

second_sameQP_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_sameQP_csv')
second_sameQP_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_sameQP_csv')
second_sameQP_list1 = [os.path.join(second_sameQP_path1, file) for file in sorted(os.listdir(second_sameQP_path1))]
second_sameQP_list2 = [os.path.join(second_sameQP_path2, file) for file in sorted(os.listdir(second_sameQP_path2))]

second_largeQP2_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_largeQP_csv')
second_largeQP2_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_largeQP_csv')
second_largeQP2_list1 = [os.path.join(second_largeQP2_path1, file) for file in sorted(os.listdir(second_largeQP2_path1))]
second_largeQP2_list2 = [os.path.join(second_largeQP2_path2, file) for file in sorted(os.listdir(second_largeQP2_path2))]

print("single_list1: ", len(single_list1))
print("single_list2: ", len(single_list2))
print()
print("second_largeQP1_list1: ", len(second_largeQP1_list1))
print("second_largeQP1_list2: ", len(second_largeQP1_list2))
print("second_sameQP_list1: ", len(second_sameQP_list1))
print("second_sameQP_list2: ", len(second_sameQP_list2))
print("second_largeQP_list1: ", len(second_largeQP2_list1))
print("second_largeQP_list2: ", len(second_largeQP2_list2))

single_list1:  3080
single_list2:  3080

second_largeQP1_list1:  17556
second_largeQP1_list2:  17556
second_sameQP_list1:  3080
second_sameQP_list2:  3080
second_largeQP_list1:  12012
second_largeQP_list2:  12012


In [4]:
rootpath_pkl = "/Prove/Yoshihisa/HEIF_ghost/PKL/"

single_pathA = os.path.join(rootpath_pkl, 'pkl_single')
single_pathB = os.path.join(rootpath_pkl, 'pkl_second_sameQP')
single_listA = [os.path.join(single_pathA, file) for file in sorted(os.listdir(single_pathA))]
single_listB = [os.path.join(single_pathB, file) for file in sorted(os.listdir(single_pathB))]

second_largeQP1_pathA = os.path.join(rootpath_pkl, 'pkl_second')
second_largeQP1_pathB = os.path.join(rootpath_pkl, 'pkl_triple')
second_largeQP1_listA = [os.path.join(second_largeQP1_pathA, file) for file in sorted(os.listdir(second_largeQP1_pathA))]
second_largeQP1_listB = [os.path.join(second_largeQP1_pathB, file) for file in sorted(os.listdir(second_largeQP1_pathB))]

second_sameQP_pathA = os.path.join(rootpath_pkl, 'pkl_second_sameQP')
second_sameQP_pathB = os.path.join(rootpath_pkl, 'pkl_triple_sameQP')
second_sameQP_listA = [os.path.join(second_sameQP_pathA, file) for file in sorted(os.listdir(second_sameQP_pathA))]
second_sameQP_listB = [os.path.join(second_sameQP_pathB, file) for file in sorted(os.listdir(second_sameQP_pathB))]

second_largeQP2_pathA = os.path.join(rootpath_pkl, 'pkl_second_largeQP')
second_largeQP2_pathB = os.path.join(rootpath_pkl, 'pkl_triple_largeQP')
second_largeQP2_listA = [os.path.join(second_largeQP2_pathA, file) for file in sorted(os.listdir(second_largeQP2_pathA))]
second_largeQP2_listB = [os.path.join(second_largeQP2_pathB, file) for file in sorted(os.listdir(second_largeQP2_pathB))]

print("single_listA: ", len(single_listA))
print("single_listB: ", len(single_listB))
print()
print("second_largeQP1_listA: ", len(second_largeQP1_listA))
print("second_largeQP1_listB: ", len(second_largeQP1_listB))
print("second_sameQP_listA: ", len(second_sameQP_listA))
print("second_sameQP_listB: ", len(second_sameQP_listB))
print("second_largeQP2_listA: ", len(second_largeQP2_listA))
print("second_largeQP2_listB: ", len(second_largeQP2_listB))

single_listA:  3080
single_listB:  3080

second_largeQP1_listA:  17556
second_largeQP1_listB:  17556
second_sameQP_listA:  3080
second_sameQP_listB:  3080
second_largeQP2_listA:  12012
second_largeQP2_listB:  12012


In [5]:
single_csv = list(zip(single_list1, single_listA, single_list2, single_listB))
single_csv = random.sample(single_csv, 3000)

second_largeQP1_csv = list(zip(second_largeQP1_list1, second_largeQP1_listA, second_largeQP1_list2, second_largeQP1_listB))
second_largeQP1_csv = random.sample(second_largeQP1_csv, 1000)
second_sameQP_csv = list(zip(second_sameQP_list1, second_sameQP_listA, second_sameQP_list2, second_sameQP_listB))
second_sameQP_csv = random.sample(second_sameQP_csv, 1000)
second_largeQP2_csv = list(zip(second_largeQP2_list1, second_largeQP2_listA, second_largeQP2_list2, second_largeQP2_listB))
second_largeQP2_csv = random.sample(second_largeQP2_csv, 1000)

train_csv_list = single_csv + second_largeQP1_csv + second_sameQP_csv + second_largeQP2_csv

print("train_csv_list: ", len(train_csv_list))

train_csv_list:  6000


In [6]:
pu_columns = ["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  
              "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"]

luminance_columns = ["LU1_0","LU1_1","LU1_2","LU1_3",
                     "LU1_4","LU1_5","LU1_6","LU1_7",
                     "LU1_8","LU1_9","LU1_10","LU1_11",
                     "LU1_12","LU1_13","LU1_14","LU1_15",
                     "LU1_16","LU1_17","LU1_18","LU1_19",
                     "LU1_20","LU1_21","LU1_22","LU1_23",
                     "LU1_24","LU1_25","LU1_26","LU1_27",
                     "LU1_28","LU1_29","LU1_30","LU1_31",
                     "LU1_32","LU1_33","LU1_34",
                     
                     "LU2_0","LU2_1","LU2_2","LU2_3",
                     "LU2_4","LU2_5","LU2_6","LU2_7",
                     "LU2_8","LU2_9","LU2_10","LU2_11",
                     "LU2_12","LU2_13","LU2_14","LU2_15",
                     "LU2_16","LU2_17","LU2_18","LU2_19",
                     "LU2_20","LU2_21","LU2_22","LU2_23",
                     "LU2_24","LU2_25","LU2_26","LU2_27",
                     "LU2_28","LU2_29","LU2_30","LU2_31",
                     "LU2_32","LU2_33","LU2_34"]

chrominance_columns = ["CH1_0", "CH1_1", "CH1_10", "CH1_26", "CH1_34", "CH1_36", 
                       "CH2_0", "CH2_1", "CH2_10", "CH2_26", "CH2_34", "CH2_36"]

label_columns = ["LABEL"]
mae1_columns = [f"MAE1_{i}" for i in range(52)]
mae2_columns = [f"MAE2_{i}" for i in range(52)]
mae_columns = ["MAE"]
final_qp_columns = ["FINAL_QP"]

# データフレームを初期化
train_df1_1 = pd.DataFrame(columns=pu_columns)
train_df1_2 = pd.DataFrame(columns=luminance_columns)
train_df1_3 = pd.DataFrame(columns=chrominance_columns)
train_df2 = pd.DataFrame(columns=label_columns)
train_df3 = pd.DataFrame(columns=mae1_columns)
train_df4 = pd.DataFrame(columns=mae2_columns)
train_df5 = pd.DataFrame(columns=mae_columns)
train_df6 = pd.DataFrame(columns=final_qp_columns)


for path1, path2, path3, path4 in train_csv_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    train_pkl_list = [path2, path4]
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    lu_values = [df1.loc[i, "luminance_counts"] for i in range(35)] + [df2.loc[i, "luminance_counts"] for i in range(35)]
    
#     lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
#     lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
#     lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
#     lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

#     average_10_1 = np.mean(lu_values_10_1)
#     average_10_2 = np.mean(lu_values_10_2)
#     average_26_1 = np.mean(lu_values_26_1)
#     average_26_2 = np.mean(lu_values_26_2)
    
    
#     lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    
    
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]
    
    train_df1_1 = pd.concat([train_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    train_df1_2= pd.concat([train_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    train_df1_3 = pd.concat([train_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)

    # label_columnsの値を取得
    train_df2 = pd.concat([train_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)

    final_QP = extract_finalQP(train_pkl_list[0])

    # MAEの値を取得
    mae_d1, mae_d1_positive = calculate_mae(train_pkl_list[0])
    _, mae_d2_positive = calculate_mae(train_pkl_list[1])
    
    
    # mae1_columnsの値を取得
    train_df3 = pd.concat([train_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1_positive[i]] for i in range(52)})], ignore_index=True)
    

    # mae2_columnsの値を取得
    train_df4 = pd.concat([train_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2_positive[i]] for i in range(52)})], ignore_index=True)

    # mae_columnsの値を取得
    train_df5 = pd.concat([train_df5, pd.DataFrame({"MAE": [mae_d1]})], ignore_index=True)

    # final_qp_columnsの値を取得
    train_df6 = pd.concat([train_df6, pd.DataFrame({"FINAL_QP": [final_QP]})], ignore_index=True)

# インデックスをリセット
train_df1_1.reset_index(drop=True, inplace=True)
train_df1_2.reset_index(drop=True, inplace=True)
train_df1_3.reset_index(drop=True, inplace=True)
train_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
train_df = pd.concat([train_df1_1, train_df1_2, train_df1_3, train_df3, train_df4], axis=1)
train_df_onlyGhost = pd.concat([train_df3, train_df4], axis=1)

In [7]:
# 各データフレームの長さを表示
print(f'Length of train_df: {len(train_df)}')
print(f'Length of train_df_onlyGhost: {len(train_df_onlyGhost)}')
print(f'Length of train_df5: {len(train_df5)}')
print(f'Length of train_df6: {len(train_df6)}')

Length of train_df: 6000
Length of train_df_onlyGhost: 6000
Length of train_df5: 6000
Length of train_df6: 6000


In [8]:
scaler = MinMaxScaler()

# スケーラーを使って結合したデータをスケーリング
X_train = scaler.fit_transform(train_df)
X_train_onlyGhost = scaler.fit_transform(train_df_onlyGhost)

# pandasをndarrayに変換
MAE = train_df5.values
FINAL_QP = train_df6.values

# ラベルの準備
Y_train = train_df2['LABEL'].astype(int)

print(f'Length of X_train: {len(X_train)}')
print(f'Length of X_train_onlyGhost: {len(X_train_onlyGhost)}')
print(f'Length of Y_train: {len(Y_train)}')
print(f'Length of MAE: {len(MAE)}')
print(f'Length of FINAL_QP: {len(FINAL_QP)}')

Length of X_train: 6000
Length of X_train_onlyGhost: 6000
Length of Y_train: 6000
Length of MAE: 6000
Length of FINAL_QP: 6000


In [9]:
# Cの範囲を指定
# C_values = {'C': [0.01, 0.1, 1, 10, 100, 1000, 2000, 3000, 4000, 5000]}
C_values = {'C': [0.01, 0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 2000, 3000]}
kfold = StratifiedKFold(n_splits=10, shuffle=True)

# 結果のデータフレームを初期化
results = pd.DataFrame(columns=['C_RBF', 'Test_Score_RBF', 'C_LINEAR', 'Test_Score_LINEAR', 
                                'C_onlyGhost_RBF', 'Test_Score_onlyGhost_RBF', 'C_onlyGhost_LINEAR', 'Test_Score_onlyGhost_LINEAR',
                                'Threshold', 'Test_Score_old'])

initial_X, initial_X_onlyGhost = X_train, X_train_onlyGhost
initial_Y = Y_train
initial_old, initial_final_QP = MAE, FINAL_QP


# k-fold cross-validation
for fold, (train_ids, test_ids) in enumerate(kfold.split(initial_X, initial_Y)):
    print(f"<Fold-{fold+1}>")
    print()
    
    results_old = []

    # 全体を訓練・検証データとテストデータに分割
    X_train_val, X_test = initial_X[train_ids], initial_X[test_ids]
    X_train_onlyGhost_val, X_test_onlyGhost = initial_X_onlyGhost[train_ids], initial_X_onlyGhost[test_ids]
    X_train_old_val, X_test_old = initial_old[train_ids], initial_old[test_ids]
    
    final_QP = initial_final_QP[test_ids]
    
    # 全体を訓練・検証ラベルとテストラベルに分割
    Y_train_val, Y_test = initial_Y[train_ids], initial_Y[test_ids]
    
    # 訓練・検証データ（ラベル）を訓練データ（ラベル）と検証データ（ラベル）に分割
    X_train, X_val, Y_train, Y_val = train_test_split(X_train_val, Y_train_val, test_size=600, random_state=42)
    X_train_onlyGhost, X_val_onlyGhost, Y_train, Y_val = train_test_split(X_train_onlyGhost_val, Y_train_val, test_size=600, random_state=42)
    
    # for i in range(600): 
    
    best_threshold = 0
    best_accuracy = 0
    best_predicted_labels = []
    best_ground_truth_labels = []
                
    for threshold in np.arange(0.01, 1.00, 0.01):
        results_old = np.array([is_double_compressed(X_test_old[i], final_QP[i], threshold) for i in range(600)])
        predicted_labels = results_old.astype(int)
        ground_truth_labels = np.array(Y_test)
        accuracy = np.sum(ground_truth_labels == predicted_labels) / len(ground_truth_labels)
    
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_threshold = threshold
            best_predicted_labels = predicted_labels
            best_ground_truth_labels = ground_truth_labels
    
    best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = 0, None, None
    best_val_score_onlyGhost_RBF, best_svm_model_onlyGhost_RBF, best_c_value_onlyGhost_RBF = 0, None, None
    
    best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = 0, None, None
    best_val_score_onlyGhost_LINEAR, best_svm_model_onlyGhost_LINEAR, best_c_value_onlyGhost_LINEAR = 0, None, None
        
    for C_value in C_values['C']:    
        # SVMモデルのインスタンスを作成
        svm_model_RBF = SVC(kernel='rbf', C=C_value)
        svm_model_onlyGhost_RBF = SVC(kernel='rbf', C=C_value)
        
        svm_model_LINEAR = SVC(kernel='linear', C=C_value)
        svm_model_onlyGhost_LINEAR = SVC(kernel='linear', C=C_value)

        # 訓練データで訓練
        svm_model_RBF.fit(X_train, Y_train)
        svm_model_onlyGhost_RBF.fit(X_train_onlyGhost, Y_train)
        
        svm_model_LINEAR.fit(X_train, Y_train)
        svm_model_onlyGhost_LINEAR.fit(X_train_onlyGhost, Y_train)

        
        val_accuracy_RBF = accuracy_score(Y_val, svm_model_RBF.predict(X_val))
        val_accuracy_onlyGhost_RBF = accuracy_score(Y_val, svm_model_onlyGhost_RBF.predict(X_val_onlyGhost))
        
        val_accuracy_LINEAR = accuracy_score(Y_val, svm_model_LINEAR.predict(X_val))
        val_accuracy_onlyGhost_LINEAR = accuracy_score(Y_val, svm_model_onlyGhost_LINEAR.predict(X_val_onlyGhost))
        

        # 検証データでの精度が最も高かった場合、そのモデルを保存
        if val_accuracy_RBF > best_val_score_RBF:
            best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = val_accuracy_RBF, svm_model_RBF, C_value

        if val_accuracy_onlyGhost_RBF > best_val_score_onlyGhost_RBF:
            best_val_score_onlyGhost_RBF, best_svm_model_onlyGhost_RBF, best_c_value_onlyGhost_RBF = val_accuracy_onlyGhost_RBF, svm_model_onlyGhost_RBF, C_value
            
        if val_accuracy_LINEAR > best_val_score_LINEAR:
            best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = val_accuracy_LINEAR, svm_model_LINEAR, C_value

        if val_accuracy_onlyGhost_LINEAR > best_val_score_onlyGhost_LINEAR:
            best_val_score_onlyGhost_LINEAR, best_svm_model_onlyGhost_LINEAR, best_c_value_onlyGhost_LINEAR = val_accuracy_onlyGhost_LINEAR, svm_model_onlyGhost_LINEAR, C_value

            
    # テストデータで評価
    test_predictions_RBF = best_svm_model_RBF.predict(X_test)
    test_predictions_prob_RBF = best_svm_model_RBF.decision_function(X_test)
    test_accuracy_RBF = accuracy_score(Y_test, test_predictions_RBF)
    report_RBF = classification_report(Y_test, test_predictions_RBF, digits=4)
    print(f'Summary_RBF:\n{report_RBF}')
    
    
    test_predictions_LINEAR = best_svm_model_LINEAR.predict(X_test)
    test_predictions_prob_LINEAR = best_svm_model_LINEAR.decision_function(X_test)
    test_accuracy_LINEAR = accuracy_score(Y_test, test_predictions_LINEAR)
    report_LINEAR = classification_report(Y_test, test_predictions_LINEAR, digits=4)
    print(f'Summary_LINEAR:\n{report_LINEAR}')
        
    
    # テストデータで評価
    test_predictions_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.predict(X_test_onlyGhost)
    test_predictions_prob_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.decision_function(X_test_onlyGhost)
    test_accuracy_onlyGhost_RBF = accuracy_score(Y_test, test_predictions_onlyGhost_RBF)
    report_onlyGhost_RBF = classification_report(Y_test, test_predictions_onlyGhost_RBF, digits=4)
    print(f'Summary_onlyGhost_RBF:\n{report_onlyGhost_RBF}')
    
    test_predictions_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.predict(X_test_onlyGhost)
    test_predictions_prob_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.decision_function(X_test_onlyGhost)
    test_accuracy_onlyGhost_LINEAR = accuracy_score(Y_test, test_predictions_onlyGhost_LINEAR)
    report_onlyGhost_LINEAR = classification_report(Y_test, test_predictions_onlyGhost_LINEAR, digits=4)
    print(f'Summary_onlyGhost_LINEAR:\n{report_onlyGhost_LINEAR}')
    

    report_old = classification_report(best_ground_truth_labels, best_predicted_labels, labels=[0,1], target_names=['0', '1'], zero_division=0, digits=4)
    print(f'Summary old_model:\n{report_old}')
        
    # Test結果を保存
    
    result_row = {'C_RBF': best_c_value_RBF, 'Test_Score_RBF': test_accuracy_RBF,
              'C_LINEAR': best_c_value_LINEAR, 'Test_Score_LINEAR': test_accuracy_LINEAR,
              'C_onlyGhost_RBF': best_c_value_onlyGhost_RBF, 'Test_Score_onlyGhost_RBF': test_accuracy_onlyGhost_RBF,
              'C_onlyGhost_LINEAR': best_c_value_onlyGhost_LINEAR, 'Test_Score_onlyGhost_LINEAR': test_accuracy_onlyGhost_LINEAR,
              'Threshold': best_threshold, 'Test_Score_old': best_accuracy}

    results = pd.concat([results, pd.DataFrame([result_row])], ignore_index=True)

# 結果を表示
# print(results)

<Fold-1>

Summary_RBF:
              precision    recall  f1-score   support

           0     0.7717    0.8000    0.7856       300
           1     0.7924    0.7633    0.7776       300

    accuracy                         0.7817       600
   macro avg     0.7820    0.7817    0.7816       600
weighted avg     0.7820    0.7817    0.7816       600

Summary_LINEAR:
              precision    recall  f1-score   support

           0     0.7975    0.8533    0.8245       300
           1     0.8423    0.7833    0.8117       300

    accuracy                         0.8183       600
   macro avg     0.8199    0.8183    0.8181       600
weighted avg     0.8199    0.8183    0.8181       600

Summary_onlyGhost_RBF:
              precision    recall  f1-score   support

           0     0.7699    0.8700    0.8169       300
           1     0.8506    0.7400    0.7914       300

    accuracy                         0.8050       600
   macro avg     0.8102    0.8050    0.8042       600
weighted avg

In [10]:
def print_stats(column_name, label):
    average = round(results[column_name].mean(), 4)
    std_dev = round(results[column_name].std(), 4)
    max_value = round(results[column_name].max(), 4)
    min_value = round(results[column_name].min(), 4)

    print(f'Average Test Score {label}: {average}')
    print(f'Standard Deviation of Test Score {label}: {std_dev}')
    print(f'Maximum Test Score {label}: {max_value}')
    print(f'Minimum Test Score {label}: {min_value}')
    print()

# 'Test_Score'列に関して統計情報を表示
print_stats('Test_Score_RBF', 'with RBF')
print_stats('Test_Score_LINEAR', 'with LINEAR')

# 'Test_Score_onlyGhost'列に関して統計情報を表示
print_stats('Test_Score_onlyGhost_RBF', 'with only Ghost and RBF')
print_stats('Test_Score_onlyGhost_LINEAR', 'with only Ghost and LINEAR')

# 'Test_Score_old'列に関して統計情報を表示
print_stats('Test_Score_old', 'with old model')

Average Test Score with RBF: 0.8018
Standard Deviation of Test Score with RBF: 0.0146
Maximum Test Score with RBF: 0.8283
Minimum Test Score with RBF: 0.7817

Average Test Score with LINEAR: 0.8383
Standard Deviation of Test Score with LINEAR: 0.0155
Maximum Test Score with LINEAR: 0.8667
Minimum Test Score with LINEAR: 0.8133

Average Test Score with only Ghost and RBF: 0.7862
Standard Deviation of Test Score with only Ghost and RBF: 0.0146
Maximum Test Score with only Ghost and RBF: 0.8117
Minimum Test Score with only Ghost and RBF: 0.7633

Average Test Score with only Ghost and LINEAR: 0.7975
Standard Deviation of Test Score with only Ghost and LINEAR: 0.0103
Maximum Test Score with only Ghost and LINEAR: 0.8083
Minimum Test Score with only Ghost and LINEAR: 0.78

Average Test Score with old model: 0.6082
Standard Deviation of Test Score with old model: 0.0108
Maximum Test Score with old model: 0.6267
Minimum Test Score with old model: 0.5933



In [11]:
print(results['C_LINEAR'])

0     200
1      20
2      50
3      40
4    3000
5      80
6     100
7     200
8     100
9    2000
Name: C_LINEAR, dtype: object
