In [1]:
import random
import os
import re
import pickle
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, GridSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import MinMaxScaler
from scipy.signal import find_peaks
import seaborn as sns
import pickle
import torch

pd.set_option('display.expand_frame_repr', False)  # DataFrameを改行せずに表示
pd.set_option('display.max_columns', None)  # すべての列を表示

plt.rcParams["font.size"]=5
plt.rcParams["figure.figsize"]=(2.0, 1.0)
plt.rcParams["figure.dpi"]= 300

In [2]:
def extract_finalQP(filename):
    match = re.search(r'2ndQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None


def extract_1stQP(filename):
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None

    
def is_double_compressed(mean_difference, final_QP, threshold):
    mean_difference = mean_difference[0]
    final_QP = final_QP[0]
    clamped_mean_difference = np.maximum(mean_difference, 0)
    
    #全体のエネルギーを計算
    energy = np.sum(np.square(clamped_mean_difference))
    # energy = np.sum(np.square(mean_difference))
    
    #QP2より右側のエネルギーを計算
    right_energy = np.sum(np.square(clamped_mean_difference[final_QP+1:52]))
    # right_energy = np.sum(np.square(mean_difference[final_QP+1:52]))
    
    # エネルギー比を計算して閾値と比較
    if energy <= 0:
        return -1
    
    else:
        if (right_energy / energy) > threshold:
            return True
        else:
            return False  

def calculate_mae(file_path):
    try:
        with open(file_path, 'rb') as file:
            loaded_data, loaded_data_shifted = pickle.load(file)
    except Exception as e:
        print(f"Error occurred while loading {file_path}: {e}")
        return None

    # タプル内のリストを抽出
    original_mae = np.array(loaded_data)
    shifted_mae = np.array(loaded_data_shifted)

    # Coding ghostを計算してリストに格納する
    mae_difference = shifted_mae - original_mae
    
    # mae_differenceの各要素においてマイナスの値を0に変換
    mae_difference_positive = np.maximum(mae_difference, 0)
    
    return mae_difference, mae_difference_positive

In [3]:
rootpath_csv = "/Prove/Yoshihisa/HEIF_ghost/HEIF_IMAGES_CSV/"

single_path1 = os.path.join(rootpath_csv, 'HEIF_images_single_csv')
single_path2 = os.path.join(rootpath_csv, 'HEIF_images_second_sameQP_csv')
single_list1 = [os.path.join(single_path1, file) for file in sorted(os.listdir(single_path1))]
single_list2 = [os.path.join(single_path2, file) for file in sorted(os.listdir(single_path2))]

second_largeQP1_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_csv')
second_largeQP1_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_csv')
second_largeQP1_list1 = [os.path.join(second_largeQP1_path1, file) for file in sorted(os.listdir(second_largeQP1_path1))]
second_largeQP1_list2 = [os.path.join(second_largeQP1_path2, file) for file in sorted(os.listdir(second_largeQP1_path2))]

second_sameQP_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_sameQP_csv')
second_sameQP_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_sameQP_csv')
second_sameQP_list1 = [os.path.join(second_sameQP_path1, file) for file in sorted(os.listdir(second_sameQP_path1))]
second_sameQP_list2 = [os.path.join(second_sameQP_path2, file) for file in sorted(os.listdir(second_sameQP_path2))]

second_largeQP2_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_largeQP_csv')
second_largeQP2_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_largeQP_csv')
second_largeQP2_list1 = [os.path.join(second_largeQP2_path1, file) for file in sorted(os.listdir(second_largeQP2_path1))]
second_largeQP2_list2 = [os.path.join(second_largeQP2_path2, file) for file in sorted(os.listdir(second_largeQP2_path2))]

print("single_list1: ", len(single_list1))
print("single_list2: ", len(single_list2))
print()
print("second_largeQP1_list1: ", len(second_largeQP1_list1))
print("second_largeQP1_list2: ", len(second_largeQP1_list2))
print("second_sameQP_list1: ", len(second_sameQP_list1))
print("second_sameQP_list2: ", len(second_sameQP_list2))
print("second_largeQP_list1: ", len(second_largeQP2_list1))
print("second_largeQP_list2: ", len(second_largeQP2_list2))


single_list1:  3080
single_list2:  3080

second_largeQP1_list1:  17556
second_largeQP1_list2:  17556
second_sameQP_list1:  3080
second_sameQP_list2:  3080
second_largeQP_list1:  12012
second_largeQP_list2:  12012


In [4]:
rootpath_pkl = "/Prove/Yoshihisa/HEIF_ghost/PKL/"

single_pathA = os.path.join(rootpath_pkl, 'pkl_single')
single_pathB = os.path.join(rootpath_pkl, 'pkl_second_sameQP')
single_listA = [os.path.join(single_pathA, file) for file in sorted(os.listdir(single_pathA))]
single_listB = [os.path.join(single_pathB, file) for file in sorted(os.listdir(single_pathB))]

second_largeQP1_pathA = os.path.join(rootpath_pkl, 'pkl_second')
second_largeQP1_pathB = os.path.join(rootpath_pkl, 'pkl_triple')
second_largeQP1_listA = [os.path.join(second_largeQP1_pathA, file) for file in sorted(os.listdir(second_largeQP1_pathA))]
second_largeQP1_listB = [os.path.join(second_largeQP1_pathB, file) for file in sorted(os.listdir(second_largeQP1_pathB))]

second_sameQP_pathA = os.path.join(rootpath_pkl, 'pkl_second_sameQP')
second_sameQP_pathB = os.path.join(rootpath_pkl, 'pkl_triple_sameQP')
second_sameQP_listA = [os.path.join(second_sameQP_pathA, file) for file in sorted(os.listdir(second_sameQP_pathA))]
second_sameQP_listB = [os.path.join(second_sameQP_pathB, file) for file in sorted(os.listdir(second_sameQP_pathB))]

second_largeQP2_pathA = os.path.join(rootpath_pkl, 'pkl_second_largeQP')
second_largeQP2_pathB = os.path.join(rootpath_pkl, 'pkl_triple_largeQP')
second_largeQP2_listA = [os.path.join(second_largeQP2_pathA, file) for file in sorted(os.listdir(second_largeQP2_pathA))]
second_largeQP2_listB = [os.path.join(second_largeQP2_pathB, file) for file in sorted(os.listdir(second_largeQP2_pathB))]

print("single_listA: ", len(single_listA))
print("single_listB: ", len(single_listB))
print()
print("second_largeQP1_listA: ", len(second_largeQP1_listA))
print("second_largeQP1_listB: ", len(second_largeQP1_listB))
print("second_sameQP_listA: ", len(second_sameQP_listA))
print("second_sameQP_listB: ", len(second_sameQP_listB))
print("second_largeQP2_listA: ", len(second_largeQP2_listA))
print("second_largeQP2_listB: ", len(second_largeQP2_listB))

single_listA:  3080
single_listB:  3080

second_largeQP1_listA:  17556
second_largeQP1_listB:  17556
second_sameQP_listA:  3080
second_sameQP_listB:  3080
second_largeQP2_listA:  12012
second_largeQP2_listB:  12012


In [5]:
single_csv1 = list(zip(single_list1, single_listA, single_list2, single_listB))
single_csv = random.sample(single_csv1, 2700)

second_largeQP1_csv1 = list(zip(second_largeQP1_list1, second_largeQP1_listA, second_largeQP1_list2, second_largeQP1_listB))
second_largeQP1_csv = random.sample(second_largeQP1_csv1, 900)

second_sameQP_csv1 = list(zip(second_sameQP_list1, second_sameQP_listA, second_sameQP_list2, second_sameQP_listB))
second_sameQP_csv = random.sample(second_sameQP_csv1, 900)

second_largeQP2_csv1 = list(zip(second_largeQP2_list1, second_largeQP2_listA, second_largeQP2_list2, second_largeQP2_listB))
second_largeQP2_csv = random.sample(second_largeQP2_csv1, 900)


train_csv_list = single_csv + second_largeQP1_csv + second_sameQP_csv + second_largeQP2_csv
# train_csv_list = second_largeQP1_csv
print("train_csv_list: ", len(train_csv_list))

train_csv_list:  5400


In [6]:
# Test data
single_test_csv = [item for item in single_csv1 if item not in single_csv]
second_largeQP1_test_csv = [item for item in second_largeQP1_csv1 if item not in second_largeQP1_csv]
second_sameQP_test_csv = [item for item in second_sameQP_csv1 if item not in second_sameQP_csv]
second_largeQP2_test_csv = [item for item in second_largeQP2_csv1 if item not in second_largeQP2_csv]


single_test_csv = random.sample(single_test_csv, 300)
second_largeQP1_test_csv = random.sample(second_largeQP1_test_csv, 300)
second_sameQP_test_csv = random.sample(second_sameQP_test_csv, 300)
second_largeQP2_test_csv = random.sample(second_largeQP2_test_csv, 300)

test_csv_largeQP1_list = single_test_csv + second_largeQP1_test_csv
print("test_csv_largeQP1_list: ", len(test_csv_largeQP1_list))

test_csv_sameQP_list = single_test_csv + second_sameQP_test_csv
print("test_csv_sameQP_list: ", len(test_csv_sameQP_list))

test_csv_largeQP2_list = single_test_csv + second_largeQP2_test_csv
print("test_csv_largeQP2_list: ", len(test_csv_largeQP2_list))

test_csv_largeQP1_list:  600
test_csv_sameQP_list:  600
test_csv_largeQP2_list:  600


In [7]:
pu_columns = ["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  
              "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"]

luminance_columns = ["LU1_0","LU1_1","LU1_2","LU1_3",
                     "LU1_4","LU1_5","LU1_6","LU1_7",
                     "LU1_8","LU1_9","LU1_10","LU1_11",
                     "LU1_12","LU1_13","LU1_14","LU1_15",
                     "LU1_16","LU1_17","LU1_18","LU1_19",
                     "LU1_20","LU1_21","LU1_22","LU1_23",
                     "LU1_24","LU1_25","LU1_26","LU1_27",
                     "LU1_28","LU1_29","LU1_30","LU1_31",
                     "LU1_32","LU1_33","LU1_34",
                     
                     "LU2_0","LU2_1","LU2_2","LU2_3",
                     "LU2_4","LU2_5","LU2_6","LU2_7",
                     "LU2_8","LU2_9","LU2_10","LU2_11",
                     "LU2_12","LU2_13","LU2_14","LU2_15",
                     "LU2_16","LU2_17","LU2_18","LU2_19",
                     "LU2_20","LU2_21","LU2_22","LU2_23",
                     "LU2_24","LU2_25","LU2_26","LU2_27",
                     "LU2_28","LU2_29","LU2_30","LU2_31",
                     "LU2_32","LU2_33","LU2_34"]

chrominance_columns = ["CH1_0", "CH1_1", "CH1_10", "CH1_26", "CH1_34", "CH1_36", 
                       "CH2_0", "CH2_1", "CH2_10", "CH2_26", "CH2_34", "CH2_36"]

label_columns = ["LABEL"]
mae1_columns = [f"MAE1_{i}" for i in range(52)]
mae2_columns = [f"MAE2_{i}" for i in range(52)]
mae_columns = ["MAE"]
final_qp_columns = ["FINAL_QP"]

# データフレームを初期化
train_df1_1 = pd.DataFrame(columns=pu_columns)
train_df1_2 = pd.DataFrame(columns=luminance_columns)
train_df1_3 = pd.DataFrame(columns=chrominance_columns)
train_df2 = pd.DataFrame(columns=label_columns)
train_df3 = pd.DataFrame(columns=mae1_columns)
train_df4 = pd.DataFrame(columns=mae2_columns)
train_df5 = pd.DataFrame(columns=mae_columns)
train_df6 = pd.DataFrame(columns=final_qp_columns)


for path1, path2, path3, path4 in train_csv_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    train_pkl_list = [path2, path4]
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    lu_values = [df1.loc[i, "luminance_counts"] for i in range(35)] + [df2.loc[i, "luminance_counts"] for i in range(35)]
    
#     lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
#     lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
#     lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
#     lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

#     average_10_1 = np.mean(lu_values_10_1)
#     average_10_2 = np.mean(lu_values_10_2)
#     average_26_1 = np.mean(lu_values_26_1)
#     average_26_2 = np.mean(lu_values_26_2)
    
    
#     lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    
    
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]
    
    train_df1_1 = pd.concat([train_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    train_df1_2= pd.concat([train_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    train_df1_3 = pd.concat([train_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)

    # label_columnsの値を取得
    train_df2 = pd.concat([train_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)

    final_QP = extract_finalQP(train_pkl_list[0])

    # MAEの値を取得
    mae_d1, mae_d1_positive = calculate_mae(train_pkl_list[0])
    _, mae_d2_positive = calculate_mae(train_pkl_list[1])
    
    
    # mae1_columnsの値を取得
    train_df3 = pd.concat([train_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1_positive[i]] for i in range(52)})], ignore_index=True)
    

    # mae2_columnsの値を取得
    train_df4 = pd.concat([train_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2_positive[i]] for i in range(52)})], ignore_index=True)

    # mae_columnsの値を取得
    train_df5 = pd.concat([train_df5, pd.DataFrame({"MAE": [mae_d1]})], ignore_index=True)

    # final_qp_columnsの値を取得
    train_df6 = pd.concat([train_df6, pd.DataFrame({"FINAL_QP": [final_QP]})], ignore_index=True)

# インデックスをリセット
train_df1_1.reset_index(drop=True, inplace=True)
train_df1_2.reset_index(drop=True, inplace=True)
train_df1_3.reset_index(drop=True, inplace=True)
train_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
train_df = pd.concat([train_df1_1, train_df1_2, train_df1_3, train_df3, train_df4], axis=1)
train_df_onlyGhost = pd.concat([train_df3, train_df4], axis=1)


In [8]:
print(train_df3.values[10])
# print(train_df6.values)

[0.0 0.0 0.0022184802083333177 0.0010513406250000149 0.004472561458333343
 0.0007600291666667036 0.000565932291666682 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 0.05487763020833358 0.14966029895833288 0.23421475312499962
 0.304424453125 0.456189947916666 0.4055632416666666 0.5152329166666663
 0.6866889312499986 0.743344946875002 0.5805129562499993
 0.5863663072916641 0.8190774552083337 1.184496671874998
 1.4998097635416672 1.6510104000000005 0.9065321395833337
 0.43564863124999675 0.14617095104167444 0.0 0.017374139583334447
 0.026099730208335892 0.0 0.0 0.0]


In [9]:
scaler = MinMaxScaler()

# スケーラーを使って結合したデータをスケーリング
X_train = scaler.fit_transform(train_df)
X_train_onlyGhost = scaler.fit_transform(train_df_onlyGhost)

# pandasをndarrayに変換
MAE = train_df5.values
FINAL_QP = train_df6.values

# ラベルの準備
Y_train = train_df2['LABEL'].astype(int)

print(f'Length of X_train: {len(X_train)}')
print(f'Length of X_train_onlyGhost: {len(X_train_onlyGhost)}')
print(f'Length of Y_train: {len(Y_train)}')
print(f'Length of MAE: {len(MAE)}')
print(f'Length of FINAL_QP: {len(FINAL_QP)}')

Length of X_train: 5400
Length of X_train_onlyGhost: 5400
Length of Y_train: 5400
Length of MAE: 5400
Length of FINAL_QP: 5400


In [10]:
#　LargeQP1

# データフレームを初期化
test_df1_1 = pd.DataFrame(columns=pu_columns)
test_df1_2 = pd.DataFrame(columns=luminance_columns)
test_df1_3 = pd.DataFrame(columns=chrominance_columns)
test_df2 = pd.DataFrame(columns=label_columns)
test_df3 = pd.DataFrame(columns=mae1_columns)
test_df4 = pd.DataFrame(columns=mae2_columns)
test_df5 = pd.DataFrame(columns=mae_columns)
test_df6 = pd.DataFrame(columns=final_qp_columns)


for path1, path2, path3, path4 in test_csv_largeQP1_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    test_pkl_list = [path2, path4]
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    lu_values = [df1.loc[i, "luminance_counts"] for i in range(35)] + [df2.loc[i, "luminance_counts"] for i in range(35)]
    
#     lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
#     lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
#     lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
#     lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

#     average_10_1 = np.mean(lu_values_10_1)
#     average_10_2 = np.mean(lu_values_10_2)
#     average_26_1 = np.mean(lu_values_26_1)
#     average_26_2 = np.mean(lu_values_26_2)
    
    
#     lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]

    test_df1_1 = pd.concat([test_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    test_df1_2= pd.concat([test_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    test_df1_3 = pd.concat([test_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)
        
    # label_columnsの値を取得
    test_df2 = pd.concat([test_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)

    test_final_QP = extract_finalQP(test_pkl_list[0])

    # MAEの値を取得
    mae_d1, mae_d1_positive = calculate_mae(test_pkl_list[0])
    _, mae_d2_positive = calculate_mae(test_pkl_list[1])


    # mae1_columnsの値を取得
    test_df3 = pd.concat([test_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1_positive[i]] for i in range(52)})], ignore_index=True)

    # mae2_columnsの値を取得
    test_df4 = pd.concat([test_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2_positive[i]] for i in range(52)})], ignore_index=True)
    
    # mae_columnsの値を取得
    test_df5 = pd.concat([test_df5, pd.DataFrame({"MAE": [mae_d1]})], ignore_index=True)

    # final_qp_columnsの値を取得
    test_df6 = pd.concat([test_df6, pd.DataFrame({"FINAL_QP": [test_final_QP]})], ignore_index=True)


# インデックスをリセット
test_df1_1.reset_index(drop=True, inplace=True)
test_df1_2.reset_index(drop=True, inplace=True)
test_df1_3.reset_index(drop=True, inplace=True)
test_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
test_largeQP1 = pd.concat([test_df1_1, test_df1_2, test_df1_3, test_df3, test_df4], axis=1)
test_largeQP1_onlyGhost = pd.concat([test_df3, test_df4], axis=1)


In [11]:
# データフレームを初期化
test_sameQP_df1_1 = pd.DataFrame(columns=pu_columns)
test_sameQP_df1_2 = pd.DataFrame(columns=luminance_columns)
test_sameQP_df1_3 = pd.DataFrame(columns=chrominance_columns)
test_sameQP_df2 = pd.DataFrame(columns=label_columns)
test_sameQP_df3 = pd.DataFrame(columns=mae1_columns)
test_sameQP_df4 = pd.DataFrame(columns=mae2_columns)
test_sameQP_df5 = pd.DataFrame(columns=mae_columns)
test_sameQP_df6 = pd.DataFrame(columns=final_qp_columns)


for path1, path2, path3, path4 in test_csv_sameQP_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    test_csv_sameQP_pkl_list = [path2, path4]
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
        
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    lu_values = [df1.loc[i, "luminance_counts"] for i in range(35)] + [df2.loc[i, "luminance_counts"] for i in range(35)]
    
    
#     lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
#     lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
#     lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
#     lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

#     average_10_1 = np.mean(lu_values_10_1)
#     average_10_2 = np.mean(lu_values_10_2)
#     average_26_1 = np.mean(lu_values_26_1)
#     average_26_2 = np.mean(lu_values_26_2)
    
    
#     lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    
    
    
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]

    test_sameQP_df1_1 = pd.concat([test_sameQP_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    test_sameQP_df1_2= pd.concat([test_sameQP_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    test_sameQP_df1_3 = pd.concat([test_sameQP_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)
        
    
    # label_columnsの値を取得
    test_sameQP_df2 = pd.concat([test_sameQP_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)

    test_sameQP_final_QP = extract_finalQP(test_csv_sameQP_pkl_list[0])

    # MAEの値を取得
    mae_d1, mae_d1_positive = calculate_mae(test_csv_sameQP_pkl_list[0])
    _, mae_d2_positive = calculate_mae(test_csv_sameQP_pkl_list[1])

    # mae1_columnsの値を取得
    test_sameQP_df3 = pd.concat([test_sameQP_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1_positive[i]] for i in range(52)})], ignore_index=True)

    # mae2_columnsの値を取得
    test_sameQP_df4 = pd.concat([test_sameQP_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2_positive[i]] for i in range(52)})], ignore_index=True)

    # mae_columnsの値を取得
    test_sameQP_df5 = pd.concat([test_sameQP_df5, pd.DataFrame({"MAE": [mae_d1]})], ignore_index=True)

    # final_qp_columnsの値を取得
    test_sameQP_df6 = pd.concat([test_sameQP_df6, pd.DataFrame({"FINAL_QP": [test_sameQP_final_QP]})], ignore_index=True)


# インデックスをリセット
test_sameQP_df1_1.reset_index(drop=True, inplace=True)
test_sameQP_df1_2.reset_index(drop=True, inplace=True)
test_sameQP_df1_3.reset_index(drop=True, inplace=True)
test_sameQP_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
test_sameQP = pd.concat([test_sameQP_df1_1, test_sameQP_df1_2, test_sameQP_df1_3, test_sameQP_df3, test_sameQP_df4], axis=1)
test_sameQP_onlyGhost = pd.concat([test_sameQP_df3, test_sameQP_df4], axis=1)


In [12]:
# データフレームを初期化

test_largeQP2_df1_1 = pd.DataFrame(columns=pu_columns)
test_largeQP2_df1_2 = pd.DataFrame(columns=luminance_columns)
test_largeQP2_df1_3 = pd.DataFrame(columns=chrominance_columns)
test_largeQP2_df2 = pd.DataFrame(columns=label_columns)
test_largeQP2_df3 = pd.DataFrame(columns=mae1_columns)
test_largeQP2_df4 = pd.DataFrame(columns=mae2_columns)
test_largeQP2_df5 = pd.DataFrame(columns=mae_columns)
test_largeQP2_df6 = pd.DataFrame(columns=final_qp_columns)


for path1, path2, path3, path4 in test_csv_largeQP2_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    test_csv_largeQP2_pkl_list = [path2, path4]
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
        
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    lu_values = [df1.loc[i, "luminance_counts"] for i in range(35)] + [df2.loc[i, "luminance_counts"] for i in range(35)]
    
#     lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
#     lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
#     lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
#     lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

#     average_10_1 = np.mean(lu_values_10_1)
#     average_10_2 = np.mean(lu_values_10_2)
#     average_26_1 = np.mean(lu_values_26_1)
#     average_26_2 = np.mean(lu_values_26_2)
    
    
#     lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    
    
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]

    test_largeQP2_df1_1 = pd.concat([test_largeQP2_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    test_largeQP2_df1_2= pd.concat([test_largeQP2_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    test_largeQP2_df1_3 = pd.concat([test_largeQP2_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)

    # label_columnsの値を取得
    test_largeQP2_df2 = pd.concat([test_largeQP2_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)

    test_largeQP2_final_QP = extract_finalQP(test_csv_largeQP2_pkl_list[0])

    # MAEの値を取得    
    mae_d1, mae_d1_positive = calculate_mae(test_csv_largeQP2_pkl_list[0])
    _, mae_d2_positive = calculate_mae(test_csv_largeQP2_pkl_list[1])
    
    # mae1_columnsの値を取得
    test_largeQP2_df3 = pd.concat([test_largeQP2_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1_positive[i]] for i in range(52)})], ignore_index=True)

    # mae2_columnsの値を取得
    test_largeQP2_df4 = pd.concat([test_largeQP2_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2_positive[i]] for i in range(52)})], ignore_index=True)

    # mae_columnsの値を取得
    test_largeQP2_df5 = pd.concat([test_largeQP2_df5, pd.DataFrame({"MAE": [mae_d1]})], ignore_index=True)

    # final_qp_columnsの値を取得
    test_largeQP2_df6 = pd.concat([test_largeQP2_df6, pd.DataFrame({"FINAL_QP": [test_largeQP2_final_QP]})], ignore_index=True)


# インデックスをリセット
test_largeQP2_df1_1.reset_index(drop=True, inplace=True)
test_largeQP2_df1_2.reset_index(drop=True, inplace=True)
test_largeQP2_df1_3.reset_index(drop=True, inplace=True)
test_largeQP2_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
test_largeQP2 = pd.concat([test_largeQP2_df1_1, test_largeQP2_df1_2, test_largeQP2_df1_3, test_largeQP2_df3, test_largeQP2_df4], axis=1)
test_largeQP2_onlyGhost = pd.concat([test_largeQP2_df3, test_largeQP2_df4], axis=1)


In [13]:
scaler = MinMaxScaler()

# スケーラーを使って結合したデータをスケーリング
X_test_largeQP1 = scaler.fit_transform(test_largeQP1)
X_test_largeQP1_onlyGhost = scaler.fit_transform(test_largeQP1_onlyGhost)

X_test_sameQP = scaler.fit_transform(test_sameQP)
X_test_sameQP_onlyGhost = scaler.fit_transform(test_sameQP_onlyGhost)

X_test_largeQP2 = scaler.fit_transform(test_largeQP2)
X_test_largeQP2_onlyGhost = scaler.fit_transform(test_largeQP2_onlyGhost)

# pandasをndarrayに変換
MAE_largeQP1 = test_df5.values
FINAL_QP_largeQP1 = test_df6.values

MAE_sameQP = test_sameQP_df5.values
FINAL_QP_sameQP = test_sameQP_df6.values

MAE_largeQP2 = test_largeQP2_df5.values
FINAL_QP_largeQP2 = test_largeQP2_df6.values

# ラベルの準備
Y_test_largeQP1 = test_df2['LABEL'].astype(int)
Y_test_sameQP = test_sameQP_df2['LABEL'].astype(int)
Y_test_largeQP2 = test_largeQP2_df2['LABEL'].astype(int)


datasets = [
    ('largeQP1', X_test_largeQP1, X_test_largeQP1_onlyGhost, MAE_largeQP1, FINAL_QP_largeQP1, Y_test_largeQP1),
    ('sameQP', X_test_sameQP, X_test_sameQP_onlyGhost , MAE_sameQP, FINAL_QP_sameQP, Y_test_sameQP),
    ('largeQP2', X_test_largeQP2, X_test_largeQP2_onlyGhost, MAE_largeQP2, FINAL_QP_largeQP2, Y_test_largeQP2)
]

for name, X, X_onlyGhost, MAE, FINAL_QP, Y in datasets:    
    # 出力
    print(f'Length of X_test_{name}: {len(X)}')
    print(f'Length of X_test_{name}_onlyGhost: {len(X_onlyGhost)}')
    print(f'Length of Y_test_{name}: {len(Y)}')
    print(f'Length of MAE_{name}: {len(MAE)}')
    print(f'Length of FINAL_QP_{name}: {len(FINAL_QP)}')
    print('-------------------------------------------')

Length of X_test_largeQP1: 600
Length of X_test_largeQP1_onlyGhost: 600
Length of Y_test_largeQP1: 600
Length of MAE_largeQP1: 600
Length of FINAL_QP_largeQP1: 600
-------------------------------------------
Length of X_test_sameQP: 600
Length of X_test_sameQP_onlyGhost: 600
Length of Y_test_sameQP: 600
Length of MAE_sameQP: 600
Length of FINAL_QP_sameQP: 600
-------------------------------------------
Length of X_test_largeQP2: 600
Length of X_test_largeQP2_onlyGhost: 600
Length of Y_test_largeQP2: 600
Length of MAE_largeQP2: 600
Length of FINAL_QP_largeQP2: 600
-------------------------------------------


In [None]:
# Cの範囲を指定
# C_values = {'C': [0.01, 0.1, 1, 10, 100, 1000, 2000, 3000, 4000, 5000]}
C_values = {'C': [0.01, 0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000, 2000, 3000]}

# kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
kfold = StratifiedKFold(n_splits=9, shuffle=True)

# 結果のデータフレームを初期化
results = pd.DataFrame(columns=['C_RBF','RBF_largeQP1','C_RBF_sameQP','RBF_sameQP','C_RBF_largeQP2','RBF_largeQP2',
                                'C_LINEAR','LINEAR_largeQP1','C_LINEAR_sameQP','LINEAR_sameQP','C_LINEAR_largeQP2','LINEAR_largeQP2',
                                
                                'C_RBF_ghost','RBF_ghost_largeQP1','C_RBF_sameQP_ghost','RBF_ghost_sameQP','C_RBF_largeQP2_ghost','RBF_ghost_largeQP2',
                                'C_LINEAR_ghost','LINEAR_ghost_largeQP1','C_LINEAR_sameQP_ghost','LINEAR_ghost_sameQP','C_LINEAR_largeQP2_ghost','LINEAR_ghost_largeQP2',
                                
                                'Threshold', 'largeQP1_old', 'Threshold_sameQP', 'sameQP_old', 'Threshold_largeQP2', 'largeQP2_old'])


# 訓練データ
O_X_train, O_X_train_onlyGhost = X_train, X_train_onlyGhost
O_Y_train = Y_train

# テストデータ
O_X_test_largeQP1, O_X_test_largeQP1_onlyGhost = X_test_largeQP1, X_test_largeQP1_onlyGhost
O_X_test_sameQP, O_X_test_sameQP_onlyGhost = X_test_sameQP, X_test_sameQP_onlyGhost
O_X_test_largeQP2, O_X_test_largeQP2_onlyGhost = X_test_largeQP2, X_test_largeQP2_onlyGhost

O_Y_test_largeQP1 = Y_test_largeQP1
O_Y_test_sameQP = Y_test_sameQP
O_Y_test_largeQP2 = Y_test_largeQP2

# 閾値用テストデータ
O_test_old, O_test_final_QP = MAE_largeQP1, FINAL_QP_largeQP1
O_test_sameQP_old, O_test_sameQP_final_QP = MAE_sameQP, FINAL_QP_sameQP
O_test_largeQP_old, O_test_largeQP_final_QP = MAE_largeQP2, FINAL_QP_largeQP2


# k-fold cross-validation
for fold, (train_ids, test_ids) in enumerate(kfold.split(O_X_train, O_Y_train)):
    print(f"<Fold-{fold+1}>")
    print()
    
    print(len(train_ids), len(test_ids))
    
    results_old = []
    
    # 全体を訓練ラベルと検証ラベルに分割
    X_train, X_val = O_X_train[train_ids], O_X_train[test_ids]
    X_train_onlyGhost, X_val_onlyGhost = O_X_train_onlyGhost[train_ids], O_X_train_onlyGhost[test_ids]
    
    Y_train, Y_val = O_Y_train[train_ids], O_Y_train[test_ids]
    
    
    # テストデータ・ラベルの処理（RBFとLINEAR）
    X_test_largeQP1, X_test_sameQP, X_test_largeQP2 = O_X_test_largeQP1, O_X_test_sameQP, O_X_test_largeQP2
    Y_test_largeQP1, Y_test_sameQP, Y_test_largeQP2 = O_Y_test_largeQP1, O_Y_test_sameQP, O_Y_test_largeQP2
    
    X_test_largeQP1_onlyGhost, X_test_sameQP_onlyGhost, X_test_largeQP2_onlyGhost = O_X_test_largeQP1_onlyGhost, O_X_test_sameQP_onlyGhost, O_X_test_largeQP2_onlyGhost
    
    
    # テストデータ・ラベルの処理（閾値）
    X_test_old, test_QP = O_test_old, O_test_final_QP
    X_test_sameQP_old, test_sameQP = O_test_sameQP_old, O_test_sameQP_final_QP
    X_test_largeQP_old, test_largeQP = O_test_largeQP_old, O_test_largeQP_final_QP
    
    # 訓練・検証データ（ラベル）を訓練データ（ラベル）と検証データ（ラベル）に分割
    # X_train, X_val, Y_train, Y_val = train_test_split(X_train_val, Y_train_val, test_size=540, random_state=42)
    # X_train_onlyGhost, X_val_onlyGhost, Y_train, Y_val = train_test_split(X_train_val_onlyGhost, Y_train_val, test_size=540, random_state=42)
    
    
    best_threshold = 0
    best_accuracy = 0
    best_predicted_labels = []
    best_ground_truth_labels = []
    
    sameQP_best_threshold = 0
    sameQP_best_accuracy = 0
    sameQP_best_predicted_labels = []
    sameQP_best_ground_truth_labels = []
    
    largeQP_best_threshold = 0
    largeQP_best_accuracy = 0
    largeQP_best_predicted_labels = []
    largeQP_best_ground_truth_labels = []
    
            
            
    for threshold in np.arange(0.01, 1.01, 0.01):
        test_old = np.array([is_double_compressed(X_test_old[i], test_QP[i], threshold) for i in range(600)])
        predicted_labels = test_old.astype(int)
        ground_truth_labels = np.array(Y_test_largeQP1)
        accuracy = np.sum(ground_truth_labels == predicted_labels) / len(ground_truth_labels)
    
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_threshold = threshold
            best_predicted_labels = predicted_labels
            best_ground_truth_labels = ground_truth_labels
            
    for threshold in np.arange(0.01, 1.01, 0.01):
        test_sameQP_old = np.array([is_double_compressed(X_test_sameQP_old[i], test_sameQP[i], threshold) for i in range(600)])
        same_predicted_labels = test_sameQP_old.astype(int)
        same_ground_truth_labels = np.array(Y_test_sameQP)
        same_accuracy = np.sum(same_ground_truth_labels == same_predicted_labels) / len(same_ground_truth_labels)
    
        if same_accuracy > sameQP_best_accuracy:
            sameQP_best_accuracy = same_accuracy
            sameQP_best_threshold = threshold
            sameQP_best_predicted_labels = same_predicted_labels
            sameQP_best_ground_truth_labels = same_ground_truth_labels
                        
    for threshold in np.arange(0.01, 1.01, 0.01):
        test_largeQP_old = np.array([is_double_compressed(X_test_largeQP_old[i], test_largeQP[i], threshold) for i in range(600)])
        large_predicted_labels = test_largeQP_old.astype(int)
        large_ground_truth_labels = np.array(Y_test_largeQP2)
        large_accuracy = np.sum(large_ground_truth_labels == large_predicted_labels) / len(large_ground_truth_labels)
    
        if large_accuracy > largeQP_best_accuracy:
            largeQP_best_accuracy = large_accuracy
            largeQP_best_threshold = threshold
            largeQP_best_predicted_labels = large_predicted_labels
            largeQP_best_ground_truth_labels = large_ground_truth_labels
    
    
    print(best_accuracy)
    print(sameQP_best_accuracy)
    print(largeQP_best_accuracy)
    
    best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF, best_gamma_RBF = 0, None, None, None
    best_val_score_onlyGhost_RBF, best_svm_model_onlyGhost_RBF, best_c_value_onlyGhost_RBF = 0, None, None
    
    best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR, best_gamma_LINEAR = 0, None, None, None
    best_val_score_onlyGhost_LINEAR, best_svm_model_onlyGhost_LINEAR, best_c_value_onlyGhost_LINEAR = 0, None, None
    
    for C_value in C_values['C']:
        
        # SVMモデルのインスタンスを作成
        svm_model_RBF = SVC(kernel='rbf', C=C_value)
        svm_model_onlyGhost_RBF = SVC(kernel='rbf', C=C_value)

        svm_model_LINEAR = SVC(kernel='linear', C=C_value)
        svm_model_onlyGhost_LINEAR = SVC(kernel='linear', C=C_value)


        # 訓練データで訓練
        svm_model_RBF.fit(X_train, Y_train)
        svm_model_onlyGhost_RBF.fit(X_train_onlyGhost, Y_train)

        svm_model_LINEAR.fit(X_train, Y_train)
        svm_model_onlyGhost_LINEAR.fit(X_train_onlyGhost, Y_train)


        val_accuracy_RBF = accuracy_score(Y_val, svm_model_RBF.predict(X_val))
        val_accuracy_onlyGhost_RBF = accuracy_score(Y_val, svm_model_onlyGhost_RBF.predict(X_val_onlyGhost))

        val_accuracy_LINEAR = accuracy_score(Y_val, svm_model_LINEAR.predict(X_val))
        val_accuracy_onlyGhost_LINEAR = accuracy_score(Y_val, svm_model_onlyGhost_LINEAR.predict(X_val_onlyGhost))


        # 検証データでの精度が最も高かった場合、そのモデルを保存
        if val_accuracy_RBF > best_val_score_RBF:
            best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = val_accuracy_RBF, svm_model_RBF, C_value

        if val_accuracy_onlyGhost_RBF > best_val_score_onlyGhost_RBF:
            best_val_score_onlyGhost_RBF, best_svm_model_onlyGhost_RBF, best_c_value_onlyGhost_RBF = val_accuracy_onlyGhost_RBF, svm_model_onlyGhost_RBF, C_value

        if val_accuracy_LINEAR > best_val_score_LINEAR:
            best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = val_accuracy_LINEAR, svm_model_LINEAR, C_value

        if val_accuracy_onlyGhost_LINEAR > best_val_score_onlyGhost_LINEAR:
            best_val_score_onlyGhost_LINEAR, best_svm_model_onlyGhost_LINEAR, best_c_value_onlyGhost_LINEAR = val_accuracy_onlyGhost_LINEAR, svm_model_onlyGhost_LINEAR, C_value
            
    
    # テストデータで評価
    predictions_RBF = best_svm_model_RBF.predict(X_test_largeQP1)
    # predictions_prob_RBF = best_svm_model_RBF.decision_function(X_test)
    accuracy_RBF = accuracy_score(Y_test_largeQP1, predictions_RBF)
    report_RBF = classification_report(Y_test_largeQP1, predictions_RBF, digits=4, zero_division=1)
    print(f'report_RBF:\n{report_RBF}')
    
    predictions_LINEAR = best_svm_model_LINEAR.predict(X_test_largeQP1)
    # predictions_prob_LINEAR = best_svm_model_LINEAR.decision_function(X_test)
    accuracy_LINEAR = accuracy_score(Y_test_largeQP1, predictions_LINEAR)
    report_LINEAR = classification_report(Y_test_largeQP1, predictions_LINEAR, digits=4, zero_division=1)
    print(f'report_LINEAR:\n{report_LINEAR}')
    
    same_predictions_RBF = best_svm_model_RBF.predict(X_test_sameQP)
    # same_predictions_prob_RBF = best_svm_model_RBF.decision_function(X_test_sameQP)
    same_accuracy_RBF = accuracy_score(Y_test_sameQP, same_predictions_RBF)
    same_report_RBF = classification_report(Y_test_sameQP, same_predictions_RBF, digits=4, zero_division=1)
    print(f'same_report_RBF:\n{same_report_RBF}')
    
    same_predictions_LINEAR = best_svm_model_LINEAR.predict(X_test_sameQP)
    # same_predictions_prob_LINEAR = best_svm_model_LINEAR.decision_function(X_test_sameQP)
    same_accuracy_LINEAR = accuracy_score(Y_test_sameQP, same_predictions_LINEAR)
    same_report_LINEAR = classification_report(Y_test_sameQP, same_predictions_LINEAR, digits=4, zero_division=1)
    print(f'same_report_LINEAR:\n{same_report_LINEAR}')
    
    large_predictions_RBF = best_svm_model_RBF.predict(X_test_largeQP2)
    # large_predictions_prob_RBF = best_svm_model_RBF.decision_function(X_test_largeQP)
    large_accuracy_RBF = accuracy_score(Y_test_largeQP2, large_predictions_RBF)
    large_report_RBF = classification_report(Y_test_largeQP2, large_predictions_RBF, digits=4, zero_division=1)
    print(f'large_report_RBF:\n{large_report_RBF}')
    
    large_predictions_LINEAR = best_svm_model_LINEAR.predict(X_test_largeQP2)
    # large_predictions_prob_LINEAR = best_svm_model_LINEAR.decision_function(X_test_largeQP)
    large_accuracy_LINEAR = accuracy_score(Y_test_largeQP2, large_predictions_LINEAR)
    large_report_LINEAR = classification_report(Y_test_largeQP2, large_predictions_LINEAR, digits=4, zero_division=1)
    print(f'large_report_LINEAR:\n{large_report_LINEAR}')
    
    
    # テストデータで評価
    predictions_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.predict(X_test_largeQP1_onlyGhost)
    # predictions_prob_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.decision_function(X_test_onlyGhost)
    accuracy_onlyGhost_RBF = accuracy_score(Y_test_largeQP1, predictions_onlyGhost_RBF)
    report_onlyGhost_RBF = classification_report(Y_test_largeQP1, predictions_onlyGhost_RBF)
    # print(f'report_onlyGhost_RBF:\n{report_onlyGhost_RBF}')
    
    same_predictions_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.predict(X_test_sameQP_onlyGhost)
    # same_predictions_prob_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.decision_function(X_test_sameQP_onlyGhost)
    same_accuracy_onlyGhost_RBF = accuracy_score(Y_test_sameQP, same_predictions_onlyGhost_RBF)
    same_report_onlyGhost_RBF = classification_report(Y_test_sameQP, same_predictions_onlyGhost_RBF)
    # print(f'same_report_onlyGhost_RBF:\n{same_report_onlyGhost_RBF}')
    
    large_predictions_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.predict(X_test_largeQP2_onlyGhost)
    # large_predictions_prob_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.decision_function(X_test_largeQP_onlyGhost)
    large_accuracy_onlyGhost_RBF = accuracy_score(Y_test_largeQP2, large_predictions_onlyGhost_RBF)
    large_report_onlyGhost_RBF = classification_report(Y_test_largeQP2, large_predictions_onlyGhost_RBF)
    # print(f'large_report_onlyGhost_RBF:\n{large_report_onlyGhost_RBF}')
    
    predictions_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.predict(X_test_largeQP1_onlyGhost)
    # predictions_prob_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.decision_function(X_test_onlyGhost)
    accuracy_onlyGhost_LINEAR = accuracy_score(Y_test_largeQP1, predictions_onlyGhost_LINEAR)
    report_onlyGhost_LINEAR = classification_report(Y_test_largeQP1, predictions_onlyGhost_LINEAR)
    # print(f'report_onlyGhost_LINEAR:\n{report_onlyGhost_LINEAR}')
    
    same_predictions_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.predict(X_test_sameQP_onlyGhost)
    # same_predictions_prob_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.decision_function(X_test_sameQP_onlyGhost)
    same_accuracy_onlyGhost_LINEAR = accuracy_score(Y_test_sameQP, same_predictions_onlyGhost_LINEAR)
    same_report_onlyGhost_LINEAR = classification_report(Y_test_sameQP, same_predictions_onlyGhost_LINEAR)
    # print(f'same_report_onlyGhost_LINEAR:\n{same_report_onlyGhost_LINEAR}')
    
    large_predictions_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.predict(X_test_largeQP2_onlyGhost)
    # large_predictions_prob_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.decision_function(X_test_largeQP2_onlyGhost)
    large_accuracy_onlyGhost_LINEAR = accuracy_score(Y_test_largeQP2, large_predictions_onlyGhost_LINEAR)
    large_report_onlyGhost_LINEAR = classification_report(Y_test_largeQP2, large_predictions_onlyGhost_LINEAR)
    # print(f'large_report_onlyGhost_LINEAR:\n{large_report_onlyGhost_LINEAR}')
    
    
    # テストデータで評価
    test_old = classification_report(best_ground_truth_labels, best_predicted_labels, labels=[0,1], target_names=['0', '1'], zero_division=0, digits=4)
    # print(f'test_old:\n{test_old}')
    
    test_sameQP_old = classification_report(sameQP_best_ground_truth_labels, sameQP_best_predicted_labels, labels=[0,1], target_names=['0', '1'], zero_division=0, digits=4)
    # print(f'test_sameQP_old:\n{test_sameQP_old}')
    
    test_largeQP_old = classification_report(largeQP_best_ground_truth_labels, largeQP_best_predicted_labels, labels=[0,1], target_names=['0', '1'], zero_division=0, digits=4)
    # print(f'test_largeQP_old:\n{test_largeQP_old}')
    
        
    # Test結果を保存
    
    result_row ={'C_RBF': best_c_value_RBF,'RBF_largeQP1':accuracy_RBF,
                  'C_RBF_sameQP': best_c_value_RBF,'RBF_sameQP':same_accuracy_RBF,
                  'C_RBF_largeQP2': best_c_value_RBF,'RBF_largeQP2':large_accuracy_RBF,

                  'C_LINEAR': best_c_value_LINEAR,'LINEAR_largeQP1':accuracy_LINEAR,
                  'C_LINEAR_sameQP': best_c_value_LINEAR,'LINEAR_sameQP':same_accuracy_LINEAR,
                  'C_LINEAR_largeQP2': best_c_value_LINEAR,'LINEAR_largeQP2':large_accuracy_LINEAR,
                 
                  'C_RBF_ghost': best_c_value_onlyGhost_RBF,'RBF_ghost_largeQP1':accuracy_onlyGhost_RBF,
                  'C_RBF_sameQP_ghost': best_c_value_onlyGhost_RBF,'RBF_ghost_sameQP':same_accuracy_onlyGhost_RBF,
                  'C_RBF_largeQP_ghost': best_c_value_onlyGhost_RBF,'RBF_ghost_largeQP2':large_accuracy_onlyGhost_RBF,
                  
                  'C_LINEAR_ghost': best_c_value_onlyGhost_LINEAR,'LINEAR_ghost_largeQP1':accuracy_onlyGhost_LINEAR,
                  'C_LINEAR_sameQP_ghost': best_c_value_onlyGhost_LINEAR,'LINEAR_ghost_sameQP':same_accuracy_onlyGhost_LINEAR,
                  'C_LINEAR_largeQP_ghost': best_c_value_onlyGhost_LINEAR,'LINEAR_ghost_largeQP2':large_accuracy_onlyGhost_LINEAR,

                  'Threshold':best_threshold, 'largeQP1_old':best_accuracy, 
                  'Threshold_sameQP':sameQP_best_threshold, 'sameQP_old':sameQP_best_accuracy, 
                  'Threshold_largeQP2':largeQP_best_threshold, 'largeQP2_old':largeQP_best_accuracy}
    
    results = pd.concat([results, pd.DataFrame([result_row])], ignore_index=True)

# 結果を表示
# print(results)

<Fold-1>

4800 600
0.8866666666666667
0.5116666666666667
0.6033333333333334
report_RBF:
              precision    recall  f1-score   support

           0     0.9565    0.8067    0.8752       300
           1     0.8329    0.9633    0.8934       300

    accuracy                         0.8850       600
   macro avg     0.8947    0.8850    0.8843       600
weighted avg     0.8947    0.8850    0.8843       600

report_LINEAR:
              precision    recall  f1-score   support

           0     0.9361    0.8300    0.8799       300
           1     0.8473    0.9433    0.8927       300

    accuracy                         0.8867       600
   macro avg     0.8917    0.8867    0.8863       600
weighted avg     0.8917    0.8867    0.8863       600

same_report_RBF:
              precision    recall  f1-score   support

           0     0.7438    0.7933    0.7677       300
           1     0.7786    0.7267    0.7517       300

    accuracy                         0.7600       600
   macro

In [None]:
def print_stats(column_name, label):
    data = (results[column_name])
    average = round(results[column_name].mean(), 4)
    std_dev = round(results[column_name].std(), 4)
    max_value = round(results[column_name].max(), 4)
    min_value = round(results[column_name].min(), 4)

    # print(f'Test Score {column_name} {label}: {data}')
    print(f'Average Test Score {column_name} {label}: {average}')
    print(f'Standard Deviation of Test Score {column_name} {label}: {std_dev}')
    print(f'Maximum Test Score {column_name} {label}: {max_value}')
    print(f'Minimum Test Score {column_name} {label}: {min_value}')
    print('------------------------------------------------------')

# 'Test_Score'列に関して統計情報を表示
# print_stats('C_RBF', 'with RBF')
print_stats('RBF_largeQP1', 'with RBF')
# print_stats('C_LINEAR', 'with RBF')
print_stats('LINEAR_largeQP1', 'with LINEAR')
print_stats('RBF_ghost_largeQP1', 'with RBF')
print_stats('LINEAR_ghost_largeQP1', 'with LINEAR')
print_stats('largeQP1_old', 'with old model')
print()
# print_stats('C_RBF_sameQP', 'with RBF')
print_stats('RBF_sameQP', 'with RBF')
# print_stats('C_LINEAR_sameQP', 'with RBF')
print_stats('LINEAR_sameQP', 'with LINEAR')
print_stats('RBF_ghost_sameQP', 'with RBF')
print_stats('LINEAR_ghost_sameQP', 'with LINEAR')
print_stats('sameQP_old', 'with old model')
print()
# print_stats('C_RBF_largeQP2', 'with RBF')
print_stats('RBF_largeQP2', 'with RBF')
# print_stats('C_LINEAR_largeQP2', 'with RBF')
print_stats('LINEAR_largeQP2', 'with LINEAR')
print_stats('RBF_ghost_largeQP2', 'with RBF')
print_stats('LINEAR_ghost_largeQP2', 'with LINEAR')
print_stats('largeQP2_old', 'with old model')

In [None]:
print(results['C_RBF'])
print(results['C_RBF_sameQP'])
print(results['C_RBF_largeQP2'])
print()
print(results['C_LINEAR'])
print(results['C_LINEAR_sameQP'])
print(results['C_LINEAR_largeQP2'])