In [3]:
import random
import os
import re
import pickle
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import MinMaxScaler
from scipy.signal import find_peaks

pd.set_option('display.expand_frame_repr', False)  # DataFrameを改行せずに表示
pd.set_option('display.max_columns', None)  # すべての列を表示

In [4]:
rootpath = "/Prove/Yoshihisa/HEIF_ghost/QPD/"

single_train_csv_path1 = os.path.join(rootpath, 'QPD18_HEIF_images_single_csv', 'TRAINING')
single_train_csv_path2 = os.path.join(rootpath, 'QPD18_HEIF_images_second_sameQP_csv', 'TRAINING')

second_train_csv_path1 = os.path.join(rootpath, 'QPD18_HEIF_images_second_csv', 'TRAINING')
second_train_csv_path2 = os.path.join(rootpath, 'QPD18_HEIF_images_triple_csv', 'TRAINING')

single_test_csv_path1 = os.path.join(rootpath, 'QPD18_HEIF_images_single_csv', 'TEST')
single_test_csv_path2 = os.path.join(rootpath, 'QPD18_HEIF_images_second_sameQP_csv', 'TEST')

second_test_csv_path1 = os.path.join(rootpath, 'QPD18_HEIF_images_second_csv', 'TEST')
second_test_csv_path2 = os.path.join(rootpath, 'QPD18_HEIF_images_triple_csv', 'TEST')


single_train_csv_path1_list = [os.path.join(single_train_csv_path1, file) for file in sorted(os.listdir(single_train_csv_path1))]
single_train_csv_path2_list = [os.path.join(single_train_csv_path2, file) for file in sorted(os.listdir(single_train_csv_path2))]

second_train_csv_path1_list = [os.path.join(second_train_csv_path1, file) for file in sorted(os.listdir(second_train_csv_path1))]
second_train_csv_path2_list = [os.path.join(second_train_csv_path2, file) for file in sorted(os.listdir(second_train_csv_path2))]

single_test_csv_path1_list = [os.path.join(single_test_csv_path1, file) for file in sorted(os.listdir(single_test_csv_path1))]
single_test_csv_path2_list = [os.path.join(single_test_csv_path2, file) for file in sorted(os.listdir(single_test_csv_path2))]

second_test_csv_path1_list = [os.path.join(second_test_csv_path1, file) for file in sorted(os.listdir(second_test_csv_path1))]
second_test_csv_path2_list = [os.path.join(second_test_csv_path2, file) for file in sorted(os.listdir(second_test_csv_path2))]


print("single_train_csv_path1_list: ", len(single_train_csv_path1_list))
print("single_train_csv_path2_list: ", len(single_train_csv_path2_list))

print("second_train_csv_path1_list: ", len(second_train_csv_path1_list))
print("second_train_csv_path2_list: ", len(second_train_csv_path2_list))

print("single_test_csv_path1_list: ", len(single_test_csv_path1_list))
print("single_test_csv_path2_list: ", len(single_test_csv_path2_list))

print("second_test_csv_path1_list: ", len(second_test_csv_path1_list))
print("second_test_csv_path2_list: ", len(second_test_csv_path2_list))

single_train_csv_path1_list:  270
single_train_csv_path2_list:  270
second_train_csv_path1_list:  270
second_train_csv_path2_list:  270
single_test_csv_path1_list:  30
single_test_csv_path2_list:  30
second_test_csv_path1_list:  30
second_test_csv_path2_list:  30


In [5]:
rootpath = "/Prove/Yoshihisa/HEIF_ghost/QPD/"

single_train_pkl_path1 = os.path.join(rootpath, 'QPD18_pkl_single', 'TRAINING')
single_train_pkl_path2 = os.path.join(rootpath, 'QPD18_pkl_second_sameQP', 'TRAINING')

second_train_pkl_path1 = os.path.join(rootpath, 'QPD18_pkl_second', 'TRAINING')
second_train_pkl_path2 = os.path.join(rootpath, 'QPD18_pkl_triple', 'TRAINING')

single_test_pkl_path1 = os.path.join(rootpath, 'QPD18_pkl_single', 'TEST')
single_test_pkl_path2 = os.path.join(rootpath, 'QPD18_pkl_second_sameQP', 'TEST')

second_test_pkl_path1 = os.path.join(rootpath, 'QPD18_pkl_second', 'TEST')
second_test_pkl_path2 = os.path.join(rootpath, 'QPD18_pkl_triple', 'TEST')


single_train_pkl_path1_list = [os.path.join(single_train_pkl_path1, file) for file in sorted(os.listdir(single_train_pkl_path1))]
single_train_pkl_path2_list = [os.path.join(single_train_pkl_path2, file) for file in sorted(os.listdir(single_train_pkl_path2))]

second_train_pkl_path1_list = [os.path.join(second_train_pkl_path1, file) for file in sorted(os.listdir(second_train_pkl_path1))]
second_train_pkl_path2_list = [os.path.join(second_train_pkl_path2, file) for file in sorted(os.listdir(second_train_pkl_path2))]

single_test_pkl_path1_list = [os.path.join(single_test_pkl_path1, file) for file in sorted(os.listdir(single_test_pkl_path1))]
single_test_pkl_path2_list = [os.path.join(single_test_pkl_path2, file) for file in sorted(os.listdir(single_test_pkl_path2))]

second_test_pkl_path1_list = [os.path.join(second_test_pkl_path1, file) for file in sorted(os.listdir(second_test_pkl_path1))]
second_test_pkl_path2_list = [os.path.join(second_test_pkl_path2, file) for file in sorted(os.listdir(second_test_pkl_path2))]


print("single_train_pkl_path1_list: ", len(single_train_pkl_path1_list))
print("single_train_pkl_path2_list: ", len(single_train_pkl_path2_list))

print("second_train_pkl_path1_list: ", len(second_train_pkl_path1_list))
print("second_train_pkl_path2_list: ", len(second_train_pkl_path2_list))

print("single_test_pkl_path1_list: ", len(single_test_pkl_path1_list))
print("single_test_pkl_path2_list: ", len(single_test_pkl_path2_list))

print("second_test_pkl_path1_list: ", len(second_test_pkl_path1_list))
print("second_test_pkl_path2_list: ", len(second_test_pkl_path2_list))

single_train_pkl_path1_list:  270
single_train_pkl_path2_list:  270
second_train_pkl_path1_list:  270
second_train_pkl_path2_list:  270
single_test_pkl_path1_list:  30
single_test_pkl_path2_list:  30
second_test_pkl_path1_list:  30
second_test_pkl_path2_list:  30


In [6]:
single_train_csv = list(zip(single_train_csv_path1_list, single_train_pkl_path1_list, single_train_csv_path2_list, single_train_pkl_path2_list))
second_train_csv = list(zip(second_train_csv_path1_list, second_train_pkl_path1_list, second_train_csv_path2_list, second_train_pkl_path2_list))
second_train_csv = random.sample(second_train_csv, len(single_train_csv))

single_test_csv = list(zip(single_test_csv_path1_list, single_test_pkl_path1_list, single_test_csv_path2_list, single_test_pkl_path2_list))
second_test_csv = list(zip(second_test_csv_path1_list, second_test_pkl_path1_list, second_test_csv_path2_list, second_test_pkl_path2_list))
second_test_csv = random.sample(second_test_csv, len(single_test_csv))

train_csv_list = single_train_csv + second_train_csv
test_csv_list = single_test_csv + second_test_csv

print("train_csv_list: ", len(train_csv_list))
print("test_csv_list: ", len(test_csv_list))

train_csv_list:  540
test_csv_list:  60


In [7]:
# new_df = pd.DataFrame(columns=["QP", "CU_64", "CU_32", "CU_16", "CU_8", "PU_64", "PU_32", "PU_16", "PU_8", "PU_4", "LUM_A", "LUM_B", "LUM_C", "CRM_34", "LABEL"])
train_df1 = pd.DataFrame(columns=["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"])
train_df2 = pd.DataFrame(columns=["LABEL"])

train_df3 = pd.DataFrame(columns=["MAE1_0", "MAE1_1", "MAE1_2", "MAE1_3", "MAE1_4", "MAE1_5", "MAE1_6", "MAE1_7", "MAE1_8", "MAE1_9", "MAE1_10",
                                  "MAE1_11", "MAE1_12", "MAE1_13", "MAE1_14", "MAE1_15", "MAE1_16", "MAE1_17", "MAE1_18", "MAE1_19", "MAE1_20", 
                                  "MAE1_21", "MAE1_22", "MAE1_23", "MAE1_24", "MAE1_25", "MAE1_26", "MAE1_27", "MAE1_28", "MAE1_29", "MAE1_30", 
                                  "MAE1_31", "MAE1_32", "MAE1_33", "MAE1_34", "MAE1_35", "MAE1_36", "MAE1_37", "MAE1_38", "MAE1_39", "MAE1_40", 
                                  "MAE1_41", "MAE1_42", "MAE1_43", "MAE1_44", "MAE1_45", "MAE1_46", "MAE1_47", "MAE1_48", "MAE1_49", "MAE1_50", 
                                  "MAE1_51"])

train_df4 = pd.DataFrame(columns=["MAE2_0", "MAE2_1", "MAE2_2", "MAE2_3", "MAE2_4", "MAE2_5", "MAE2_6", "MAE2_7", "MAE2_8", "MAE2_9", "MAE2_10",
                                  "MAE2_11", "MAE2_12", "MAE2_13", "MAE2_14", "MAE2_15", "MAE2_16", "MAE2_17", "MAE2_18", "MAE2_19", "MAE2_20", 
                                  "MAE2_21", "MAE2_22", "MAE2_23", "MAE2_24", "MAE2_25", "MAE2_26", "MAE2_27", "MAE2_28", "MAE2_29", "MAE2_30", 
                                  "MAE2_31", "MAE2_32", "MAE2_33", "MAE2_34", "MAE2_35", "MAE2_36", "MAE2_37", "MAE2_38", "MAE2_39", "MAE2_40", 
                                  "MAE2_41", "MAE2_42", "MAE2_43", "MAE2_44", "MAE2_45", "MAE2_46", "MAE2_47", "MAE2_48", "MAE2_49", "MAE2_50", 
                                  "MAE2_51"])

train_df5 = pd.DataFrame(columns=["MAE1_1", "MAE1_2"])
train_df6 = pd.DataFrame(columns=["MAE2_1", "MAE2_2"])

scaler = MinMaxScaler()

for path1, path2, path3, path4 in train_csv_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    train_pkl_list = [path2, path4]
    
    pu1_64 = df1.loc[0, "pu_counts"]
    pu1_32 = df1.loc[1, "pu_counts"]
    pu1_16 = df1.loc[2, "pu_counts"]
    pu1_8 = df1.loc[3, "pu_counts"]
    pu1_4 = df1.loc[4, "pu_counts"]
    
    pu2_64 = df2.loc[0, "pu_counts"]
    pu2_32 = df2.loc[1, "pu_counts"]
    pu2_16 = df2.loc[2, "pu_counts"]
    pu2_8 = df2.loc[3, "pu_counts"]
    pu2_4 = df2.loc[4, "pu_counts"]
    
    train_df1 = pd.concat([train_df1, pd.DataFrame({
                                          "PU1_64": [pu1_64],
                                          "PU1_32": [pu1_32],
                                          "PU1_16": [pu1_16],
                                          "PU1_8": [pu1_8],
                                          "PU1_4": [pu1_4],
                                          
                                          "PU2_64": [pu2_64],
                                          "PU2_32": [pu2_32],
                                          "PU2_16": [pu2_16],
                                          "PU2_8": [pu2_8],
                                          "PU2_4": [pu2_4],

                                          })], 
                   ignore_index=True)
    
    train_df2 = pd.concat([train_df2, pd.DataFrame({

                                          "LABEL": [label]})], 
                   ignore_index=True)
    
    
    
    

    with open(train_pkl_list[0], 'rb') as file1:
        loaded_data1 = pickle.load(file1)
        
    with open(train_pkl_list[1], 'rb') as file2:
        loaded_data2 = pickle.load(file2)
    
    # 読み込んだデータからMAE結果を取得
    ghost_results1, ghost_results_shifted1 = loaded_data1
    ghost_results2, ghost_results_shifted2 = loaded_data2

    # タプル内のリストを抽出
    original_mae1 = ghost_results1
    shifted_mae1 = ghost_results_shifted1
    
    original_mae2 = ghost_results2
    shifted_mae2 = ghost_results_shifted2

    mae_d1 = [shifted - original for original, shifted in zip(original_mae1, shifted_mae1)]
    mae_d2 = [shifted - original for original, shifted in zip(original_mae2, shifted_mae2)]
    mae_d1 = [0 if val <= 0 else val for val in mae_d1]
    mae_d2 = [0 if val <= 0 else val for val in mae_d2]
    
    peaks_mae_d1, _ = find_peaks(mae_d1)
    peaks_mae_d2, _ = find_peaks(mae_d2)

    # peaks_mae_d1が空の場合、0を代入
    max_peak_index_mae_d1 = peaks_mae_d1[np.argmax([mae_d1[i] for i in peaks_mae_d1])] if peaks_mae_d1.size > 0 else 0

    # peaks_mae_d2が空の場合、0を代入
    max_peak_index_mae_d2 = peaks_mae_d2[np.argmax([mae_d2[i] for i in peaks_mae_d2])] if peaks_mae_d2.size > 0 else 0

    # ２番目に大きいピーク値のインデックスを取得
    sorted_peaks_mae_d1 = np.argsort([mae_d1[i] for i in peaks_mae_d1])
    second_max_peak_index_mae_d1 = peaks_mae_d1[sorted_peaks_mae_d1[-2]] if sorted_peaks_mae_d1.size >= 2 else 0

    sorted_peaks_mae_d2 = np.argsort([mae_d2[i] for i in peaks_mae_d2])
    second_max_peak_index_mae_d2 = peaks_mae_d2[sorted_peaks_mae_d2[-2]] if sorted_peaks_mae_d2.size >= 2 else 0

    
    
    train_df5 = pd.concat([train_df5, pd.DataFrame({
                                          "MAE1_1": [mae_d1[max_peak_index_mae_d1]],
                                          "MAE1_2": [mae_d1[second_max_peak_index_mae_d1]],
        })],

                   ignore_index=True)
    
    train_df6 = pd.concat([train_df6, pd.DataFrame({
                                          "MAE2_1": [mae_d2[max_peak_index_mae_d2]],
                                          "MAE2_2": [mae_d2[second_max_peak_index_mae_d2]],
        })],

                   ignore_index=True)
    
    
    
        
    train_df3 = pd.concat([train_df3, pd.DataFrame({
                                      "MAE1_0": [mae_d1[0]], 
                                      "MAE1_1": [mae_d1[1]],
                                      "MAE1_2": [mae_d1[2]], 
                                      "MAE1_3": [mae_d1[3]], 
                                      "MAE1_4": [mae_d1[4]], 
                                      "MAE1_5": [mae_d1[5]], 
                                      "MAE1_6": [mae_d1[6]], 
                                      "MAE1_7": [mae_d1[7]], 
                                      "MAE1_8": [mae_d1[8]], 
                                      "MAE1_9": [mae_d1[9]], 
                                      "MAE1_10": [mae_d1[10]],
                                      "MAE1_11": [mae_d1[11]], 
                                      "MAE1_12": [mae_d1[12]], 
                                      "MAE1_13": [mae_d1[13]], 
                                      "MAE1_14": [mae_d1[14]], 
                                      "MAE1_15": [mae_d1[15]], 
                                      "MAE1_16": [mae_d1[16]], 
                                      "MAE1_17": [mae_d1[17]], 
                                      "MAE1_18": [mae_d1[18]], 
                                      "MAE1_19": [mae_d1[19]], 
                                      "MAE1_20": [mae_d1[20]], 
                                      "MAE1_21": [mae_d1[21]], 
                                      "MAE1_22": [mae_d1[22]], 
                                      "MAE1_23": [mae_d1[23]], 
                                      "MAE1_24": [mae_d1[24]], 
                                      "MAE1_25": [mae_d1[25]], 
                                      "MAE1_26": [mae_d1[26]], 
                                      "MAE1_27": [mae_d1[27]], 
                                      "MAE1_28": [mae_d1[28]], 
                                      "MAE1_29": [mae_d1[29]], 
                                      "MAE1_30": [mae_d1[30]], 
                                      "MAE1_31": [mae_d1[31]], 
                                      "MAE1_32": [mae_d1[32]], 
                                      "MAE1_33": [mae_d1[33]], 
                                      "MAE1_34": [mae_d1[34]], 
                                      "MAE1_35": [mae_d1[35]], 
                                      "MAE1_36": [mae_d1[36]], 
                                      "MAE1_37": [mae_d1[37]], 
                                      "MAE1_38": [mae_d1[38]], 
                                      "MAE1_39": [mae_d1[39]], 
                                      "MAE1_40": [mae_d1[40]], 
                                      "MAE1_41": [mae_d1[41]], 
                                      "MAE1_42": [mae_d1[42]], 
                                      "MAE1_43": [mae_d1[43]], 
                                      "MAE1_44": [mae_d1[44]], 
                                      "MAE1_45": [mae_d1[45]], 
                                      "MAE1_46": [mae_d1[46]], 
                                      "MAE1_47": [mae_d1[47]], 
                                      "MAE1_48": [mae_d1[48]], 
                                      "MAE1_49": [mae_d1[49]], 
                                      "MAE1_50": [mae_d1[50]], 
                                      "MAE1_51": [mae_d1[51]],
                                        })],
        ignore_index=True)
    
    train_df4 = pd.concat([train_df4, pd.DataFrame({
                                  "MAE2_0": [mae_d2[0]], 
                                  "MAE2_1": [mae_d2[1]],
                                  "MAE2_2": [mae_d2[2]], 
                                  "MAE2_3": [mae_d2[3]], 
                                  "MAE2_4": [mae_d2[4]], 
                                  "MAE2_5": [mae_d2[5]], 
                                  "MAE2_6": [mae_d2[6]], 
                                  "MAE2_7": [mae_d2[7]], 
                                  "MAE2_8": [mae_d2[8]], 
                                  "MAE2_9": [mae_d2[9]], 
                                  "MAE2_10": [mae_d2[10]],
                                  "MAE2_11": [mae_d2[11]], 
                                  "MAE2_12": [mae_d2[12]], 
                                  "MAE2_13": [mae_d2[13]], 
                                  "MAE2_14": [mae_d2[14]], 
                                  "MAE2_15": [mae_d2[15]], 
                                  "MAE2_16": [mae_d2[16]], 
                                  "MAE2_17": [mae_d2[17]], 
                                  "MAE2_18": [mae_d2[18]], 
                                  "MAE2_19": [mae_d2[19]], 
                                  "MAE2_20": [mae_d2[20]], 
                                  "MAE2_21": [mae_d2[21]], 
                                  "MAE2_22": [mae_d2[22]], 
                                  "MAE2_23": [mae_d2[23]], 
                                  "MAE2_24": [mae_d2[24]], 
                                  "MAE2_25": [mae_d2[25]], 
                                  "MAE2_26": [mae_d2[26]], 
                                  "MAE2_27": [mae_d2[27]], 
                                  "MAE2_28": [mae_d2[28]], 
                                  "MAE2_29": [mae_d2[29]], 
                                  "MAE2_30": [mae_d2[30]], 
                                  "MAE2_31": [mae_d2[31]], 
                                  "MAE2_32": [mae_d2[32]], 
                                  "MAE2_33": [mae_d2[33]], 
                                  "MAE2_34": [mae_d2[34]], 
                                  "MAE2_35": [mae_d2[35]], 
                                  "MAE2_36": [mae_d2[36]], 
                                  "MAE2_37": [mae_d2[37]], 
                                  "MAE2_38": [mae_d2[38]], 
                                  "MAE2_39": [mae_d2[39]], 
                                  "MAE2_40": [mae_d2[40]], 
                                  "MAE2_41": [mae_d2[41]], 
                                  "MAE2_42": [mae_d2[42]], 
                                  "MAE2_43": [mae_d2[43]], 
                                  "MAE2_44": [mae_d2[44]], 
                                  "MAE2_45": [mae_d2[45]], 
                                  "MAE2_46": [mae_d2[46]], 
                                  "MAE2_47": [mae_d2[47]], 
                                  "MAE2_48": [mae_d2[48]], 
                                  "MAE2_49": [mae_d2[49]], 
                                  "MAE2_50": [mae_d2[50]], 
                                  "MAE2_51": [mae_d2[51]],
                                    })],
        ignore_index=True)

    train_df = pd.concat([train_df1, train_df3, train_df4], axis=1)
    
print(len(train_df))

540


In [8]:
# new_df = pd.DataFrame(columns=["QP", "CU_64", "CU_32", "CU_16", "CU_8", "PU_64", "PU_32", "PU_16", "PU_8", "PU_4", "LUM_A", "LUM_B", "LUM_C", "CRM_34", "LABEL"])
test_df1 = pd.DataFrame(columns=["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"])
test_df2 = pd.DataFrame(columns=["LABEL"])

test_df3 = pd.DataFrame(columns=["MAE1_0", "MAE1_1", "MAE1_2", "MAE1_3", "MAE1_4", "MAE1_5", "MAE1_6", "MAE1_7", "MAE1_8", "MAE1_9", "MAE1_10",
                                  "MAE1_11", "MAE1_12", "MAE1_13", "MAE1_14", "MAE1_15", "MAE1_16", "MAE1_17", "MAE1_18", "MAE1_19", "MAE1_20", 
                                  "MAE1_21", "MAE1_22", "MAE1_23", "MAE1_24", "MAE1_25", "MAE1_26", "MAE1_27", "MAE1_28", "MAE1_29", "MAE1_30", 
                                  "MAE1_31", "MAE1_32", "MAE1_33", "MAE1_34", "MAE1_35", "MAE1_36", "MAE1_37", "MAE1_38", "MAE1_39", "MAE1_40", 
                                  "MAE1_41", "MAE1_42", "MAE1_43", "MAE1_44", "MAE1_45", "MAE1_46", "MAE1_47", "MAE1_48", "MAE1_49", "MAE1_50", 
                                  "MAE1_51"])

test_df4 = pd.DataFrame(columns=["MAE2_0", "MAE2_1", "MAE2_2", "MAE2_3", "MAE2_4", "MAE2_5", "MAE2_6", "MAE2_7", "MAE2_8", "MAE2_9", "MAE2_10",
                                  "MAE2_11", "MAE2_12", "MAE2_13", "MAE2_14", "MAE2_15", "MAE2_16", "MAE2_17", "MAE2_18", "MAE2_19", "MAE2_20", 
                                  "MAE2_21", "MAE2_22", "MAE2_23", "MAE2_24", "MAE2_25", "MAE2_26", "MAE2_27", "MAE2_28", "MAE2_29", "MAE2_30", 
                                  "MAE2_31", "MAE2_32", "MAE2_33", "MAE2_34", "MAE2_35", "MAE2_36", "MAE2_37", "MAE2_38", "MAE2_39", "MAE2_40", 
                                  "MAE2_41", "MAE2_42", "MAE2_43", "MAE2_44", "MAE2_45", "MAE2_46", "MAE2_47", "MAE2_48", "MAE2_49", "MAE2_50", 
                                  "MAE2_51"])
test_df5 = pd.DataFrame(columns=["MAE1_1", "MAE1_2"])
test_df6 = pd.DataFrame(columns=["MAE2_1", "MAE2_2"])

scaler = MinMaxScaler()

for path1, path2, path3, path4 in test_csv_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    test_pkl_list = [path2, path4]
    
    pu1_64 = df1.loc[0, "pu_counts"]
    pu1_32 = df1.loc[1, "pu_counts"]
    pu1_16 = df1.loc[2, "pu_counts"]
    pu1_8 = df1.loc[3, "pu_counts"]
    pu1_4 = df1.loc[4, "pu_counts"]
    
    pu2_64 = df2.loc[0, "pu_counts"]
    pu2_32 = df2.loc[1, "pu_counts"]
    pu2_16 = df2.loc[2, "pu_counts"]
    pu2_8 = df2.loc[3, "pu_counts"]
    pu2_4 = df2.loc[4, "pu_counts"]
    
    test_df1 = pd.concat([test_df1, pd.DataFrame({
                                          "PU1_64": [pu1_64],
                                          "PU1_32": [pu1_32],
                                          "PU1_16": [pu1_16],
                                          "PU1_8": [pu1_8],
                                          "PU1_4": [pu1_4],
                                          
                                          "PU2_64": [pu2_64],
                                          "PU2_32": [pu2_32],
                                          "PU2_16": [pu2_16],
                                          "PU2_8": [pu2_8],
                                          "PU2_4": [pu2_4],

                                          })], 
                   ignore_index=True)
    
    test_df2 = pd.concat([test_df2, pd.DataFrame({
                                          "LABEL": [label]})], 
                   ignore_index=True)
    
    
    with open(test_pkl_list[0], 'rb') as file1:
        loaded_data1 = pickle.load(file1)
        
    with open(test_pkl_list[1], 'rb') as file2:
        loaded_data2 = pickle.load(file2)
    
    # 読み込んだデータからMAE結果を取得
    ghost_results1, ghost_results_shifted1 = loaded_data1
    ghost_results2, ghost_results_shifted2 = loaded_data2

    # タプル内のリストを抽出
    original_mae1 = ghost_results1
    shifted_mae1 = ghost_results_shifted1
    
    original_mae2 = ghost_results2
    shifted_mae2 = ghost_results_shifted2

    mae_d1 = [shifted - original for original, shifted in zip(original_mae1, shifted_mae1)]
    mae_d2 = [shifted - original for original, shifted in zip(original_mae2, shifted_mae2)]
    mae_d1 = [0 if val <= 0 else val for val in mae_d1]
    mae_d2 = [0 if val <= 0 else val for val in mae_d2]
    
    peaks_mae_d1, _ = find_peaks(mae_d1)
    peaks_mae_d2, _ = find_peaks(mae_d2)

    # peaks_mae_d1が空の場合、0を代入
    max_peak_index_mae_d1 = peaks_mae_d1[np.argmax([mae_d1[i] for i in peaks_mae_d1])] if peaks_mae_d1.size > 0 else 0

    # peaks_mae_d2が空の場合、0を代入
    max_peak_index_mae_d2 = peaks_mae_d2[np.argmax([mae_d2[i] for i in peaks_mae_d2])] if peaks_mae_d2.size > 0 else 0

    # ２番目に大きいピーク値のインデックスを取得
    sorted_peaks_mae_d1 = np.argsort([mae_d1[i] for i in peaks_mae_d1])
    second_max_peak_index_mae_d1 = peaks_mae_d1[sorted_peaks_mae_d1[-2]] if sorted_peaks_mae_d1.size >= 2 else 0


    sorted_peaks_mae_d2 = np.argsort([mae_d2[i] for i in peaks_mae_d2])
    second_max_peak_index_mae_d2 = peaks_mae_d2[sorted_peaks_mae_d2[-2]] if sorted_peaks_mae_d2.size >= 2 else 0


    
    test_df5 = pd.concat([test_df5, pd.DataFrame({
                                          "MAE1_1": [mae_d1[max_peak_index_mae_d1]],
                                          "MAE1_2": [mae_d1[second_max_peak_index_mae_d1]],
                                          
        })],

                   ignore_index=True)
    
    test_df6 = pd.concat([test_df6, pd.DataFrame({
                                          "MAE2_1": [mae_d2[max_peak_index_mae_d2]],
                                          "MAE2_2": [mae_d2[second_max_peak_index_mae_d2]],
                                          
        })],

                   ignore_index=True)
        
    test_df3 = pd.concat([test_df3, pd.DataFrame({
                                      "MAE1_0": [mae_d1[0]], 
                                      "MAE1_1": [mae_d1[1]],
                                      "MAE1_2": [mae_d1[2]], 
                                      "MAE1_3": [mae_d1[3]], 
                                      "MAE1_4": [mae_d1[4]], 
                                      "MAE1_5": [mae_d1[5]], 
                                      "MAE1_6": [mae_d1[6]], 
                                      "MAE1_7": [mae_d1[7]], 
                                      "MAE1_8": [mae_d1[8]], 
                                      "MAE1_9": [mae_d1[9]], 
                                      "MAE1_10": [mae_d1[10]],
                                      "MAE1_11": [mae_d1[11]], 
                                      "MAE1_12": [mae_d1[12]], 
                                      "MAE1_13": [mae_d1[13]], 
                                      "MAE1_14": [mae_d1[14]], 
                                      "MAE1_15": [mae_d1[15]], 
                                      "MAE1_16": [mae_d1[16]], 
                                      "MAE1_17": [mae_d1[17]], 
                                      "MAE1_18": [mae_d1[18]], 
                                      "MAE1_19": [mae_d1[19]], 
                                      "MAE1_20": [mae_d1[20]], 
                                      "MAE1_21": [mae_d1[21]], 
                                      "MAE1_22": [mae_d1[22]], 
                                      "MAE1_23": [mae_d1[23]], 
                                      "MAE1_24": [mae_d1[24]], 
                                      "MAE1_25": [mae_d1[25]], 
                                      "MAE1_26": [mae_d1[26]], 
                                      "MAE1_27": [mae_d1[27]], 
                                      "MAE1_28": [mae_d1[28]], 
                                      "MAE1_29": [mae_d1[29]], 
                                      "MAE1_30": [mae_d1[30]], 
                                      "MAE1_31": [mae_d1[31]], 
                                      "MAE1_32": [mae_d1[32]], 
                                      "MAE1_33": [mae_d1[33]], 
                                      "MAE1_34": [mae_d1[34]], 
                                      "MAE1_35": [mae_d1[35]], 
                                      "MAE1_36": [mae_d1[36]], 
                                      "MAE1_37": [mae_d1[37]], 
                                      "MAE1_38": [mae_d1[38]], 
                                      "MAE1_39": [mae_d1[39]], 
                                      "MAE1_40": [mae_d1[40]], 
                                      "MAE1_41": [mae_d1[41]], 
                                      "MAE1_42": [mae_d1[42]], 
                                      "MAE1_43": [mae_d1[43]], 
                                      "MAE1_44": [mae_d1[44]], 
                                      "MAE1_45": [mae_d1[45]], 
                                      "MAE1_46": [mae_d1[46]], 
                                      "MAE1_47": [mae_d1[47]], 
                                      "MAE1_48": [mae_d1[48]], 
                                      "MAE1_49": [mae_d1[49]], 
                                      "MAE1_50": [mae_d1[50]], 
                                      "MAE1_51": [mae_d1[51]],
                                        })],
        ignore_index=True)
    
    test_df4 = pd.concat([test_df4, pd.DataFrame({
                                  "MAE2_0": [mae_d2[0]], 
                                  "MAE2_1": [mae_d2[1]],
                                  "MAE2_2": [mae_d2[2]], 
                                  "MAE2_3": [mae_d2[3]], 
                                  "MAE2_4": [mae_d2[4]], 
                                  "MAE2_5": [mae_d2[5]], 
                                  "MAE2_6": [mae_d2[6]], 
                                  "MAE2_7": [mae_d2[7]], 
                                  "MAE2_8": [mae_d2[8]], 
                                  "MAE2_9": [mae_d2[9]], 
                                  "MAE2_10": [mae_d2[10]],
                                  "MAE2_11": [mae_d2[11]], 
                                  "MAE2_12": [mae_d2[12]], 
                                  "MAE2_13": [mae_d2[13]], 
                                  "MAE2_14": [mae_d2[14]], 
                                  "MAE2_15": [mae_d2[15]], 
                                  "MAE2_16": [mae_d2[16]], 
                                  "MAE2_17": [mae_d2[17]], 
                                  "MAE2_18": [mae_d2[18]], 
                                  "MAE2_19": [mae_d2[19]], 
                                  "MAE2_20": [mae_d2[20]], 
                                  "MAE2_21": [mae_d2[21]], 
                                  "MAE2_22": [mae_d2[22]], 
                                  "MAE2_23": [mae_d2[23]], 
                                  "MAE2_24": [mae_d2[24]], 
                                  "MAE2_25": [mae_d2[25]], 
                                  "MAE2_26": [mae_d2[26]], 
                                  "MAE2_27": [mae_d2[27]], 
                                  "MAE2_28": [mae_d2[28]], 
                                  "MAE2_29": [mae_d2[29]], 
                                  "MAE2_30": [mae_d2[30]], 
                                  "MAE2_31": [mae_d2[31]], 
                                  "MAE2_32": [mae_d2[32]], 
                                  "MAE2_33": [mae_d2[33]], 
                                  "MAE2_34": [mae_d2[34]], 
                                  "MAE2_35": [mae_d2[35]], 
                                  "MAE2_36": [mae_d2[36]], 
                                  "MAE2_37": [mae_d2[37]], 
                                  "MAE2_38": [mae_d2[38]], 
                                  "MAE2_39": [mae_d2[39]], 
                                  "MAE2_40": [mae_d2[40]], 
                                  "MAE2_41": [mae_d2[41]], 
                                  "MAE2_42": [mae_d2[42]], 
                                  "MAE2_43": [mae_d2[43]], 
                                  "MAE2_44": [mae_d2[44]], 
                                  "MAE2_45": [mae_d2[45]], 
                                  "MAE2_46": [mae_d2[46]], 
                                  "MAE2_47": [mae_d2[47]], 
                                  "MAE2_48": [mae_d2[48]], 
                                  "MAE2_49": [mae_d2[49]], 
                                  "MAE2_50": [mae_d2[50]], 
                                  "MAE2_51": [mae_d2[51]],
                                    })],
        ignore_index=True)

    test_df = pd.concat([test_df1, test_df3, test_df4], axis=1)

print(len(test_df))           

60


In [9]:
combined_df = pd.concat([train_df, test_df], ignore_index=True)
# print(len(combined_df))

# スケーラーを使って結合したデータをスケーリング
combined_scaled_data = scaler.fit_transform(combined_df)

# トレーニングデータとテストデータに再分割
X_train = combined_scaled_data[:len(train_df)]
X_test = combined_scaled_data[len(train_df):]
# print(len(X_train))
# print(len(X_test))

# ラベルの準備
Y_train = train_df2['LABEL'].astype(int)
Y_test = test_df2['LABEL'].astype(int)

In [10]:
print(X_train[0:1])
print(X_train[1350:1351])

[[0.         0.2032967  0.48690476 0.6967001  0.38760362 0.
  0.20374449 0.49177438 0.69549667 0.37220281 0.14939417 0.05316783
  0.01639331 0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.06095473 0.         0.         0.         0.13398968
  0.24082305 0.32356057 0.22154304 0.13319378 0.00295752 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.00830975 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.0118259  0.1284793
  0.         0.         0.         0.18234999 0.26023825 0.37229591
  0.25140557 0.15680302 0.02014168 0.         0.         0.
  0.     

In [11]:
print(X_test[0:1])
print(X_test[150:151])

[[0.00000000e+00 1.64835165e-01 7.73214286e-01 6.99288256e-01
  2.54578755e-01 0.00000000e+00 1.69603524e-01 7.70857814e-01
  6.92407739e-01 2.41431404e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.92429475e-01
  2.30784149e-01 2.47742431e-01 1.71152734e-01 6.22571026e-02
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  5.19066574e-03 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.0000

In [12]:
# Cの範囲を指定
C_values = {'C': [0.01, 0.1, 1, 10, 100, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000]}

# SVMモデルのインスタンスを作成
svm_model = SVC(kernel='rbf')

# グリッドサーチのインスタンスを作成
grid_search = GridSearchCV(svm_model, C_values, cv=9, scoring='accuracy')

# グリッドサーチを実行
grid_search.fit(X_train, Y_train)

# 結果のデータフレームを作成
# 結果のデータフレームを作成
results = pd.DataFrame(grid_search.cv_results_)
# print(results)

# 新しい列名のマッピングを作成
new_column_names = {
    'param_C': 'C',
    'split0_test_score': 'k=1',
    'split1_test_score': 'k=2',
    'split2_test_score': 'k=3',
    'split3_test_score': 'k=4',
    'split4_test_score': 'k=5',
    'split5_test_score': 'k=6',
    'split6_test_score': 'k=7',
    'split7_test_score': 'k=8',
    'split8_test_score': 'k=9',
    'mean_test_score': 'Mean_Val_Score'
}

# 列名を変更
results = results.rename(columns=new_column_names)

# 変更後の表を表示
print(results[['C', 'k=1', 'k=2', 'k=3', 'k=4', 'k=5', 'k=6', 'k=7', 'k=8', 'k=9', 'Mean_Val_Score']])
    
# # 最適なハイパーパラメータを表示
print("Best Parameters: ", grid_search.best_params_)

# # 最適なモデルを取得
best_svm_model = grid_search.best_estimator_

# # テストデータで評価
accuracy = best_svm_model.score(X_test, Y_test)
print("Accuracy on Test Set: {:.4f}".format(accuracy))

       C  k=1  k=2  k=3       k=4  k=5       k=6  k=7  k=8  k=9  Mean_Val_Score
0   0.01  1.0  1.0  1.0  0.983333  1.0  0.983333  1.0  1.0  1.0        0.996296
1    0.1  1.0  1.0  1.0  0.983333  1.0  1.000000  1.0  1.0  1.0        0.998148
2      1  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
3     10  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
4    100  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
5   1000  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
6   1500  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
7   2000  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
8   2500  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
9   3000  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
10  3500  1.0  1.0  1.0  1.000000  1.0  1.000000  1.0  1.0  1.0        1.000000
11  4000  1.0  1.0  1.0  1.000000  1.0  

In [13]:
# PU1 + PU2

k = 9

# SVMモデルを初期化（RBFカーネルを使用）
svm_model = SVC(kernel='rbf', C=1, gamma='scale')  # Cとgammaはハイパーパラメータで調整可能


# K-fold cross validation
cv_scores = cross_val_score(svm_model, X_train, Y_train, cv=k)
average_accuracy = np.round(cv_scores.mean(), 4)
print(f'Average validation performance of {k}-fold: {average_accuracy}')

svm_model.fit(X_train, Y_train)
Y_pred = svm_model.predict(X_test)

report = classification_report(Y_test, Y_pred)

print(f'Summary:\n{report}')

# svm_model._gamma

Average validation performance of 9-fold: 1.0
Summary:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       1.00      1.00      1.00        30

    accuracy                           1.00        60
   macro avg       1.00      1.00      1.00        60
weighted avg       1.00      1.00      1.00        60

