In [1]:
import random
import os
import re
import pickle
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, GridSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import MinMaxScaler
from scipy.signal import find_peaks
import seaborn as sns
import pickle
import torch

pd.set_option('display.expand_frame_repr', False)  # DataFrameを改行せずに表示
pd.set_option('display.max_columns', None)  # すべての列を表示

plt.rcParams["font.size"]=5
plt.rcParams["figure.figsize"]=(2.0, 1.0)
plt.rcParams["figure.dpi"]= 300

In [2]:
def extract_finalQP(filename):
    match = re.search(r'2ndQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None


def extract_1stQP(filename):
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None

def is_double_compressed(mean_difference, final_QP, threshold):    
    mean_difference = mean_difference[0]
    final_QP = final_QP[0]

    # energy_clamp = torch.clamp(mean_difference, min=0)
    # energy_clamp = torch.clamp(mean_difference)
    # energy = torch.sum(torch.square(energy_clamp))
    # mean_difference_right_clamp = torch.clamp(mean_difference[final_QP+1:52], min=0)
    energy = torch.sum(torch.square(mean_difference))
    # mean_difference_right_clamp = torch.clamp(mean_difference[final_QP+1:52])
    right_energy = torch.sum(torch.square(mean_difference[final_QP+1:52]))
        
    if energy > 0:
        energy_ratio = right_energy / energy
        if energy_ratio > threshold:
            return True
        elif energy_ratio <= threshold:
            return False
    else:
        return -1
    
def calculate_mae(file_path):
    try:
        with open(file_path, 'rb') as file:
            loaded_data, loaded_data_shifted = pickle.load(file)
    except Exception as e:
        print(f"Error occurred while loading {file_path}: {e}")
        return None

    # タプル内のリストを抽出
    original_mae = loaded_data
    shifted_mae = loaded_data_shifted

    # Coding ghostを計算してリストに格納する
    mae_difference = [shifted - original for original, shifted in zip(original_mae, shifted_mae)]
    
    # mae_differenceの各要素においてマイナスの値を0に変換
    mae_difference_positive = [0 if val < 0 else val for val in mae_difference]
    
    # mae_difference_positiveをtensorに変換
    mae_difference_tensor = torch.tensor(mae_difference_positive)
    
    
    return mae_difference_positive, mae_difference_tensor

In [3]:
rootpath_csv = "/Prove/Yoshihisa/HEIF_ghost/HEIF_IMAGES_CSV/"

single_path1 = os.path.join(rootpath_csv, 'HEIF_images_single_csv')
single_path2 = os.path.join(rootpath_csv, 'HEIF_images_second_sameQP_csv')
single_list1 = [os.path.join(single_path1, file) for file in sorted(os.listdir(single_path1))]
single_list2 = [os.path.join(single_path2, file) for file in sorted(os.listdir(single_path2))]

second_largeQP1_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_csv')
second_largeQP1_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_csv')
second_largeQP1_list1 = [os.path.join(second_largeQP1_path1, file) for file in sorted(os.listdir(second_largeQP1_path1))]
second_largeQP1_list2 = [os.path.join(second_largeQP1_path2, file) for file in sorted(os.listdir(second_largeQP1_path2))]

second_sameQP_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_sameQP_csv')
second_sameQP_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_sameQP_csv')
second_sameQP_list1 = [os.path.join(second_sameQP_path1, file) for file in sorted(os.listdir(second_sameQP_path1))]
second_sameQP_list2 = [os.path.join(second_sameQP_path2, file) for file in sorted(os.listdir(second_sameQP_path2))]

second_largeQP2_path1 = os.path.join(rootpath_csv, 'HEIF_images_second_largeQP_csv')
second_largeQP2_path2 = os.path.join(rootpath_csv, 'HEIF_images_triple_largeQP_csv')
second_largeQP2_list1 = [os.path.join(second_largeQP2_path1, file) for file in sorted(os.listdir(second_largeQP2_path1))]
second_largeQP2_list2 = [os.path.join(second_largeQP2_path2, file) for file in sorted(os.listdir(second_largeQP2_path2))]

print("single_list1: ", len(single_list1))
print("single_list2: ", len(single_list2))
print()
print("second_largeQP1_list1: ", len(second_largeQP1_list1))
print("second_largeQP1_list2: ", len(second_largeQP1_list2))
print("second_sameQP_list1: ", len(second_sameQP_list1))
print("second_sameQP_list2: ", len(second_sameQP_list2))
print("second_largeQP_list1: ", len(second_largeQP2_list1))
print("second_largeQP_list2: ", len(second_largeQP2_list2))


single_list1:  3080
single_list2:  3080

second_largeQP1_list1:  17556
second_largeQP1_list2:  17556
second_sameQP_list1:  3080
second_sameQP_list2:  3080
second_largeQP_list1:  12012
second_largeQP_list2:  12012


In [4]:
rootpath_pkl = "/Prove/Yoshihisa/HEIF_ghost/PKL/"

single_pathA = os.path.join(rootpath_pkl, 'pkl_single')
single_pathB = os.path.join(rootpath_pkl, 'pkl_second_sameQP')
single_listA = [os.path.join(single_pathA, file) for file in sorted(os.listdir(single_pathA))]
single_listB = [os.path.join(single_pathB, file) for file in sorted(os.listdir(single_pathB))]

second_largeQP1_pathA = os.path.join(rootpath_pkl, 'pkl_second')
second_largeQP1_pathB = os.path.join(rootpath_pkl, 'pkl_triple')
second_largeQP1_listA = [os.path.join(second_largeQP1_pathA, file) for file in sorted(os.listdir(second_largeQP1_pathA))]
second_largeQP1_listB = [os.path.join(second_largeQP1_pathB, file) for file in sorted(os.listdir(second_largeQP1_pathB))]

second_sameQP_pathA = os.path.join(rootpath_pkl, 'pkl_second_sameQP')
second_sameQP_pathB = os.path.join(rootpath_pkl, 'pkl_triple_sameQP')
second_sameQP_listA = [os.path.join(second_sameQP_pathA, file) for file in sorted(os.listdir(second_sameQP_pathA))]
second_sameQP_listB = [os.path.join(second_sameQP_pathB, file) for file in sorted(os.listdir(second_sameQP_pathB))]

second_largeQP2_pathA = os.path.join(rootpath_pkl, 'pkl_second_largeQP')
second_largeQP2_pathB = os.path.join(rootpath_pkl, 'pkl_triple_largeQP')
second_largeQP2_listA = [os.path.join(second_largeQP2_pathA, file) for file in sorted(os.listdir(second_largeQP2_pathA))]
second_largeQP2_listB = [os.path.join(second_largeQP2_pathB, file) for file in sorted(os.listdir(second_largeQP2_pathB))]

print("single_listA: ", len(single_listA))
print("single_listB: ", len(single_listB))
print()
print("second_largeQP1_listA: ", len(second_largeQP1_listA))
print("second_largeQP1_listB: ", len(second_largeQP1_listB))
print("second_sameQP_listA: ", len(second_sameQP_listA))
print("second_sameQP_listB: ", len(second_sameQP_listB))
print("second_largeQP2_listA: ", len(second_largeQP2_listA))
print("second_largeQP2_listB: ", len(second_largeQP2_listB))

single_listA:  3080
single_listB:  3080

second_largeQP1_listA:  17556
second_largeQP1_listB:  17556
second_sameQP_listA:  3080
second_sameQP_listB:  3080
second_largeQP2_listA:  12012
second_largeQP2_listB:  12012


In [5]:
single_csv1 = list(zip(single_list1, single_listA, single_list2, single_listB))
single_csv = random.sample(single_csv1, 2070)
print(len(single_csv))

single_test_csv = [item for item in single_csv1 if item not in single_csv]
single_test_csv1 = random.sample(single_test_csv, 300)
single_test_csv2 = random.sample(single_test_csv, 299)
print(len(single_test_csv1))
print(len(single_test_csv2))

2070
300
299


In [6]:
QP1D1 = ["_1stQP25_2ndQP24", "_1stQP40_2ndQP39"]
QP1D3 = ["_1stQP30_2ndQP27", "_1stQP35_2ndQP32", "_1stQP45_2ndQP42"]
QP1D4 = ["_1stQP20_2ndQP16"]
QP1D5 = ["_1stQP10_2ndQP5", "_1stQP15_2ndQP10", "_1stQP25_2ndQP20", "_1stQP32_2ndQP27", "_1stQP50_2ndQP45"]
QP1D6 = ["_1stQP30_2ndQP24", "_1stQP45_2ndQP39"]
QP1D8 = ["_1stQP32_2ndQP24", "_1stQP35_2ndQP27", "_1stQP40_2ndQP32", "_1stQP50_2ndQP42"]
QP1D9 = ["_1stQP25_2ndQP16"]
QP1D10 = ["_1stQP15_2ndQP5", "_1stQP20_2ndQP10", "_1stQP30_2ndQP20"]
QP1D11 = ["_1stQP35_2ndQP24", "_1stQP50_2ndQP39"]
QP1D12 = ["_1stQP32_2ndQP20"]
QP1D13 = ["_1stQP40_2ndQP27", "_1stQP45_2ndQP32"]
QP1D14 = ["_1stQP30_2ndQP16"]
QP1D15 = ["_1stQP20_2ndQP5", "_1stQP25_2ndQP10", "_1stQP35_2ndQP20"]
QP1D16 = ["_1stQP32_2ndQP16", "_1stQP40_2ndQP24"]
QP1D18 = ["_1stQP45_2ndQP27", "_1stQP50_2ndQP32"]
QP1D19 = ["_1stQP35_2ndQP16"]
QP1D20 = ["_1stQP25_2ndQP5", "_1stQP30_2ndQP10", "_1stQP40_2ndQP20"]
QP1D21 = ["_1stQP45_2ndQP24"]
QP1D22 = ["_1stQP32_2ndQP10"]
QP1D23 = ["_1stQP50_2ndQP27"]
QP1D24 = ["_1stQP40_2ndQP16"]
QP1D25 = ["_1stQP30_2ndQP5", "_1stQP35_2ndQP10", "_1stQP45_2ndQP20"]
QP1D26 = ["_1stQP50_2ndQP24"]
QP1D27 = ["_1stQP32_2ndQP5"]
QP1D29 = ["_1stQP45_2ndQP16"]
QP1D30 = ["_1stQP35_2ndQP5", "_1stQP40_2ndQP10", "_1stQP50_2ndQP20"]
QP1D34 = ["_1stQP50_2ndQP16"]
QP1D35 = ["_1stQP40_2ndQP5", "_1stQP45_2ndQP10"]
QP1D40 = ["_1stQP45_2ndQP5", "_1stQP50_2ndQP10"]
QP1D45 = ["_1stQP50_2ndQP5"]

In [7]:
second_largeQP1_csv1 = list(zip(second_largeQP1_list1, second_largeQP1_listA, second_largeQP1_list2, second_largeQP1_listB))

# 条件に合致する要素を抽出
QP1D1_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D1)]
QP1D3_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D3)]
QP1D4_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D4)]
QP1D5_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D5)]
QP1D6_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D6)]
QP1D8_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D8)]
QP1D9_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D9)]
QP1D10_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D10)]
QP1D11_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D11)]
QP1D12_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D12)]
QP1D13_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D13)]
QP1D14_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D14)]
QP1D15_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D15)]
QP1D16_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D16)]
QP1D18_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D18)]
QP1D19_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D19)]
QP1D20_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D20)]
QP1D21_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D21)]
QP1D22_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D22)]
QP1D23_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D23)]
QP1D24_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D24)]
QP1D25_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D25)]
QP1D26_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D26)]
QP1D27_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D27)]
QP1D29_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D29)]
QP1D30_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D30)]
QP1D34_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D34)]
QP1D35_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D35)]
QP1D40_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D40)]
QP1D45_second_largeQP1_csv1 = [item for item in second_largeQP1_csv1 if any(target_string in item[0] for target_string in QP1D45)]


QP1D1_random_sample = random.sample(QP1D1_second_largeQP1_csv1, 23)
QP1D3_random_sample = random.sample(QP1D3_second_largeQP1_csv1, 23)
QP1D4_random_sample = random.sample(QP1D4_second_largeQP1_csv1, 23)
QP1D5_random_sample = random.sample(QP1D5_second_largeQP1_csv1, 23)
QP1D6_random_sample = random.sample(QP1D6_second_largeQP1_csv1, 23)
QP1D8_random_sample = random.sample(QP1D8_second_largeQP1_csv1, 23)
QP1D9_random_sample = random.sample(QP1D9_second_largeQP1_csv1, 23)
QP1D10_random_sample = random.sample(QP1D10_second_largeQP1_csv1, 23)
QP1D11_random_sample = random.sample(QP1D11_second_largeQP1_csv1, 23)
QP1D12_random_sample = random.sample(QP1D12_second_largeQP1_csv1, 23)
QP1D13_random_sample = random.sample(QP1D13_second_largeQP1_csv1, 23)
QP1D14_random_sample = random.sample(QP1D14_second_largeQP1_csv1, 23)
QP1D15_random_sample = random.sample(QP1D15_second_largeQP1_csv1, 23)
QP1D16_random_sample = random.sample(QP1D16_second_largeQP1_csv1, 23)
QP1D18_random_sample = random.sample(QP1D18_second_largeQP1_csv1, 23)
QP1D19_random_sample = random.sample(QP1D19_second_largeQP1_csv1, 23)
QP1D20_random_sample = random.sample(QP1D20_second_largeQP1_csv1, 23)
QP1D21_random_sample = random.sample(QP1D21_second_largeQP1_csv1, 23)
QP1D22_random_sample = random.sample(QP1D22_second_largeQP1_csv1, 23)
QP1D23_random_sample = random.sample(QP1D23_second_largeQP1_csv1, 23)
QP1D24_random_sample = random.sample(QP1D24_second_largeQP1_csv1, 23)
QP1D25_random_sample = random.sample(QP1D25_second_largeQP1_csv1, 23)
QP1D26_random_sample = random.sample(QP1D26_second_largeQP1_csv1, 23)
QP1D27_random_sample = random.sample(QP1D27_second_largeQP1_csv1, 23)
QP1D29_random_sample = random.sample(QP1D29_second_largeQP1_csv1, 23)
QP1D30_random_sample = random.sample(QP1D30_second_largeQP1_csv1, 23)
QP1D34_random_sample = random.sample(QP1D34_second_largeQP1_csv1, 23)
QP1D35_random_sample = random.sample(QP1D35_second_largeQP1_csv1, 23)
QP1D40_random_sample = random.sample(QP1D40_second_largeQP1_csv1, 23)
QP1D45_random_sample = random.sample(QP1D45_second_largeQP1_csv1, 23)


# 各リストから23個の要素をランダムに抽出してリストに格納
second_largeQP1_csv = (
    QP1D1_random_sample + QP1D3_random_sample + QP1D4_random_sample +
    QP1D5_random_sample + QP1D6_random_sample + QP1D8_random_sample +
    QP1D9_random_sample + QP1D10_random_sample + QP1D11_random_sample +
    QP1D12_random_sample + QP1D13_random_sample + QP1D14_random_sample +
    QP1D15_random_sample + QP1D16_random_sample + QP1D18_random_sample +
    QP1D19_random_sample + QP1D20_random_sample + QP1D21_random_sample +
    QP1D22_random_sample + QP1D23_random_sample + QP1D24_random_sample +
    QP1D25_random_sample + QP1D26_random_sample + QP1D27_random_sample +
    QP1D29_random_sample + QP1D30_random_sample + QP1D34_random_sample +
    QP1D35_random_sample + QP1D40_random_sample + QP1D45_random_sample
)

# 結果の表示
print("second_largeQP1_csv:", len(second_largeQP1_csv))

# 各リストから23個の要素を除いた残りの要素を取得
QP1D1_remaining = [item for item in QP1D1_second_largeQP1_csv1 if item not in QP1D1_random_sample]
QP1D3_remaining = [item for item in QP1D3_second_largeQP1_csv1 if item not in QP1D3_random_sample]
QP1D4_remaining = [item for item in QP1D4_second_largeQP1_csv1 if item not in QP1D4_random_sample]
QP1D5_remaining = [item for item in QP1D5_second_largeQP1_csv1 if item not in QP1D5_random_sample]
QP1D6_remaining = [item for item in QP1D6_second_largeQP1_csv1 if item not in QP1D6_random_sample]
QP1D8_remaining = [item for item in QP1D8_second_largeQP1_csv1 if item not in QP1D8_random_sample]
QP1D9_remaining = [item for item in QP1D9_second_largeQP1_csv1 if item not in QP1D9_random_sample]
QP1D10_remaining = [item for item in QP1D10_second_largeQP1_csv1 if item not in QP1D10_random_sample]
QP1D11_remaining = [item for item in QP1D11_second_largeQP1_csv1 if item not in QP1D11_random_sample]
QP1D12_remaining = [item for item in QP1D12_second_largeQP1_csv1 if item not in QP1D12_random_sample]
QP1D13_remaining = [item for item in QP1D13_second_largeQP1_csv1 if item not in QP1D13_random_sample]
QP1D14_remaining = [item for item in QP1D14_second_largeQP1_csv1 if item not in QP1D14_random_sample]
QP1D15_remaining = [item for item in QP1D15_second_largeQP1_csv1 if item not in QP1D15_random_sample]
QP1D16_remaining = [item for item in QP1D16_second_largeQP1_csv1 if item not in QP1D16_random_sample]
QP1D18_remaining = [item for item in QP1D18_second_largeQP1_csv1 if item not in QP1D18_random_sample]
QP1D19_remaining = [item for item in QP1D19_second_largeQP1_csv1 if item not in QP1D19_random_sample]
QP1D20_remaining = [item for item in QP1D20_second_largeQP1_csv1 if item not in QP1D20_random_sample]
QP1D21_remaining = [item for item in QP1D21_second_largeQP1_csv1 if item not in QP1D21_random_sample]
QP1D22_remaining = [item for item in QP1D22_second_largeQP1_csv1 if item not in QP1D22_random_sample]
QP1D23_remaining = [item for item in QP1D23_second_largeQP1_csv1 if item not in QP1D23_random_sample]
QP1D24_remaining = [item for item in QP1D24_second_largeQP1_csv1 if item not in QP1D24_random_sample]
QP1D25_remaining = [item for item in QP1D25_second_largeQP1_csv1 if item not in QP1D25_random_sample]
QP1D26_remaining = [item for item in QP1D26_second_largeQP1_csv1 if item not in QP1D26_random_sample]
QP1D27_remaining = [item for item in QP1D27_second_largeQP1_csv1 if item not in QP1D27_random_sample]
QP1D29_remaining = [item for item in QP1D29_second_largeQP1_csv1 if item not in QP1D29_random_sample]
QP1D30_remaining = [item for item in QP1D30_second_largeQP1_csv1 if item not in QP1D30_random_sample]
QP1D34_remaining = [item for item in QP1D34_second_largeQP1_csv1 if item not in QP1D34_random_sample]
QP1D35_remaining = [item for item in QP1D35_second_largeQP1_csv1 if item not in QP1D35_random_sample]
QP1D40_remaining = [item for item in QP1D40_second_largeQP1_csv1 if item not in QP1D40_random_sample]
QP1D45_remaining = [item for item in QP1D45_second_largeQP1_csv1 if item not in QP1D45_random_sample]


# 残りの要素からランダムに10個の要素を抽出
QP1D1_random_remaining = random.sample(QP1D1_remaining, 10)
QP1D3_random_remaining = random.sample(QP1D3_remaining, 10)
QP1D4_random_remaining = random.sample(QP1D4_remaining, 10)
QP1D5_random_remaining = random.sample(QP1D5_remaining, 10)
QP1D6_random_remaining = random.sample(QP1D6_remaining, 10)
QP1D8_random_remaining = random.sample(QP1D8_remaining, 10)
QP1D9_random_remaining = random.sample(QP1D9_remaining, 10)
QP1D10_random_remaining = random.sample(QP1D10_remaining, 10)
QP1D11_random_remaining = random.sample(QP1D11_remaining, 10)
QP1D12_random_remaining = random.sample(QP1D12_remaining, 10)
QP1D13_random_remaining = random.sample(QP1D13_remaining, 10)
QP1D14_random_remaining = random.sample(QP1D14_remaining, 10)
QP1D15_random_remaining = random.sample(QP1D15_remaining, 10)
QP1D16_random_remaining = random.sample(QP1D16_remaining, 10)
QP1D18_random_remaining = random.sample(QP1D18_remaining, 10)
QP1D19_random_remaining = random.sample(QP1D19_remaining, 10)
QP1D20_random_remaining = random.sample(QP1D20_remaining, 10)
QP1D21_random_remaining = random.sample(QP1D21_remaining, 10)
QP1D22_random_remaining = random.sample(QP1D22_remaining, 10)
QP1D23_random_remaining = random.sample(QP1D23_remaining, 10)
QP1D24_random_remaining = random.sample(QP1D24_remaining, 10)
QP1D25_random_remaining = random.sample(QP1D25_remaining, 10)
QP1D26_random_remaining = random.sample(QP1D26_remaining, 10)
QP1D27_random_remaining = random.sample(QP1D27_remaining, 10)
QP1D29_random_remaining = random.sample(QP1D29_remaining, 10)
QP1D30_random_remaining = random.sample(QP1D30_remaining, 10)
QP1D34_random_remaining = random.sample(QP1D34_remaining, 10)
QP1D35_random_remaining = random.sample(QP1D35_remaining, 10)
QP1D40_random_remaining = random.sample(QP1D40_remaining, 10)
QP1D45_random_remaining = random.sample(QP1D45_remaining, 10)


second_largeQP1_test_csv = (
    QP1D1_random_remaining + QP1D3_random_remaining + QP1D4_random_remaining +
    QP1D5_random_remaining + QP1D6_random_remaining + QP1D8_random_remaining +
    QP1D9_random_remaining + QP1D10_random_remaining + QP1D11_random_remaining +
    QP1D12_random_remaining + QP1D13_random_remaining + QP1D14_random_remaining +
    QP1D15_random_remaining + QP1D16_random_remaining + QP1D18_random_remaining +
    QP1D19_random_remaining + QP1D20_random_remaining + QP1D21_random_remaining +
    QP1D22_random_remaining + QP1D23_random_remaining + QP1D24_random_remaining +
    QP1D25_random_remaining + QP1D26_random_remaining + QP1D27_random_remaining +
    QP1D29_random_remaining + QP1D30_random_remaining + QP1D34_random_remaining +
    QP1D35_random_remaining + QP1D40_random_remaining + QP1D45_random_remaining
)

# 結果の表示
print("second_largeQP1_test_csv:", len(second_largeQP1_test_csv))


second_largeQP1_csv: 690
second_largeQP1_test_csv: 300


In [8]:
QPS5 = ["_1stQP5_2ndQP5"]
QPS10 = ["_1stQP10_2ndQP10"]
QPS16 = ["_1stQP16_2ndQP16"]
QPS20 = ["_1stQP20_2ndQP20"]
QPS24 = ["_1stQP24_2ndQP24"]
QPS27 = ["_1stQP27_2ndQP27"]
QPS32 = ["_1stQP32_2ndQP32"]
QPS39 = ["_1stQP39_2ndQP39"]
QPS42 = ["_1stQP42_2ndQP42"]
QPS45 = ["_1stQP45_2ndQP45"]

In [9]:
second_sameQP_csv1 = list(zip(second_sameQP_list1, second_sameQP_listA, second_sameQP_list2, second_sameQP_listB))

# 条件に合致する要素を抽出
QPS5_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS5)]
QPS10_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS10)]
QPS16_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS16)]
QPS20_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS20)]
QPS24_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS24)]
QPS27_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS27)]
QPS32_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS32)]
QPS39_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS39)]
QPS42_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS42)]
QPS45_second_sameQP_csv1 = [item for item in second_sameQP_csv1 if any(target_string in item[0] for target_string in QPS45)]

QPS5_random_sample = random.sample(QPS5_second_sameQP_csv1, 69)
QPS10_random_sample = random.sample(QPS10_second_sameQP_csv1, 69)
QPS16_random_sample = random.sample(QPS16_second_sameQP_csv1, 69)
QPS20_random_sample = random.sample(QPS20_second_sameQP_csv1, 69)
QPS24_random_sample = random.sample(QPS24_second_sameQP_csv1, 69)
QPS27_random_sample = random.sample(QPS27_second_sameQP_csv1, 69)
QPS32_random_sample = random.sample(QPS32_second_sameQP_csv1, 69)
QPS39_random_sample = random.sample(QPS39_second_sameQP_csv1, 69)
QPS42_random_sample = random.sample(QPS42_second_sameQP_csv1, 69)
QPS45_random_sample = random.sample(QPS45_second_sameQP_csv1, 69)

second_sameQP_csv = (
    QPS5_random_sample + QPS10_random_sample + QPS16_random_sample + 
    QPS20_random_sample + QPS24_random_sample + QPS27_random_sample +
    QPS32_random_sample + QPS39_random_sample + QPS42_random_sample + 
    QPS45_random_sample
)


# 結果の表示
print("second_sameQP_csv:", len(second_sameQP_csv))

# 各リストから23個の要素を除いた残りの要素を取得

QPS5_remaining = [item for item in QPS5_second_sameQP_csv1 if item not in QPS5_random_sample]
QPS10_remaining = [item for item in QPS10_second_sameQP_csv1 if item not in QPS10_random_sample]
QPS16_remaining = [item for item in QPS16_second_sameQP_csv1 if item not in QPS16_random_sample]
QPS20_remaining = [item for item in QPS20_second_sameQP_csv1 if item not in QPS20_random_sample]
QPS24_remaining = [item for item in QPS24_second_sameQP_csv1 if item not in QPS24_random_sample]
QPS27_remaining = [item for item in QPS27_second_sameQP_csv1 if item not in QPS27_random_sample]
QPS32_remaining = [item for item in QPS32_second_sameQP_csv1 if item not in QPS32_random_sample]
QPS39_remaining = [item for item in QPS39_second_sameQP_csv1 if item not in QPS39_random_sample]
QPS42_remaining = [item for item in QPS42_second_sameQP_csv1 if item not in QPS42_random_sample]
QPS45_remaining = [item for item in QPS45_second_sameQP_csv1 if item not in QPS45_random_sample]


# 残りの要素からランダムに10個の要素を抽出
QPS5_random_remaining = random.sample(QPS5_remaining, 30)
QPS10_random_remaining = random.sample(QPS10_remaining, 30)
QPS16_random_remaining = random.sample(QPS16_remaining, 30)
QPS20_random_remaining = random.sample(QPS20_remaining, 30)
QPS24_random_remaining = random.sample(QPS24_remaining, 30)
QPS27_random_remaining = random.sample(QPS27_remaining, 30)
QPS32_random_remaining = random.sample(QPS32_remaining, 30)
QPS39_random_remaining = random.sample(QPS39_remaining, 30)
QPS42_random_remaining = random.sample(QPS42_remaining, 30)
QPS45_random_remaining = random.sample(QPS45_remaining, 30)


second_sameQP_test_csv = (
    QPS5_random_remaining + QPS10_random_remaining + QPS16_random_remaining + QPS20_random_remaining + 
    QPS24_random_remaining + QPS27_random_remaining + QPS32_random_remaining + QPS39_random_remaining +
    QPS42_random_remaining + QPS45_random_remaining
)

# 結果の表示
print("second_sameQP_test_csv:", len(second_sameQP_test_csv))

second_sameQP_csv: 690
second_sameQP_test_csv: 300


In [10]:
QP2D1 = ["_1stQP15_2ndQP16"]
QP2D2 = ["_1stQP25_2ndQP27", "_1stQP30_2ndQP32", "_1stQP40_2ndQP42"]
QP2D4 = ["_1stQP20_2ndQP24", "_1stQP35_2ndQP39"]
QP2D5 = ["_1stQP15_2ndQP20", "_1stQP40_2ndQP45"]
QP2D6 = ["_1stQP10_2ndQP16"]
QP2D7 = ["_1stQP20_2ndQP27", "_1stQP25_2ndQP32", "_1stQP32_2ndQP39", "_1stQP35_2ndQP42"]
QP2D9 = ["_1stQP15_2ndQP24", "_1stQP30_2ndQP39"]
QP2D10 = ["_1stQP10_2ndQP20", "_1stQP32_2ndQP42", "_1stQP35_2ndQP45"]
QP2D12 = ["_1stQP15_2ndQP27", "_1stQP20_2ndQP32", "_1stQP30_2ndQP42"]
QP2D13 = ["_1stQP32_2ndQP45"]
QP2D14 = ["_1stQP10_2ndQP24", "_1stQP25_2ndQP39"]
QP2D15 = ["_1stQP30_2ndQP45"]
QP2D17 = ["_1stQP10_2ndQP27", "_1stQP15_2ndQP32", "_1stQP25_2ndQP42"]
QP2D19 = ["_1stQP20_2ndQP39"]
QP2D20 = ["_1stQP25_2ndQP45"]
QP2D22 = ["_1stQP10_2ndQP32", "_1stQP20_2ndQP42"]
QP2D24 = ["_1stQP15_2ndQP39"]
QP2D25 = ["_1stQP20_2ndQP45"]
QP2D27 = ["_1stQP15_2ndQP42"]
QP2D29 = ["_1stQP10_2ndQP39"]
QP2D30 = ["_1stQP15_2ndQP45"]
QP2D32 = ["_1stQP10_2ndQP42"]
QP2D35 = ["_1stQP10_2ndQP45"]

In [11]:
second_largeQP2_csv1 = list(zip(second_largeQP2_list1, second_largeQP2_listA, second_largeQP2_list2, second_largeQP2_listB))

# 条件に合致する要素を抽出
QP2D1_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D1)]
QP2D2_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D2)]
QP2D4_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D4)]
QP2D5_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D5)]
QP2D6_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D6)]
QP2D7_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D7)]
QP2D9_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D9)]
QP2D10_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D10)]
QP2D12_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D12)]
QP2D13_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D13)]
QP2D14_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D14)]
QP2D15_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D15)]
QP2D17_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D17)]
QP2D19_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D19)]
QP2D20_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D20)]
QP2D22_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D22)]
QP2D24_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D24)]
QP2D25_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D25)]
QP2D27_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D27)]
QP2D29_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D29)]
QP2D30_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D30)]
QP2D32_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D32)]
QP2D35_second_largeQP2_csv1 = [item for item in second_largeQP2_csv1 if any(target_string in item[0] for target_string in QP2D35)]

QP2D1_random_sample = random.sample(QP2D1_second_largeQP2_csv1, 30)
QP2D2_random_sample = random.sample(QP2D2_second_largeQP2_csv1, 30)
QP2D4_random_sample = random.sample(QP2D4_second_largeQP2_csv1, 30)
QP2D5_random_sample = random.sample(QP2D5_second_largeQP2_csv1, 30)
QP2D6_random_sample = random.sample(QP2D6_second_largeQP2_csv1, 30)
QP2D7_random_sample = random.sample(QP2D7_second_largeQP2_csv1, 30)
QP2D9_random_sample = random.sample(QP2D9_second_largeQP2_csv1, 30)
QP2D10_random_sample = random.sample(QP2D10_second_largeQP2_csv1, 30)
QP2D12_random_sample = random.sample(QP2D12_second_largeQP2_csv1, 30)
QP2D13_random_sample = random.sample(QP2D13_second_largeQP2_csv1, 30)
QP2D14_random_sample = random.sample(QP2D14_second_largeQP2_csv1, 30)
QP2D15_random_sample = random.sample(QP2D15_second_largeQP2_csv1, 30)
QP2D17_random_sample = random.sample(QP2D17_second_largeQP2_csv1, 30)
QP2D19_random_sample = random.sample(QP2D19_second_largeQP2_csv1, 30)
QP2D20_random_sample = random.sample(QP2D20_second_largeQP2_csv1, 30)
QP2D22_random_sample = random.sample(QP2D22_second_largeQP2_csv1, 30)
QP2D24_random_sample = random.sample(QP2D24_second_largeQP2_csv1, 30)
QP2D25_random_sample = random.sample(QP2D25_second_largeQP2_csv1, 30)
QP2D27_random_sample = random.sample(QP2D27_second_largeQP2_csv1, 30)
QP2D29_random_sample = random.sample(QP2D29_second_largeQP2_csv1, 30)
QP2D30_random_sample = random.sample(QP2D30_second_largeQP2_csv1, 30)
QP2D32_random_sample = random.sample(QP2D32_second_largeQP2_csv1, 30)
QP2D35_random_sample = random.sample(QP2D35_second_largeQP2_csv1, 30)


# 各リストから23個の要素をランダムに抽出してリストに格納
second_largeQP2_csv = (
    QP2D1_random_sample + QP2D2_random_sample + QP2D4_random_sample +
    QP2D5_random_sample + QP2D6_random_sample + QP2D7_random_sample +
    QP2D9_random_sample + QP2D10_random_sample + QP2D12_random_sample + 
    QP2D13_random_sample + QP2D14_random_sample + QP2D15_random_sample + 
    QP2D17_random_sample + QP2D19_random_sample + QP2D20_random_sample +
    QP2D22_random_sample + QP2D24_random_sample + QP2D25_random_sample + 
    QP2D27_random_sample + QP2D29_random_sample + QP2D30_random_sample + 
    QP2D32_random_sample + QP2D35_random_sample)

# 結果の表示
print("second_largeQP2_csv:", len(second_largeQP2_csv))

# 各リストから23個の要素を除いた残りの要素を取得
QP2D1_remaining = [item for item in QP2D1_second_largeQP2_csv1 if item not in QP2D1_random_sample]
QP2D2_remaining = [item for item in QP2D2_second_largeQP2_csv1 if item not in QP2D2_random_sample]
QP2D4_remaining = [item for item in QP2D4_second_largeQP2_csv1 if item not in QP2D4_random_sample]
QP2D5_remaining = [item for item in QP2D5_second_largeQP2_csv1 if item not in QP2D5_random_sample]
QP2D6_remaining = [item for item in QP2D6_second_largeQP2_csv1 if item not in QP2D6_random_sample]
QP2D7_remaining = [item for item in QP2D7_second_largeQP2_csv1 if item not in QP2D7_random_sample]
QP2D9_remaining = [item for item in QP2D9_second_largeQP2_csv1 if item not in QP2D9_random_sample]
QP2D10_remaining = [item for item in QP2D10_second_largeQP2_csv1 if item not in QP2D10_random_sample]
QP2D12_remaining = [item for item in QP2D12_second_largeQP2_csv1 if item not in QP2D12_random_sample]
QP2D13_remaining = [item for item in QP2D13_second_largeQP2_csv1 if item not in QP2D13_random_sample]
QP2D14_remaining = [item for item in QP2D14_second_largeQP2_csv1 if item not in QP2D14_random_sample]
QP2D15_remaining = [item for item in QP2D15_second_largeQP2_csv1 if item not in QP2D15_random_sample]
QP2D17_remaining = [item for item in QP2D17_second_largeQP2_csv1 if item not in QP2D17_random_sample]
QP2D19_remaining = [item for item in QP2D19_second_largeQP2_csv1 if item not in QP2D19_random_sample]
QP2D20_remaining = [item for item in QP2D20_second_largeQP2_csv1 if item not in QP2D20_random_sample]
QP2D22_remaining = [item for item in QP2D22_second_largeQP2_csv1 if item not in QP2D22_random_sample]
QP2D24_remaining = [item for item in QP2D24_second_largeQP2_csv1 if item not in QP2D24_random_sample]
QP2D25_remaining = [item for item in QP2D25_second_largeQP2_csv1 if item not in QP2D25_random_sample]
QP2D27_remaining = [item for item in QP2D27_second_largeQP2_csv1 if item not in QP2D27_random_sample]
QP2D29_remaining = [item for item in QP2D29_second_largeQP2_csv1 if item not in QP2D29_random_sample]
QP2D30_remaining = [item for item in QP2D30_second_largeQP2_csv1 if item not in QP2D30_random_sample]
QP2D32_remaining = [item for item in QP2D32_second_largeQP2_csv1 if item not in QP2D32_random_sample]
QP2D35_remaining = [item for item in QP2D35_second_largeQP2_csv1 if item not in QP2D35_random_sample]



# 残りの要素からランダムに10個の要素を抽出
QP2D1_random_remaining = random.sample(QP2D1_remaining, 13)
QP2D2_random_remaining = random.sample(QP2D2_remaining, 13)
QP2D4_random_remaining = random.sample(QP2D4_remaining, 13)
QP2D5_random_remaining = random.sample(QP2D5_remaining, 13)
QP2D6_random_remaining = random.sample(QP2D6_remaining, 13)
QP2D7_random_remaining = random.sample(QP2D7_remaining, 13)
QP2D9_random_remaining = random.sample(QP2D9_remaining, 13)
QP2D10_random_remaining = random.sample(QP2D10_remaining, 13)
QP2D12_random_remaining = random.sample(QP2D12_remaining, 13)
QP2D13_random_remaining = random.sample(QP2D13_remaining, 13)
QP2D14_random_remaining = random.sample(QP2D14_remaining, 13)
QP2D15_random_remaining = random.sample(QP2D15_remaining, 13)
QP2D17_random_remaining = random.sample(QP2D17_remaining, 13)
QP2D19_random_remaining = random.sample(QP2D19_remaining, 13)
QP2D20_random_remaining = random.sample(QP2D20_remaining, 13)
QP2D22_random_remaining = random.sample(QP2D22_remaining, 13)
QP2D24_random_remaining = random.sample(QP2D24_remaining, 13)
QP2D25_random_remaining = random.sample(QP2D25_remaining, 13)
QP2D27_random_remaining = random.sample(QP2D27_remaining, 13)
QP2D29_random_remaining = random.sample(QP2D29_remaining, 13)
QP2D30_random_remaining = random.sample(QP2D30_remaining, 13)
QP2D32_random_remaining = random.sample(QP2D32_remaining, 13)
QP2D35_random_remaining = random.sample(QP2D35_remaining, 13)


second_largeQP2_test_csv = (
    QP2D1_random_remaining + QP2D2_random_remaining + QP2D4_random_remaining +
    QP2D5_random_remaining + QP2D6_random_remaining + QP2D7_random_remaining +
    QP2D9_random_remaining + QP2D10_random_remaining + QP2D12_random_remaining + 
    QP2D13_random_remaining + QP2D14_random_remaining + QP2D15_random_remaining + 
    QP2D17_random_remaining + QP2D19_random_remaining + QP2D20_random_remaining + 
    QP2D22_random_remaining + QP2D24_random_remaining + QP2D25_random_remaining + 
    QP2D27_random_remaining + QP2D29_random_remaining + QP2D30_random_remaining + 
    QP2D32_random_remaining + QP2D35_random_remaining)

# 結果の表示
print("second_largeQP2_test_csv:", len(second_largeQP2_test_csv))

second_largeQP2_csv: 690
second_largeQP2_test_csv: 299


In [12]:
train_csv_list = single_csv + second_largeQP1_csv + second_sameQP_csv + second_largeQP2_csv
print("train_csv_list: ", len(train_csv_list))

# test_csv_largeQP1_list = single_test_csv1 + second_largeQP1_test_csv
# print("test_csv_largeQP1_list: ", len(test_csv_largeQP1_list))

# test_csv_sameQP_list = single_test_csv1 + second_sameQP_test_csv
# print("test_csv_sameQP_list: ", len(test_csv_sameQP_list))

# test_csv_largeQP2_list = single_test_csv2 + second_largeQP2_test_csv
# print("test_csv_largeQP2_list: ", len(test_csv_largeQP2_list))

train_csv_list:  4140


In [13]:
rootpath2 = "/Prove/Yoshihisa/HEIF_ghost/EXPERIMENT_DIFFERENT_SOFTWARE/"

# SINGLE
GIMP_path1 = os.path.join(rootpath2, 'GIMP_csv')
GIMP_path2 = os.path.join(rootpath2, 'GIMP_RECOMPRESSED_csv')

GIMP_path1_csv = [os.path.join(GIMP_path1, file) for file in sorted(os.listdir(GIMP_path1))]
GIMP_path2_csv = [os.path.join(GIMP_path2, file) for file in sorted(os.listdir(GIMP_path2))]

LIBHEIF_path1 = os.path.join(rootpath2, 'LIBHEIF_csv')
LIBHEIF_path2 = os.path.join(rootpath2, 'LIBHEIF_RECOMPRESSED_csv')

LIBHEIF_path1_csv = [os.path.join(LIBHEIF_path1, file) for file in sorted(os.listdir(LIBHEIF_path1))]
LIBHEIF_path2_csv = [os.path.join(LIBHEIF_path2, file) for file in sorted(os.listdir(LIBHEIF_path2))]


# DOUBLE
GIMP_GIMP_path1 = os.path.join(rootpath2, 'GIMP_GIMP_csv')
GIMP_GIMP_path2 = os.path.join(rootpath2, 'GIMP_GIMP_RECOMPRESSED_csv')

GIMP_GIMP_path1_csv = [os.path.join(GIMP_GIMP_path1, file) for file in sorted(os.listdir(GIMP_GIMP_path1))]
GIMP_GIMP_path2_csv = [os.path.join(GIMP_GIMP_path2, file) for file in sorted(os.listdir(GIMP_GIMP_path2))]

LIBHEIF_GIMP_path1 = os.path.join(rootpath2, 'LIBHEIF_GIMP_csv')
LIBHEIF_GIMP_path2 = os.path.join(rootpath2, 'LIBHEIF_GIMP_RECOMPRESSED_csv')

LIBHEIF_GIMP_path1_csv = [os.path.join(LIBHEIF_GIMP_path1, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path1))]
LIBHEIF_GIMP_path2_csv = [os.path.join(LIBHEIF_GIMP_path2, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path2))]

GIMP_LIBHEIF_path1 = os.path.join(rootpath2, 'GIMP_LIBHEIF_csv')
GIMP_LIBHEIF_path2 = os.path.join(rootpath2, 'GIMP_LIBHEIF_RECOMPRESSED_csv')

GIMP_LIBHEIF_path1_csv = [os.path.join(GIMP_LIBHEIF_path1, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path1))]
GIMP_LIBHEIF_path2_csv = [os.path.join(GIMP_LIBHEIF_path2, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path2))]


print("GIMP_path1_csv: ", len(GIMP_path1_csv))
print("GIMP_path2_csv: ", len(GIMP_path2_csv))
print("GIMP_GIMP_path1_csv: ", len(GIMP_GIMP_path1_csv))
print("GIMP_GIMP_path2_csv: ", len(GIMP_GIMP_path2_csv))
print()
print("GIMP_path1_csv: ", len(GIMP_path1_csv))
print("GIMP_path2_csv: ", len(GIMP_path2_csv))
print("LIBHEIF_GIMP_path1_csv: ", len(LIBHEIF_GIMP_path1_csv))
print("LIBHEIF_GIMP_path2_csv: ", len(LIBHEIF_GIMP_path2_csv))
print()
print("LIBHEIF_path1_csv: ", len(LIBHEIF_path1_csv))
print("LIBHEIF_path2_csv: ", len(LIBHEIF_path2_csv))
print("GIMP_LIBHEIF_path1_csv: ", len(GIMP_LIBHEIF_path1_csv))
print("GIMP_LIBHEIF_path2_csv: ", len(GIMP_LIBHEIF_path2_csv))


GIMP_path1_csv:  90
GIMP_path2_csv:  90
GIMP_GIMP_path1_csv:  90
GIMP_GIMP_path2_csv:  90

GIMP_path1_csv:  90
GIMP_path2_csv:  90
LIBHEIF_GIMP_path1_csv:  90
LIBHEIF_GIMP_path2_csv:  90

LIBHEIF_path1_csv:  90
LIBHEIF_path2_csv:  90
GIMP_LIBHEIF_path1_csv:  90
GIMP_LIBHEIF_path2_csv:  90


In [14]:
rootpath3 = "/Prove/Yoshihisa/HEIF_ghost/EXPERIMENT_DIFFERENT_SOFTWARE/PKL/"

# SINGLE
GIMP_path1 = os.path.join(rootpath3, 'pkl_GIMP')
GIMP_path2 = os.path.join(rootpath3, 'pkl_GIMP_RECOMPRESSED')

GIMP_path1_pkl = [os.path.join(GIMP_path1, file) for file in sorted(os.listdir(GIMP_path1))]
GIMP_path2_pkl = [os.path.join(GIMP_path2, file) for file in sorted(os.listdir(GIMP_path2))]

LIBHEIF_path1 = os.path.join(rootpath3, 'pkl_LIBHEIF')
LIBHEIF_path2 = os.path.join(rootpath3, 'pkl_LIBHEIF_RECOMPRESSED')

LIBHEIF_path1_pkl = [os.path.join(LIBHEIF_path1, file) for file in sorted(os.listdir(LIBHEIF_path1))]
LIBHEIF_path2_pkl = [os.path.join(LIBHEIF_path2, file) for file in sorted(os.listdir(LIBHEIF_path2))]


# DOUBLE
GIMP_GIMP_path1 = os.path.join(rootpath3, 'pkl_GIMP_GIMP')
GIMP_GIMP_path2 = os.path.join(rootpath3, 'pkl_GIMP_GIMP_RECOMPRESSED')

GIMP_GIMP_path1_pkl = [os.path.join(GIMP_GIMP_path1, file) for file in sorted(os.listdir(GIMP_GIMP_path1))]
GIMP_GIMP_path2_pkl = [os.path.join(GIMP_GIMP_path2, file) for file in sorted(os.listdir(GIMP_GIMP_path2))]

LIBHEIF_GIMP_path1 = os.path.join(rootpath3, 'pkl_LIBHEIF_GIMP')
LIBHEIF_GIMP_path2 = os.path.join(rootpath3, 'pkl_LIBHEIF_GIMP_RECOMPRESSED')

LIBHEIF_GIMP_path1_pkl = [os.path.join(LIBHEIF_GIMP_path1, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path1))]
LIBHEIF_GIMP_path2_pkl = [os.path.join(LIBHEIF_GIMP_path2, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path2))]

GIMP_LIBHEIF_path1 = os.path.join(rootpath3, 'pkl_GIMP_LIBHEIF')
GIMP_LIBHEIF_path2 = os.path.join(rootpath3, 'pkl_GIMP_LIBHEIF_RECOMPRESSED')

GIMP_LIBHEIF_path1_pkl = [os.path.join(GIMP_LIBHEIF_path1, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path1))]
GIMP_LIBHEIF_path2_pkl = [os.path.join(GIMP_LIBHEIF_path2, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path2))]


print("GIMP_path1_pkl: ", len(GIMP_path1_pkl))
print("GIMP_path2_pkl: ", len(GIMP_path2_pkl))
print("GIMP_GIMP_path1_pkl: ", len(GIMP_GIMP_path1_pkl))
print("GIMP_GIMP_path2_pkl: ", len(GIMP_GIMP_path2_pkl))
print()
print("GIMP_path1_pkl: ", len(GIMP_path1_pkl))
print("GIMP_path2_pkl: ", len(GIMP_path2_pkl))
print("LIBHEIF_GIMP_path1_pkl: ", len(LIBHEIF_GIMP_path1_pkl))
print("LIBHEIF_GIMP_path2_pkl: ", len(LIBHEIF_GIMP_path2_pkl))
print()
print("LIBHEIF_path1_pkl: ", len(LIBHEIF_path1_pkl))
print("LIBHEIF_path2_pkl: ", len(LIBHEIF_path2_pkl))
print("GIMP_LIBHEIF_path1_pkl: ", len(GIMP_LIBHEIF_path1_pkl))
print("GIMP_LIBHEIF_path2_pkl: ", len(GIMP_LIBHEIF_path2_pkl))


GIMP_path1_pkl:  90
GIMP_path2_pkl:  90
GIMP_GIMP_path1_pkl:  90
GIMP_GIMP_path2_pkl:  90

GIMP_path1_pkl:  90
GIMP_path2_pkl:  90
LIBHEIF_GIMP_path1_pkl:  90
LIBHEIF_GIMP_path2_pkl:  90

LIBHEIF_path1_pkl:  90
LIBHEIF_path2_pkl:  90
GIMP_LIBHEIF_path1_pkl:  90
GIMP_LIBHEIF_path2_pkl:  90


In [15]:
GIMP_csv = list(zip(GIMP_path1_csv, GIMP_path1_pkl, GIMP_path2_csv, GIMP_path2_pkl))
LIBHEIF_csv = list(zip(LIBHEIF_path1_csv, LIBHEIF_path1_pkl, LIBHEIF_path2_csv, LIBHEIF_path2_pkl))
GIMP_GIMP_csv = list(zip(GIMP_GIMP_path1_csv, GIMP_GIMP_path1_pkl, GIMP_GIMP_path2_csv, GIMP_GIMP_path2_pkl))
LIBHEIF_GIMP_csv = list(zip(LIBHEIF_GIMP_path1_csv, LIBHEIF_GIMP_path1_pkl, LIBHEIF_GIMP_path2_csv, LIBHEIF_GIMP_path2_pkl))
GIMP_LIBHEIF_csv = list(zip(GIMP_LIBHEIF_path1_csv, GIMP_LIBHEIF_path1_pkl, GIMP_LIBHEIF_path2_csv, GIMP_LIBHEIF_path2_pkl))

GIMP_GIMP_list = GIMP_csv + GIMP_GIMP_csv
LIBHEIF_GIMP_list = GIMP_csv + LIBHEIF_GIMP_csv
GIMP_LIBHEIF_list = LIBHEIF_csv + GIMP_LIBHEIF_csv


print("GIMP_GIMP_list: ", len(GIMP_GIMP_list))
print("LIBHEIF_GIMP_list: ", len(LIBHEIF_GIMP_list))
print("GIMP_LIBHEIF_list: ", len(GIMP_LIBHEIF_list))

GIMP_GIMP_list:  180
LIBHEIF_GIMP_list:  180
GIMP_LIBHEIF_list:  180


In [16]:
pu_columns = ["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  
              "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"]

# luminance_columns = ["LU1_0", "LU1_1", "LU1_9", "LU1_10", "LU1_11", "LU1_25", "LU1_26", "LU1_27", 
#                      "LU2_0", "LU2_1", "LU2_9", "LU2_10", "LU2_11", "LU2_25", "LU2_26", "LU2_27"]

luminance_columns = ["LU1_0", "LU1_1", "LU1_10", "LU1_26",
                     "LU2_0", "LU2_1", "LU2_10", "LU2_26"]

chrominance_columns = ["CH1_0", "CH1_1", "CH1_10", "CH1_26", "CH1_34", "CH1_36", 
                       "CH2_0", "CH2_1", "CH2_10", "CH2_26", "CH2_34", "CH2_36"]

label_columns = ["LABEL"]
mae1_columns = [f"MAE1_{i}" for i in range(52)]
mae2_columns = [f"MAE2_{i}" for i in range(52)]
mae_columns = ["MAE"]
final_qp_columns = ["FINAL_QP"]

# データフレームを初期化
train_df1_1 = pd.DataFrame(columns=pu_columns)
train_df1_2 = pd.DataFrame(columns=luminance_columns)
train_df1_3 = pd.DataFrame(columns=chrominance_columns)
train_df2 = pd.DataFrame(columns=label_columns)
train_df3 = pd.DataFrame(columns=mae1_columns)
train_df4 = pd.DataFrame(columns=mae2_columns)
train_df5 = pd.DataFrame(columns=mae_columns)
train_df6 = pd.DataFrame(columns=final_qp_columns)

scaler = MinMaxScaler()

for path1, path2, path3, path4 in train_csv_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    train_pkl_list = [path2, path4]
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    # lu_values = lu_values = [df1.loc[i, "luminance_counts"] for i in range(35)] + [df2.loc[i, "luminance_counts"] for i in range(35)]
    
    lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
    lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
    lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
    lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

    average_10_1 = np.mean(lu_values_10_1)
    average_10_2 = np.mean(lu_values_10_2)
    average_26_1 = np.mean(lu_values_26_1)
    average_26_2 = np.mean(lu_values_26_2)
    
    
    lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]
    
    train_df1_1 = pd.concat([train_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    train_df1_2= pd.concat([train_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    train_df1_3 = pd.concat([train_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)

    # label_columnsの値を取得
    train_df2 = pd.concat([train_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)

    final_QP = extract_finalQP(train_pkl_list[0])

    # MAEの値を取得
    mae_d1, mae_d1_tensor = calculate_mae(train_pkl_list[0])
    mae_d2, _ = calculate_mae(train_pkl_list[1])
    
    # mae1_columnsの値を取得
    train_df3 = pd.concat([train_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1[i]] for i in range(52)})], ignore_index=True)

    # mae2_columnsの値を取得
    train_df4 = pd.concat([train_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2[i]] for i in range(52)})], ignore_index=True)

    # mae_columnsの値を取得
    train_df5 = pd.concat([train_df5, pd.DataFrame({"MAE": [mae_d1_tensor]})], ignore_index=True)

    # final_qp_columnsの値を取得
    train_df6 = pd.concat([train_df6, pd.DataFrame({"FINAL_QP": [final_QP]})], ignore_index=True)

# インデックスをリセット
train_df1_1.reset_index(drop=True, inplace=True)
train_df1_2.reset_index(drop=True, inplace=True)
train_df1_3.reset_index(drop=True, inplace=True)
train_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
train_df = pd.concat([train_df1_1, train_df1_2, train_df1_3, train_df3, train_df4], axis=1)
train_df_onlyGhost = pd.concat([train_df3, train_df4], axis=1)

In [17]:
# スケーラーを使って結合したデータをスケーリング
X_train = scaler.fit_transform(train_df)
X_train_onlyGhost = scaler.fit_transform(train_df_onlyGhost)

# pandasをndarrayに変換
MAE = train_df5.values
FINAL_QP = train_df6.values

# ラベルの準備
Y_train = train_df2['LABEL'].astype(int)

print(f'Length of X_train: {len(X_train)}')
print(f'Length of X_train_onlyGhost: {len(X_train_onlyGhost)}')
print(f'Length of Y_train: {len(Y_train)}')
print(f'Length of MAE: {len(MAE)}')
print(f'Length of FINAL_QP: {len(FINAL_QP)}')

Length of X_train: 4140
Length of X_train_onlyGhost: 4140
Length of Y_train: 4140
Length of MAE: 4140
Length of FINAL_QP: 4140


In [18]:
# 列名をリストにまとめる
pu_columns = ["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"]
# luminance_columns = ["LU1_0","LU1_1","LU1_9","LU1_10","LU1_11","LU1_25","LU1_26","LU1_27", "LU2_0","LU2_1","LU2_9","LU2_10","LU2_11","LU2_25","LU2_26","LU2_27"]

luminance_columns = ["LU1_0", "LU1_1", "LU1_10", "LU1_26",
                     "LU2_0", "LU2_1", "LU2_10", "LU2_26"]

chrominance_columns = ["CH1_0","CH1_1","CH1_10","CH1_26","CH1_34","CH1_36", "CH2_0","CH2_1","CH2_10","CH2_26","CH2_34","CH2_36"]

label_columns = ["LABEL"]
mae1_columns = [f"MAE1_{i}" for i in range(52)]
mae2_columns = [f"MAE2_{i}" for i in range(52)]
mae_columns = ["MAE"]
final_qp_columns = ["FINAL_QP"]

# データフレームを初期化

GIMP_GIMP_df1_1 = pd.DataFrame(columns=pu_columns)
GIMP_GIMP_df1_2 = pd.DataFrame(columns=luminance_columns)
GIMP_GIMP_df1_3 = pd.DataFrame(columns=chrominance_columns)

GIMP_GIMP_df2 = pd.DataFrame(columns=label_columns)
GIMP_GIMP_df3 = pd.DataFrame(columns=mae1_columns)
GIMP_GIMP_df4 = pd.DataFrame(columns=mae2_columns)
GIMP_GIMP_df5 = pd.DataFrame(columns=mae_columns)
GIMP_GIMP_df6 = pd.DataFrame(columns=final_qp_columns)

scaler = MinMaxScaler()

for path1, path2, path3, path4 in GIMP_GIMP_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    test_pkl_list = [path2, path4]
    
    # pu_columnsの値を取得
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    
    
    lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
    lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
    lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
    lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

    average_10_1 = np.mean(lu_values_10_1)
    average_10_2 = np.mean(lu_values_10_2)
    average_26_1 = np.mean(lu_values_26_1)
    average_26_2 = np.mean(lu_values_26_2)
    
    
    lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    
    
    
    
    
    
    
    
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]
    
    GIMP_GIMP_df1_1 = pd.concat([GIMP_GIMP_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    GIMP_GIMP_df1_2 = pd.concat([GIMP_GIMP_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    GIMP_GIMP_df1_3 = pd.concat([GIMP_GIMP_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)
        
    
    # label_columnsの値を取得
    GIMP_GIMP_df2 = pd.concat([GIMP_GIMP_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)
    
    final_QP = extract_finalQP(test_pkl_list[0])
    
    # MAEの値を取得
    mae_d1, mae_d1_old = calculate_mae(test_pkl_list[0])
    mae_d2, _ = calculate_mae(test_pkl_list[1])
    
    
    # mae_columnsの値を取得
    GIMP_GIMP_df5 = pd.concat([GIMP_GIMP_df5, pd.DataFrame({"MAE": [mae_d1_old]})], ignore_index=True)
    
    # final_qp_columnsの値を取得
    GIMP_GIMP_df6 = pd.concat([GIMP_GIMP_df6, pd.DataFrame({"FINAL_QP": [final_QP]})], ignore_index=True)
    
    # mae1_columnsの値を取得
    GIMP_GIMP_df3 = pd.concat([GIMP_GIMP_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1[i]] for i in range(52)})], ignore_index=True)
    
    # mae2_columnsの値を取得
    GIMP_GIMP_df4 = pd.concat([GIMP_GIMP_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2[i]] for i in range(52)})], ignore_index=True)

# インデックスをリセット
GIMP_GIMP_df1_1.reset_index(drop=True, inplace=True)
GIMP_GIMP_df1_2.reset_index(drop=True, inplace=True)
GIMP_GIMP_df1_3.reset_index(drop=True, inplace=True)
GIMP_GIMP_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
GIMP_GIMP_df = pd.concat([GIMP_GIMP_df1_1, GIMP_GIMP_df1_2, GIMP_GIMP_df1_3, GIMP_GIMP_df3, GIMP_GIMP_df4], axis=1)
GIMP_GIMP_df_onlyGhost = pd.concat([GIMP_GIMP_df3, GIMP_GIMP_df4], axis=1)

In [19]:
# スケーラーを使って結合したデータをスケーリング
X_GIMP_GIMP_test = scaler.fit_transform(GIMP_GIMP_df)
X_GIMP_GIMP_test_onlyGhost = scaler.fit_transform(GIMP_GIMP_df_onlyGhost)

# pandasをndarrayに変換
GIMP_GIMP_df5_np = GIMP_GIMP_df5.values
GIMP_GIMP_QP = GIMP_GIMP_df6.values

# ラベルの準備
Y_GIMP_GIMP_test = GIMP_GIMP_df2['LABEL'].astype(int)

print(f'Length of X_GIMP_GIMP_test: {len(X_GIMP_GIMP_test)}')
print(f'Length of X_GIMP_GIMP_test_onlyGhost: {len(X_GIMP_GIMP_test_onlyGhost)}')
print(f'Length of Y_GIMP_GIMP_test: {len(Y_GIMP_GIMP_test)}')
print(f'Length of GIMP_GIMP_df5_np: {len(GIMP_GIMP_df5_np)}')
print(f'Length of GIMP_GIMP_QP: {len(GIMP_GIMP_QP)}')

Length of X_GIMP_GIMP_test: 180
Length of X_GIMP_GIMP_test_onlyGhost: 180
Length of Y_GIMP_GIMP_test: 180
Length of GIMP_GIMP_df5_np: 180
Length of GIMP_GIMP_QP: 180


In [20]:
# 列名をリストにまとめる
pu_columns = ["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"]
# luminance_columns = ["LU1_0","LU1_1","LU1_9","LU1_10","LU1_11","LU1_25","LU1_26","LU1_27", "LU2_0","LU2_1","LU2_9","LU2_10","LU2_11","LU2_25","LU2_26","LU2_27"]

luminance_columns = ["LU1_0", "LU1_1", "LU1_10", "LU1_26",
                     "LU2_0", "LU2_1", "LU2_10", "LU2_26"]

chrominance_columns = ["CH1_0","CH1_1","CH1_10","CH1_26","CH1_34","CH1_36", "CH2_0","CH2_1","CH2_10","CH2_26","CH2_34","CH2_36"]

label_columns = ["LABEL"]
mae1_columns = [f"MAE1_{i}" for i in range(52)]
mae2_columns = [f"MAE2_{i}" for i in range(52)]
mae_columns = ["MAE"]
final_qp_columns = ["FINAL_QP"]

# データフレームを初期化
LIBHEIF_GIMP_df1_1 = pd.DataFrame(columns=pu_columns)
LIBHEIF_GIMP_df1_2 = pd.DataFrame(columns=luminance_columns)
LIBHEIF_GIMP_df1_3 = pd.DataFrame(columns=chrominance_columns)

LIBHEIF_GIMP_df2 = pd.DataFrame(columns=label_columns)
LIBHEIF_GIMP_df3 = pd.DataFrame(columns=mae1_columns)
LIBHEIF_GIMP_df4 = pd.DataFrame(columns=mae2_columns)
LIBHEIF_GIMP_df5 = pd.DataFrame(columns=mae_columns)
LIBHEIF_GIMP_df6 = pd.DataFrame(columns=final_qp_columns)

scaler = MinMaxScaler()

for path1, path2, path3, path4 in LIBHEIF_GIMP_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    test_pkl_list = [path2, path4]
    
    # pu_columnsの値を取得
    
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    
    lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
    lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
    lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
    lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

    average_10_1 = np.mean(lu_values_10_1)
    average_10_2 = np.mean(lu_values_10_2)
    average_26_1 = np.mean(lu_values_26_1)
    average_26_2 = np.mean(lu_values_26_2)
    
    
    lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    
    
    
    
    
    
    
    
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]
    
    LIBHEIF_GIMP_df1_1 = pd.concat([LIBHEIF_GIMP_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    LIBHEIF_GIMP_df1_2 = pd.concat([LIBHEIF_GIMP_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    LIBHEIF_GIMP_df1_3 = pd.concat([LIBHEIF_GIMP_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)
    
    # label_columnsの値を取得
    LIBHEIF_GIMP_df2 = pd.concat([LIBHEIF_GIMP_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)
    
    final_QP = extract_finalQP(test_pkl_list[0])
    
    # MAEの値を取得
    mae_d1, mae_d1_old = calculate_mae(test_pkl_list[0])
    mae_d2, _ = calculate_mae(test_pkl_list[1])
    
    
    # mae_columnsの値を取得
    LIBHEIF_GIMP_df5 = pd.concat([LIBHEIF_GIMP_df5, pd.DataFrame({"MAE": [mae_d1_old]})], ignore_index=True)
    
    # final_qp_columnsの値を取得
    LIBHEIF_GIMP_df6 = pd.concat([LIBHEIF_GIMP_df6, pd.DataFrame({"FINAL_QP": [final_QP]})], ignore_index=True)
    
    # mae1_columnsの値を取得
    LIBHEIF_GIMP_df3 = pd.concat([LIBHEIF_GIMP_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1[i]] for i in range(52)})], ignore_index=True)
    
    # mae2_columnsの値を取得
    LIBHEIF_GIMP_df4 = pd.concat([LIBHEIF_GIMP_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2[i]] for i in range(52)})], ignore_index=True)

# インデックスをリセット
LIBHEIF_GIMP_df1_1.reset_index(drop=True, inplace=True)
LIBHEIF_GIMP_df1_2.reset_index(drop=True, inplace=True)
LIBHEIF_GIMP_df1_3.reset_index(drop=True, inplace=True)
LIBHEIF_GIMP_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
LIBHEIF_GIMP_df = pd.concat([LIBHEIF_GIMP_df1_1, LIBHEIF_GIMP_df1_2, LIBHEIF_GIMP_df1_3, LIBHEIF_GIMP_df3, LIBHEIF_GIMP_df4], axis=1)
LIBHEIF_GIMP_df_onlyGhost = pd.concat([LIBHEIF_GIMP_df3, LIBHEIF_GIMP_df4], axis=1)

In [21]:
# スケーラーを使って結合したデータをスケーリング
X_LIBHEIF_GIMP_test = scaler.fit_transform(LIBHEIF_GIMP_df)
X_LIBHEIF_GIMP_test_onlyGhost = scaler.fit_transform(LIBHEIF_GIMP_df_onlyGhost)

# pandasをndarrayに変換
LIBHEIF_GIMP_df5_np = LIBHEIF_GIMP_df5.values
LIBHEIF_GIMP_QP = LIBHEIF_GIMP_df6.values

# ラベルの準備
Y_LIBHEIF_GIMP_test = LIBHEIF_GIMP_df2['LABEL'].astype(int)

print(f'Length of X_LIBHEIF_GIMP_test: {len(X_LIBHEIF_GIMP_test)}')
print(f'Length of X_LIBHEIF_GIMP_test_onlyGhost: {len(X_LIBHEIF_GIMP_test_onlyGhost)}')
print(f'Length of Y_LIBHEIF_GIMP_test: {len(Y_LIBHEIF_GIMP_test)}')
print(f'Length of LIBHEIF_GIMP_df5_np: {len(LIBHEIF_GIMP_df5_np)}')
print(f'Length of LIBHEIF_GIMP_QP: {len(LIBHEIF_GIMP_QP)}')
print()

Length of X_LIBHEIF_GIMP_test: 180
Length of X_LIBHEIF_GIMP_test_onlyGhost: 180
Length of Y_LIBHEIF_GIMP_test: 180
Length of LIBHEIF_GIMP_df5_np: 180
Length of LIBHEIF_GIMP_QP: 180



In [22]:
# 列名をリストにまとめる
pu_columns = ["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"]
# luminance_columns = ["LU1_0","LU1_1","LU1_9","LU1_10","LU1_11","LU1_25","LU1_26","LU1_27", "LU2_0","LU2_1","LU2_9","LU2_10","LU2_11","LU2_25","LU2_26","LU2_27"]

luminance_columns = ["LU1_0", "LU1_1", "LU1_10", "LU1_26",
                     "LU2_0", "LU2_1", "LU2_10", "LU2_26"]

chrominance_columns = ["CH1_0","CH1_1","CH1_10","CH1_26","CH1_34","CH1_36", "CH2_0","CH2_1","CH2_10","CH2_26","CH2_34","CH2_36"]

label_columns = ["LABEL"]
mae1_columns = [f"MAE1_{i}" for i in range(52)]
mae2_columns = [f"MAE2_{i}" for i in range(52)]
mae_columns = ["MAE"]
final_qp_columns = ["FINAL_QP"]

# データフレームを初期化
GIMP_LIBHEIF_df1_1 = pd.DataFrame(columns=pu_columns)
GIMP_LIBHEIF_df1_2 = pd.DataFrame(columns=luminance_columns)
GIMP_LIBHEIF_df1_3 = pd.DataFrame(columns=chrominance_columns)

GIMP_LIBHEIF_df2 = pd.DataFrame(columns=label_columns)
GIMP_LIBHEIF_df3 = pd.DataFrame(columns=mae1_columns)
GIMP_LIBHEIF_df4 = pd.DataFrame(columns=mae2_columns)
GIMP_LIBHEIF_df5 = pd.DataFrame(columns=mae_columns)
GIMP_LIBHEIF_df6 = pd.DataFrame(columns=final_qp_columns)

scaler = MinMaxScaler()

for path1, path2, path3, path4 in GIMP_LIBHEIF_list:
    label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path3)
    test_pkl_list = [path2, path4]
    
    # pu_columnsの値を取得
    pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
    # lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
    
    lu_values_10_1 = [df1.loc[i, "luminance_counts"] for i in [9,10,11]]
    lu_values_10_2 = [df2.loc[i, "luminance_counts"] for i in [9,10,11]]
    
    lu_values_26_1 = [df1.loc[i, "luminance_counts"] for i in [25,26,27]] 
    lu_values_26_2 = [df2.loc[i, "luminance_counts"] for i in [25,26,27]]

    average_10_1 = np.mean(lu_values_10_1)
    average_10_2 = np.mean(lu_values_10_2)
    average_26_1 = np.mean(lu_values_26_1)
    average_26_2 = np.mean(lu_values_26_2)
    
    
    lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_1)] + [int(average_26_1)] + [df2.loc[i, "luminance_counts"] for i in [0,1]] + [int(average_10_2)] + [int(average_26_2)]
    
    
    ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]
    
    GIMP_LIBHEIF_df1_1 = pd.concat([GIMP_LIBHEIF_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
    GIMP_LIBHEIF_df1_2 = pd.concat([GIMP_LIBHEIF_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
    GIMP_LIBHEIF_df1_3 = pd.concat([GIMP_LIBHEIF_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)
    
    # label_columnsの値を取得
    GIMP_LIBHEIF_df2 = pd.concat([GIMP_LIBHEIF_df2, pd.DataFrame({"LABEL": [label]})], ignore_index=True)
    
    final_QP = extract_finalQP(test_pkl_list[0])
    
    # MAEの値を取得
    mae_d1, mae_d1_old = calculate_mae(test_pkl_list[0])
    mae_d2, _ = calculate_mae(test_pkl_list[1])
    
    
    # mae_columnsの値を取得
    GIMP_LIBHEIF_df5 = pd.concat([GIMP_LIBHEIF_df5, pd.DataFrame({"MAE": [mae_d1_old]})], ignore_index=True)
    
    # final_qp_columnsの値を取得
    GIMP_LIBHEIF_df6 = pd.concat([GIMP_LIBHEIF_df6, pd.DataFrame({"FINAL_QP": [final_QP]})], ignore_index=True)
    
    # mae1_columnsの値を取得
    GIMP_LIBHEIF_df3 = pd.concat([GIMP_LIBHEIF_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1[i]] for i in range(52)})], ignore_index=True)
    
    # mae2_columnsの値を取得
    GIMP_LIBHEIF_df4 = pd.concat([GIMP_LIBHEIF_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2[i]] for i in range(52)})], ignore_index=True)

# インデックスをリセット
GIMP_LIBHEIF_df1_1.reset_index(drop=True, inplace=True)
GIMP_LIBHEIF_df1_2.reset_index(drop=True, inplace=True)
GIMP_LIBHEIF_df1_3.reset_index(drop=True, inplace=True)
GIMP_LIBHEIF_df2.reset_index(drop=True, inplace=True)
    
# データフレームを結合
GIMP_LIBHEIF_df = pd.concat([GIMP_LIBHEIF_df1_1, GIMP_LIBHEIF_df1_2, GIMP_LIBHEIF_df1_3, GIMP_LIBHEIF_df3, GIMP_LIBHEIF_df4], axis=1)
GIMP_LIBHEIF_df_onlyGhost = pd.concat([GIMP_LIBHEIF_df3, GIMP_LIBHEIF_df4], axis=1)

In [23]:
# スケーラーを使って結合したデータをスケーリング
X_GIMP_LIBHEIF_test = scaler.fit_transform(GIMP_LIBHEIF_df)
X_GIMP_LIBHEIF_test_onlyGhost = scaler.fit_transform(GIMP_LIBHEIF_df_onlyGhost)

# pandasをndarrayに変換
GIMP_LIBHEIF_df5_np = GIMP_LIBHEIF_df5.values
GIMP_LIBHEIF_QP = GIMP_LIBHEIF_df6.values

# ラベルの準備
Y_GIMP_LIBHEIF_test = GIMP_LIBHEIF_df2['LABEL'].astype(int)

print(f'Length of X_GIMP_LIBHEIF_test: {len(X_GIMP_LIBHEIF_test)}')
print(f'Length of X_GIMP_LIBHEIF_test_onlyGhost: {len(X_GIMP_LIBHEIF_test_onlyGhost)}')
print(f'Length of Y_GIMP_LIBHEIF_test: {len(Y_GIMP_LIBHEIF_test)}')
print(f'Length of GIMP_LIBHEIF_df5_np: {len(GIMP_LIBHEIF_df5_np)}')
print(f'Length of GIMP_LIBHEIF_QP: {len(GIMP_LIBHEIF_QP)}')


Length of X_GIMP_LIBHEIF_test: 180
Length of X_GIMP_LIBHEIF_test_onlyGhost: 180
Length of Y_GIMP_LIBHEIF_test: 180
Length of GIMP_LIBHEIF_df5_np: 180
Length of GIMP_LIBHEIF_QP: 180


In [24]:
plt.rcParams["font.size"]=5
plt.rcParams["figure.figsize"]=(2.0, 1.0)
plt.rcParams["figure.dpi"]= 300


# Cの範囲を指定
C_values = {'C': [0.01, 0.1, 1, 10, 100, 1000, 2000, 3000, 4000, 5000]}
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# 結果のデータフレームを初期化
results = pd.DataFrame(columns=['C_RBF_GIMP_GIMP','GIMP_GIMP_RBF','C_RBF_LIBHEIF_GIMP','LIBHEIF_GIMP_RBF',
                                'C_RBF_GIMP_LIBHEIF','GIMP_LIBHEIF_RBF','C_LINEAR_GIMP_GIMP','GIMP_GIMP_LINEAR',
                                'C_LINEAR_LIBHEIF_GIMP', 'LIBHEIF_GIMP_LINEAR','C_LINEAR_GIMP_LIBHEIF', 'GIMP_LIBHEIF_LINEAR',
                                'C_onlyGhost_RBF_GIMP_GIMP', 'GIMP_GIMP_onlyGhost_RBF','C_onlyGhost_RBF_LIBHEIF_GIMP', 'LIBHEIF_GIMP_onlyGhost_RBF',
                                'C_onlyGhost_RBF_GIMP_LIBHEIF', 'GIMP_LIBHEIF_onlyGhost_RBF','C_onlyGhost_LINEAR_GIMP_GIMP', 'GIMP_GIMP_onlyGhost_LINEAR',
                                'C_onlyGhost_LINEAR_LIBHEIF_GIMP', 'LIBHEIF_GIMP_onlyGhost_LINEAR','C_onlyGhost_LINEAR_GIMP_LIBHEIF', 'GIMP_LIBHEIF_onlyGhost_LINEAR',
                                'Threshold_GIMP_GIMP', 'GIMP_GIMP_old','Threshold_LIBHEIF_GIMP', 'LIBHEIF_GIMP_old',
                                'Threshold_GIMP_LIBHEIF', 'GIMP_LIBHEIF_old'])


# 訓練用データ（RBFとlinearカーネル用）
original_X_train, original_X_train_onlyGhost = X_train, X_train_onlyGhost
original_Y_train = Y_train

# テストデータ(GIMP-GIMP, LIBHEIF_GIMP, GIMP_LIBHEIF)
original_X_GIMP_GIMP_test, original_X_GIMP_GIMP_test_onlyGhost = X_GIMP_GIMP_test, X_GIMP_GIMP_test_onlyGhost
original_X_LIBHEIF_GIMP_test, original_X_LIBHEIF_GIMP_test_onlyGhost = X_LIBHEIF_GIMP_test, X_LIBHEIF_GIMP_test_onlyGhost
original_X_GIMP_LIBHEIF_test, original_X_GIMP_LIBHEIF_test_onlyGhost = X_GIMP_LIBHEIF_test, X_GIMP_LIBHEIF_test_onlyGhost

original_Y_GIMP_GIMP_test = Y_GIMP_GIMP_test
original_Y_LIBHEIF_GIMP_test = Y_LIBHEIF_GIMP_test
original_Y_GIMP_LIBHEIF_test = Y_GIMP_LIBHEIF_test

# 閾値用テストデータ(GIMP-GIMP, LIBHEIF_GIMP, GIMP_LIBHEIF)
original_GIMP_GIMP_old, original_GIMP_GIMP_final_QP = GIMP_GIMP_df5_np, GIMP_GIMP_QP
original_LIBHEIF_GIMP_old, original_LIBHEIF_GIMP_final_QP = LIBHEIF_GIMP_df5_np, LIBHEIF_GIMP_QP
original_GIMP_LIBHEIF_old, original_GIMP_LIBHEIF_final_QP = GIMP_LIBHEIF_df5_np, GIMP_LIBHEIF_QP


# k-fold cross-validation
for fold, (train_ids, test_ids) in enumerate(kfold.split(original_X_train, original_Y_train)):
    print(f"<Fold-{fold+1}>")
    print(len(train_ids), len(test_ids))
    
    results_old = []
    
    X_train_val, _ = original_X_train[train_ids], original_X_train[test_ids]
    X_train_onlyGhost_val, _ = original_X_train_onlyGhost[train_ids], original_X_train_onlyGhost[test_ids]
    
    X_GIMP_GIMP_test, X_GIMP_GIMP_test_onlyGhost = original_X_GIMP_GIMP_test, original_X_GIMP_GIMP_test_onlyGhost
    X_LIBHEIF_GIMP_test, X_LIBHEIF_GIMP_test_onlyGhost = original_X_LIBHEIF_GIMP_test, original_X_LIBHEIF_GIMP_test_onlyGhost
    X_GIMP_LIBHEIF_test, X_GIMP_LIBHEIF_test_onlyGhost = original_X_GIMP_LIBHEIF_test, original_X_GIMP_LIBHEIF_test_onlyGhost
    
    X_GIMP_GIMP_old, GIMP_GIMP_QP = original_GIMP_GIMP_old, original_GIMP_GIMP_final_QP
    X_LIBHEIF_GIMP_old, LIBHEIF_GIMP_QP = original_LIBHEIF_GIMP_old, original_LIBHEIF_GIMP_final_QP
    X_GIMP_LIBHEIF_old, GIMP_LIBHEIF_QP = original_GIMP_LIBHEIF_old, original_GIMP_LIBHEIF_final_QP
    
    
    # 全体を訓練・検証ラベルとテストラベルに分割
    Y_train_val, _ = original_Y_train[train_ids], original_Y_train[test_ids]
    
    Y_GIMP_GIMP_test = original_Y_GIMP_GIMP_test
    Y_LIBHEIF_GIMP_test = original_Y_LIBHEIF_GIMP_test
    Y_GIMP_LIBHEIF_test = original_Y_GIMP_LIBHEIF_test
    
    # 訓練・検証データ（ラベル）を訓練データ（ラベル）と検証データ（ラベル）に分割
    X_train, X_val, Y_train, Y_val = train_test_split(X_train_val, Y_train_val, test_size=414, random_state=42)
    X_train_onlyGhost, X_val_onlyGhost, Y_train, Y_val = train_test_split(X_train_onlyGhost_val, Y_train_val, test_size=414, random_state=42)
    
 
    GIMP_GIMP_best_threshold = 0
    GIMP_GIMP_best_accuracy = 0
    GIMP_GIMP_best_predicted_labels = []
    GIMP_GIMP_best_ground_truth_labels = []
    
    LIBHEIF_GIMP_best_threshold = 0
    LIBHEIF_GIMP_best_accuracy = 0
    LIBHEIF_GIMP_best_predicted_labels = []
    LIBHEIF_GIMP_best_ground_truth_labels = []
    
    GIMP_LIBHEIF_best_threshold = 0
    GIMP_LIBHEIF_best_accuracy = 0
    GIMP_LIBHEIF_best_predicted_labels = []
    GIMP_LIBHEIF_best_ground_truth_labels = []
    
    for threshold in np.arange(0.01,1.01,0.01):
        GIMP_GIMP_old = [is_double_compressed(X_GIMP_GIMP_old[i], GIMP_GIMP_QP[i], threshold) for i in range(180)]
        
        GIMP_GIMP_predicted_labels = [int(is_double) for is_double in GIMP_GIMP_old]
        GIMP_GIMP_ground_truth_labels = [label for label in Y_GIMP_GIMP_test]
        GIMP_GIMP_accuracy = sum(1 for true_label, pred_label in zip(GIMP_GIMP_ground_truth_labels, GIMP_GIMP_predicted_labels) if true_label == pred_label) / len(GIMP_GIMP_ground_truth_labels)
        
        if GIMP_GIMP_accuracy > GIMP_GIMP_best_accuracy:
            GIMP_GIMP_best_accuracy = GIMP_GIMP_accuracy
            GIMP_GIMP_best_threshold = threshold
            GIMP_GIMP_best_predicted_labels = GIMP_GIMP_predicted_labels
            GIMP_GIMP_best_ground_truth_labels = GIMP_GIMP_ground_truth_labels
            
            
    for threshold in np.arange(0.01,1.01,0.01):
        LIBHEIF_GIMP_old = [is_double_compressed(X_LIBHEIF_GIMP_old[i], LIBHEIF_GIMP_QP[i], threshold) for i in range(180)]
        
        LIBHEIF_GIMP_predicted_labels = [int(is_double) for is_double in LIBHEIF_GIMP_old]
        LIBHEIF_GIMP_ground_truth_labels = [label for label in Y_LIBHEIF_GIMP_test]
        LIBHEIF_GIMP_accuracy = sum(1 for true_label, pred_label in zip(LIBHEIF_GIMP_ground_truth_labels, LIBHEIF_GIMP_predicted_labels) if true_label == pred_label) / len(LIBHEIF_GIMP_ground_truth_labels)
        
        if LIBHEIF_GIMP_accuracy > LIBHEIF_GIMP_best_accuracy:
            LIBHEIF_GIMP_best_accuracy = LIBHEIF_GIMP_accuracy
            LIBHEIF_GIMP_best_threshold = threshold
            LIBHEIF_GIMP_best_predicted_labels = LIBHEIF_GIMP_predicted_labels
            LIBHEIF_GIMP_best_ground_truth_labels = LIBHEIF_GIMP_ground_truth_labels
            
            
    for threshold in np.arange(0.01,1.01,0.01):
        GIMP_LIBHEIF_old = [is_double_compressed(X_GIMP_LIBHEIF_old[i], GIMP_LIBHEIF_QP[i], threshold) for i in range(180)]
        
        GIMP_LIBHEIF_predicted_labels = [int(is_double) for is_double in GIMP_LIBHEIF_old]
        GIMP_LIBHEIF_ground_truth_labels = [label for label in Y_GIMP_LIBHEIF_test]
        GIMP_LIBHEIF_accuracy = sum(1 for true_label, pred_label in zip(GIMP_LIBHEIF_ground_truth_labels, GIMP_LIBHEIF_predicted_labels) if true_label == pred_label) / len(GIMP_LIBHEIF_ground_truth_labels)
        
        if GIMP_LIBHEIF_accuracy > GIMP_LIBHEIF_best_accuracy:
            GIMP_LIBHEIF_best_accuracy = GIMP_LIBHEIF_accuracy
            GIMP_LIBHEIF_best_threshold = threshold
            GIMP_LIBHEIF_best_predicted_labels = GIMP_LIBHEIF_predicted_labels
            GIMP_LIBHEIF_best_ground_truth_labels = GIMP_LIBHEIF_ground_truth_labels
            
    
    best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = 0, None, None
    best_val_score_onlyGhost_RBF, best_svm_model_onlyGhost_RBF, best_c_value_onlyGhost_RBF = 0, None, None
    
    best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = 0, None, None
    best_val_score_onlyGhost_LINEAR, best_svm_model_onlyGhost_LINEAR, best_c_value_onlyGhost_LINEAR = 0, None, None
        
    for C_value in C_values['C']:    
        # SVMモデルのインスタンスを作成
        svm_model_RBF = SVC(kernel='rbf', C=C_value)
        svm_model_onlyGhost_RBF = SVC(kernel='rbf', C=C_value)
        
        svm_model_LINEAR = SVC(kernel='linear', C=C_value)
        svm_model_onlyGhost_LINEAR = SVC(kernel='linear', C=C_value)

        # 訓練データで訓練
        svm_model_RBF.fit(X_train, Y_train)
        svm_model_onlyGhost_RBF.fit(X_train_onlyGhost, Y_train)
        
        svm_model_LINEAR.fit(X_train, Y_train)
        svm_model_onlyGhost_LINEAR.fit(X_train_onlyGhost, Y_train)

        
        val_accuracy_RBF = accuracy_score(Y_val, svm_model_RBF.predict(X_val))
        val_accuracy_onlyGhost_RBF = accuracy_score(Y_val, svm_model_onlyGhost_RBF.predict(X_val_onlyGhost))
        
        val_accuracy_LINEAR = accuracy_score(Y_val, svm_model_LINEAR.predict(X_val))
        val_accuracy_onlyGhost_LINEAR = accuracy_score(Y_val, svm_model_onlyGhost_LINEAR.predict(X_val_onlyGhost))
        

        # 検証データでの精度が最も高かった場合、そのモデルを保存
        if val_accuracy_RBF > best_val_score_RBF:
            best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = val_accuracy_RBF, svm_model_RBF, C_value

        if val_accuracy_onlyGhost_RBF > best_val_score_onlyGhost_RBF:
            best_val_score_onlyGhost_RBF, best_svm_model_onlyGhost_RBF, best_c_value_onlyGhost_RBF = val_accuracy_onlyGhost_RBF, svm_model_onlyGhost_RBF, C_value
            
        if val_accuracy_LINEAR > best_val_score_LINEAR:
            best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = val_accuracy_LINEAR, svm_model_LINEAR, C_value

        if val_accuracy_onlyGhost_LINEAR > best_val_score_onlyGhost_LINEAR:
            best_val_score_onlyGhost_LINEAR, best_svm_model_onlyGhost_LINEAR, best_c_value_onlyGhost_LINEAR = val_accuracy_onlyGhost_LINEAR, svm_model_onlyGhost_LINEAR, C_value

            
    # テストデータで評価
    GIMP_GIMP_predictions_RBF = best_svm_model_RBF.predict(X_GIMP_GIMP_test)
    GIMP_GIMP_predictions_prob_RBF = best_svm_model_RBF.decision_function(X_GIMP_GIMP_test)
    GIMP_GIMP_accuracy_RBF = accuracy_score(Y_GIMP_GIMP_test, GIMP_GIMP_predictions_RBF)
    GIMP_GIMP_report_RBF = classification_report(Y_GIMP_GIMP_test, GIMP_GIMP_predictions_RBF, digits=4)
    print(f'GIMP_GIMP_report_RBF:\n{GIMP_GIMP_report_RBF}')
    
    LIBHEIF_GIMP_predictions_RBF = best_svm_model_RBF.predict(X_LIBHEIF_GIMP_test)
    LIBHEIF_GIMP_predictions_prob_RBF = best_svm_model_RBF.decision_function(X_LIBHEIF_GIMP_test)
    LIBHEIF_GIMP_accuracy_RBF = accuracy_score(Y_LIBHEIF_GIMP_test, LIBHEIF_GIMP_predictions_RBF)
    LIBHEIF_GIMP_report_RBF = classification_report(Y_LIBHEIF_GIMP_test, LIBHEIF_GIMP_predictions_RBF, digits=4)
    print(f'LIBHEIF_GIMP_report_RBF:\n{LIBHEIF_GIMP_report_RBF}')
    
    GIMP_LIBHEIF_predictions_RBF = best_svm_model_RBF.predict(X_GIMP_LIBHEIF_test)
    GIMP_LIBHEIF_predictions_prob_RBF = best_svm_model_RBF.decision_function(X_GIMP_LIBHEIF_test)
    GIMP_LIBHEIF_accuracy_RBF = accuracy_score(Y_GIMP_LIBHEIF_test, GIMP_LIBHEIF_predictions_RBF)
    GIMP_LIBHEIF_report_RBF = classification_report(Y_GIMP_LIBHEIF_test, GIMP_LIBHEIF_predictions_RBF, digits=4)
    print(f'GIMP_LIBHEIF_report_RBF:\n{GIMP_LIBHEIF_report_RBF}')
    
    GIMP_GIMP_predictions_LINEAR = best_svm_model_LINEAR.predict(X_GIMP_GIMP_test)
    GIMP_GIMP_predictions_prob_LINEAR = best_svm_model_LINEAR.decision_function(X_GIMP_GIMP_test)
    GIMP_GIMP_accuracy_LINEAR = accuracy_score(Y_GIMP_GIMP_test, GIMP_GIMP_predictions_LINEAR)
    GIMP_GIMP_report_LINEAR = classification_report(Y_GIMP_GIMP_test, GIMP_GIMP_predictions_LINEAR, digits=4)
    print(f'GIMP_GIMP_report_LINEAR:\n{GIMP_GIMP_report_LINEAR}')
    
    LIBHEIF_GIMP_predictions_LINEAR = best_svm_model_LINEAR.predict(X_LIBHEIF_GIMP_test)
    LIBHEIF_GIMP_predictions_prob_LINEAR = best_svm_model_LINEAR.decision_function(X_LIBHEIF_GIMP_test)
    LIBHEIF_GIMP_accuracy_LINEAR = accuracy_score(Y_LIBHEIF_GIMP_test, LIBHEIF_GIMP_predictions_LINEAR)
    LIBHEIF_GIMP_report_LINEAR = classification_report(Y_LIBHEIF_GIMP_test, LIBHEIF_GIMP_predictions_LINEAR, digits=4)
    print(f'LIBHEIF_GIMP_report_LINEAR:\n{LIBHEIF_GIMP_report_LINEAR}')
    
    GIMP_LIBHEIF_predictions_LINEAR = best_svm_model_LINEAR.predict(X_GIMP_LIBHEIF_test)
    GIMP_LIBHEIF_predictions_prob_LINEAR = best_svm_model_LINEAR.decision_function(X_GIMP_LIBHEIF_test)
    GIMP_LIBHEIF_accuracy_LINEAR = accuracy_score(Y_GIMP_LIBHEIF_test, GIMP_LIBHEIF_predictions_LINEAR)
    GIMP_LIBHEIF_report_LINEAR = classification_report(Y_GIMP_LIBHEIF_test, GIMP_LIBHEIF_predictions_LINEAR, digits=4)
    print(f'GIMP_LIBHEIF_report_LINEAR:\n{GIMP_LIBHEIF_report_LINEAR}')
    
    
    # テストデータで評価
    GIMP_GIMP_predictions_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.predict(X_GIMP_GIMP_test_onlyGhost)
    GIMP_GIMP_predictions_prob_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.decision_function(X_GIMP_GIMP_test_onlyGhost)
    GIMP_GIMP_accuracy_onlyGhost_RBF = accuracy_score(Y_GIMP_GIMP_test, GIMP_GIMP_predictions_onlyGhost_RBF)
    GIMP_GIMP_report_onlyGhost_RBF = classification_report(Y_GIMP_GIMP_test, GIMP_GIMP_predictions_onlyGhost_RBF, digits=4)
    print(f'GIMP_GIMP_report_onlyGhost_RBF:\n{GIMP_GIMP_report_onlyGhost_RBF}')
    
    LIBHEIF_GIMP_predictions_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.predict(X_LIBHEIF_GIMP_test_onlyGhost)
    LIBHEIF_GIMP_predictions_prob_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.decision_function(X_LIBHEIF_GIMP_test_onlyGhost)
    LIBHEIF_GIMP_accuracy_onlyGhost_RBF = accuracy_score(Y_LIBHEIF_GIMP_test, LIBHEIF_GIMP_predictions_onlyGhost_RBF)
    LIBHEIF_GIMP_report_onlyGhost_RBF = classification_report(Y_LIBHEIF_GIMP_test, LIBHEIF_GIMP_predictions_onlyGhost_RBF, digits=4)
    print(f'LIBHEIF_GIMP_report_onlyGhost_RBF:\n{LIBHEIF_GIMP_report_onlyGhost_RBF}')
    
    GIMP_LIBHEIF_predictions_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.predict(X_GIMP_LIBHEIF_test_onlyGhost)
    GIMP_LIBHEIF_predictions_prob_onlyGhost_RBF = best_svm_model_onlyGhost_RBF.decision_function(X_GIMP_LIBHEIF_test_onlyGhost)
    GIMP_LIBHEIF_accuracy_onlyGhost_RBF = accuracy_score(Y_GIMP_LIBHEIF_test, GIMP_LIBHEIF_predictions_onlyGhost_RBF)
    GIMP_LIBHEIF_report_onlyGhost_RBF = classification_report(Y_GIMP_LIBHEIF_test, GIMP_LIBHEIF_predictions_onlyGhost_RBF, digits=4)
    print(f'GIMP_LIBHEIF_report_onlyGhost_RBF:\n{GIMP_LIBHEIF_report_onlyGhost_RBF}')
    
    GIMP_GIMP_predictions_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.predict(X_GIMP_GIMP_test_onlyGhost)
    GIMP_GIMP_predictions_prob_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.decision_function(X_GIMP_GIMP_test_onlyGhost)
    GIMP_GIMP_accuracy_onlyGhost_LINEAR = accuracy_score(Y_GIMP_GIMP_test, GIMP_GIMP_predictions_onlyGhost_LINEAR)
    GIMP_GIMP_report_onlyGhost_LINEAR = classification_report(Y_GIMP_GIMP_test, GIMP_GIMP_predictions_onlyGhost_LINEAR, digits=4)
    print(f'GIMP_GIMP_report_onlyGhost_LINEAR:\n{GIMP_GIMP_report_onlyGhost_LINEAR}')
    
    LIBHEIF_GIMP_predictions_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.predict(X_LIBHEIF_GIMP_test_onlyGhost)
    LIBHEIF_GIMP_predictions_prob_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.decision_function(X_LIBHEIF_GIMP_test_onlyGhost)
    LIBHEIF_GIMP_accuracy_onlyGhost_LINEAR = accuracy_score(Y_LIBHEIF_GIMP_test, LIBHEIF_GIMP_predictions_onlyGhost_LINEAR)
    LIBHEIF_GIMP_report_onlyGhost_LINEAR = classification_report(Y_LIBHEIF_GIMP_test, LIBHEIF_GIMP_predictions_onlyGhost_LINEAR, digits=4)
    print(f'LIBHEIF_GIMP_report_onlyGhost_LINEAR:\n{LIBHEIF_GIMP_report_onlyGhost_LINEAR}')
    
    GIMP_LIBHEIF_predictions_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.predict(X_GIMP_LIBHEIF_test_onlyGhost)
    GIMP_LIBHEIF_predictions_prob_onlyGhost_LINEAR = best_svm_model_onlyGhost_LINEAR.decision_function(X_GIMP_LIBHEIF_test_onlyGhost)
    GIMP_LIBHEIF_accuracy_onlyGhost_LINEAR = accuracy_score(Y_GIMP_LIBHEIF_test, GIMP_LIBHEIF_predictions_onlyGhost_LINEAR)
    GIMP_LIBHEIF_report_onlyGhost_LINEAR = classification_report(Y_GIMP_LIBHEIF_test, GIMP_LIBHEIF_predictions_onlyGhost_LINEAR, digits=4)
    print(f'GIMP_LIBHEIF_report_onlyGhost_LINEAR:\n{GIMP_LIBHEIF_report_onlyGhost_LINEAR}')
    

    GIMP_GIMP_old = classification_report(GIMP_GIMP_best_ground_truth_labels, GIMP_GIMP_best_predicted_labels, labels=[0,1], target_names=['0', '1'], zero_division=0, digits=4)
    print(f'GIMP_GIMP_old:\n{GIMP_GIMP_old}')
    
    LIBHEIF_GIMP_old = classification_report(LIBHEIF_GIMP_best_ground_truth_labels, LIBHEIF_GIMP_best_predicted_labels, labels=[0,1], target_names=['0', '1'], zero_division=0, digits=4)
    print(f'LIBHEIF_GIMP_old:\n{LIBHEIF_GIMP_old}')
    
    GIMP_LIBHEIF_old = classification_report(GIMP_LIBHEIF_best_ground_truth_labels, GIMP_LIBHEIF_best_predicted_labels, labels=[0,1], target_names=['0', '1'], zero_division=0, digits=4)
    print(f'GIMP_LIBHEIF_old:\n{GIMP_LIBHEIF_old}')
        
    # Test結果を保存
    
    result_row = {'C_RBF_GIMP_GIMP': best_c_value_RBF, 'GIMP_GIMP_RBF': GIMP_GIMP_accuracy_RBF,
                  'C_RBF_LIBHEIF_GIMP': best_c_value_RBF, 'LIBHEIF_GIMP_RBF': LIBHEIF_GIMP_accuracy_RBF,
                  'C_RBF_GIMP_LIBHEIF': best_c_value_RBF, 'GIMP_LIBHEIF_RBF': GIMP_LIBHEIF_accuracy_RBF,
                    
                  'C_LINEAR_GIMP_GIMP': best_c_value_LINEAR, 'GIMP_GIMP_LINEAR': GIMP_GIMP_accuracy_LINEAR,
                  'C_LINEAR_LIBHEIF_GIMP': best_c_value_LINEAR, 'LIBHEIF_GIMP_LINEAR': LIBHEIF_GIMP_accuracy_LINEAR,
                  'C_LINEAR_GIMP_LIBHEIF': best_c_value_LINEAR, 'GIMP_LIBHEIF_LINEAR': GIMP_LIBHEIF_accuracy_LINEAR,
                  
                  'C_onlyGhost_RBF_GIMP_GIMP': best_c_value_onlyGhost_RBF, 'GIMP_GIMP_onlyGhost_RBF': GIMP_GIMP_accuracy_onlyGhost_RBF,
                  'C_onlyGhost_RBF_LIBHEIF_GIMP': best_c_value_onlyGhost_RBF, 'LIBHEIF_GIMP_onlyGhost_RBF': LIBHEIF_GIMP_accuracy_onlyGhost_RBF,
                  'C_onlyGhost_RBF_GIMP_LIBHEIF': best_c_value_onlyGhost_RBF, 'GIMP_LIBHEIF_onlyGhost_RBF': GIMP_LIBHEIF_accuracy_onlyGhost_RBF,
                  
                  'C_onlyGhost_LINEAR_GIMP_GIMP': best_c_value_onlyGhost_LINEAR, 'GIMP_GIMP_onlyGhost_LINEAR': GIMP_GIMP_accuracy_onlyGhost_LINEAR,
                  'C_onlyGhost_LINEAR_LIBHEIF_GIMP': best_c_value_onlyGhost_LINEAR, 'LIBHEIF_GIMP_onlyGhost_LINEAR': LIBHEIF_GIMP_accuracy_onlyGhost_LINEAR,
                  'C_onlyGhost_LINEAR_GIMP_LIBHEIF': best_c_value_onlyGhost_LINEAR, 'GIMP_LIBHEIF_onlyGhost_LINEAR': GIMP_LIBHEIF_accuracy_onlyGhost_LINEAR,
                  
                  'Threshold_GIMP_GIMP': GIMP_GIMP_best_threshold, 'GIMP_GIMP_old': GIMP_GIMP_best_accuracy,
                  'Threshold_LIBHEIF_GIMP': LIBHEIF_GIMP_best_threshold, 'LIBHEIF_GIMP_old': LIBHEIF_GIMP_best_accuracy,
                  'Threshold_GIMP_LIBHEIF': GIMP_LIBHEIF_best_threshold, 'GIMP_LIBHEIF_old': GIMP_LIBHEIF_best_accuracy,
                 
                 }

    results = pd.concat([results, pd.DataFrame([result_row])], ignore_index=True)

# 結果を表示
# print(results)

<Fold-1>
3726 414
GIMP_GIMP_report_RBF:
              precision    recall  f1-score   support

           0     0.6306    0.7778    0.6965        90
           1     0.7101    0.5444    0.6164        90

    accuracy                         0.6611       180
   macro avg     0.6704    0.6611    0.6564       180
weighted avg     0.6704    0.6611    0.6564       180

LIBHEIF_GIMP_report_RBF:
              precision    recall  f1-score   support

           0     0.6154    0.8000    0.6957        90
           1     0.7143    0.5000    0.5882        90

    accuracy                         0.6500       180
   macro avg     0.6648    0.6500    0.6419       180
weighted avg     0.6648    0.6500    0.6419       180

GIMP_LIBHEIF_report_RBF:
              precision    recall  f1-score   support

           0     0.5943    0.7000    0.6429        90
           1     0.6351    0.5222    0.5732        90

    accuracy                         0.6111       180
   macro avg     0.6147    0.6111    0

In [25]:
def print_stats(column_name, label):
    average = round(results[column_name].mean(), 4)
    std_dev = round(results[column_name].std(), 4)
    max_value = round(results[column_name].max(), 4)
    min_value = round(results[column_name].min(), 4)

    print(f'Average Test Score {column_name}{label}: {average}')
    print(f'Standard Deviation of Test Score {column_name}{label}: {std_dev}')
    print(f'Maximum Test Score {column_name}{label}: {max_value}')
    print(f'Minimum Test Score {column_name}{label}: {min_value}')
    print('-----------------------------------------------------')

# 'Test_Score'列に関して統計情報を表示
print_stats('GIMP_GIMP_RBF', 'with RBF')
print_stats('GIMP_GIMP_LINEAR', 'with LINEAR')
print_stats('GIMP_GIMP_onlyGhost_RBF', 'with only Ghost and RBF')
print_stats('GIMP_GIMP_onlyGhost_LINEAR', 'with only Ghost and LINEAR')
print_stats('GIMP_GIMP_old', 'with old model')

print_stats('LIBHEIF_GIMP_RBF', 'with RBF')
print_stats('LIBHEIF_GIMP_LINEAR', 'with LINEAR')
print_stats('LIBHEIF_GIMP_onlyGhost_RBF', 'with only Ghost and RBF')
print_stats('LIBHEIF_GIMP_onlyGhost_LINEAR', 'with only Ghost and LINEAR')
print_stats('LIBHEIF_GIMP_old', 'with old model')

print_stats('GIMP_LIBHEIF_RBF', 'with RBF')
print_stats('GIMP_LIBHEIF_LINEAR', 'with LINEAR')
print_stats('GIMP_LIBHEIF_onlyGhost_RBF', 'with only Ghost and RBF')
print_stats('GIMP_LIBHEIF_onlyGhost_LINEAR', 'with only Ghost and LINEAR')
print_stats('GIMP_LIBHEIF_old', 'with old model')


Average Test Score GIMP_GIMP_RBFwith RBF: 0.6606
Standard Deviation of Test Score GIMP_GIMP_RBFwith RBF: 0.0593
Maximum Test Score GIMP_GIMP_RBFwith RBF: 0.7389
Minimum Test Score GIMP_GIMP_RBFwith RBF: 0.5778
-----------------------------------------------------
Average Test Score GIMP_GIMP_LINEARwith LINEAR: 0.6772
Standard Deviation of Test Score GIMP_GIMP_LINEARwith LINEAR: 0.0249
Maximum Test Score GIMP_GIMP_LINEARwith LINEAR: 0.7167
Minimum Test Score GIMP_GIMP_LINEARwith LINEAR: 0.6278
-----------------------------------------------------
Average Test Score GIMP_GIMP_onlyGhost_RBFwith only Ghost and RBF: 0.5217
Standard Deviation of Test Score GIMP_GIMP_onlyGhost_RBFwith only Ghost and RBF: 0.0516
Maximum Test Score GIMP_GIMP_onlyGhost_RBFwith only Ghost and RBF: 0.6167
Minimum Test Score GIMP_GIMP_onlyGhost_RBFwith only Ghost and RBF: 0.4333
-----------------------------------------------------
Average Test Score GIMP_GIMP_onlyGhost_LINEARwith only Ghost and LINEAR: 0.5928
Stan