In [1]:
import random
import os
import os.path as osp
import re
import pickle
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import MinMaxScaler
from scipy.signal import find_peaks
import seaborn as sns
import pickle
import torch
import glob
from scipy.stats import entropy
from collections import defaultdict, Counter

pd.set_option('display.expand_frame_repr', False)  # DataFrameを改行せずに表示
pd.set_option('display.max_columns', None)  # すべての列を表示

In [2]:
def extract_finalQP(filename):
    match = re.search(r'2ndQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None


def extract_1stQP(filename):
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None


def ratio_double_compressed(mean_difference, final_QP):
    # mean_difference = mean_difference[0]
    # final_QP = final_QP[0]
    clamped_mean_difference = np.maximum(mean_difference, -0.01)
    
    #全体のエネルギーを計算
    energy = np.sum(np.square(clamped_mean_difference))
    # energy = np.sum(np.square(mean_difference))
    
    #QP2より右側のエネルギーを計算
    right_energy = np.sum(np.square(clamped_mean_difference[final_QP+1:52]))

        
    # エネルギー比を計算して閾値と比較
    if energy > 0:
        return right_energy / energy
    
    else:
        return 0

    
def is_double_compressed(mean_difference, final_QP, threshold):
    mean_difference = mean_difference[0]
    final_QP = final_QP[0]
    clamped_mean_difference = np.maximum(mean_difference, -0.01)
    
    #全体のエネルギーを計算
    energy = np.sum(np.square(clamped_mean_difference))
    # energy = np.sum(np.square(mean_difference))
    
    #QP2より右側のエネルギーを計算
    right_energy = np.sum(np.square(clamped_mean_difference[final_QP+1:52]))
    # right_energy = np.sum(np.square(mean_difference[final_QP+1:52]))
    
    
    # エネルギー比を計算して閾値と比較
    if energy <= 0:
        return -1
    
    elif (right_energy / energy) != 0 and (right_energy / energy) > threshold:
        return True
    
    elif (right_energy / energy) != 0 and (right_energy / energy) <= threshold:
        return False
    
    else:
        return -1

def calculate_mae(file_path):
    try:
        with open(file_path, 'rb') as file:
            loaded_data, loaded_data_shifted = pickle.load(file)
    except Exception as e:
        print(f"Error occurred while loading {file_path}: {e}")
        return None

    # タプル内のリストを抽出
    original_mae = np.array(loaded_data)
    shifted_mae = np.array(loaded_data_shifted)

    # Coding ghostを計算してリストに格納する
    mae_difference = shifted_mae - original_mae
    
    # mae_differenceの各要素においてマイナスの値を0に変換
    # mae_difference_positive = np.maximum(mae_difference, 0)
    
    return mae_difference

In [3]:
rootpath_csv = "/Prove/Yoshihisa/HEIF_ghost/HEIF_IMAGES_CSV/"
rootpath_pkl = "/Prove/Yoshihisa/HEIF_ghost/PKL/"

train_list1 = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30"]

train_list2 = ["31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60"]

train_list3 = ["61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90"]

train_list4 = ["91", "92", "93", "94", "95", "96", "97", "98", "99", "100", "101", "102", "103", "104", "105", "106", "107", "108", "109", "110", "111", "112", "113", "114", "115", "116", "117", "118", "119", "120"]

train_list5 = ["121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131", "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142", "143", "144", "145", "146", "147", "148", "149", "150"]

train_list6 = ["191", "192", "193", "194", "195", "196", "197", "198", "199", "200"]

train_list7 = ["201", "202", "203", "204", "205", "206", "207", "208", "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219", "220"]

train_list8 = ["221", "222", "223", "224", "225", "226", "227", "228", "229", "230", "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "241", "242", "243", "244", "245", "246", "247", "248", "249", "250"]

train_list9 = ["251", "252", "253", "254", "255", "256", "257", "258", "259", "260", "261", "262", "263", "264", "265", "266", "267", "268", "269", "270", "271", "272", "273", "274", "275", "276", "277", "278", "279", "280"]

train_list10 = ["281", "282", "283", "284", "285", "286", "287", "288", "289", "290", "291", "292", "293", "294", "295", "296", "297", "298", "299", "300"]

all_train_lists = [train_list1, train_list2, train_list3, train_list4, train_list5,
                   train_list6, train_list7, train_list8, train_list9, train_list10]

# すべてのリストを1つのリストに結合する
combined_train_list = sum(all_train_lists, [])

# リストの順序をランダムにシャッフルする
random.shuffle(combined_train_list)

# シャッフルされたリストを10個のグループに分割する
train_lists = [combined_train_list[i:i+26] for i in range(0, len(combined_train_list), 26)]
print(train_lists)



# CSV関連のリストを生成
csv_single_listsA = [[] for _ in range(10)]
csv_single_recompress_listsA = [[] for _ in range(10)]
csv_second_largeQP1_listsA = [[] for _ in range(10)]
csv_second_recompress_largeQP1_listsA = [[] for _ in range(10)]
csv_second_sameQP_listsA = [[] for _ in range(10)]
csv_second_recompress_sameQP_listsA = [[] for _ in range(10)]
csv_second_largeQP2_listsA = [[] for _ in range(10)]
csv_second_recompress_largeQP2_listsA = [[] for _ in range(10)]

def process_csv_lists(rootpath, train_list, single_list, single_recompress_list, 
                      second_largeQP1_list, second_recompress_largeQP1_list, 
                      second_sameQP_list, second_recompress_sameQP_list,
                      second_largeQP2_list, second_recompress_largeQP2_list):
    
    for image in train_list:
        single_path = osp.join(rootpath, f'HEIF_images_single_csv/{image}_*')
        single_recompress_path = osp.join(rootpath, f'HEIF_images_second_sameQP_csv/{image}_*')
        
        second_largeQP1_path = osp.join(rootpath, f'HEIF_images_second_csv/{image}_*')
        second_recompress_largeQP1_path = osp.join(rootpath, f'HEIF_images_triple_csv/{image}_*')
        
        second_sameQP_path = osp.join(rootpath, f'HEIF_images_second_sameQP_csv/{image}_*')
        second_recompress_sameQP_path = osp.join(rootpath, f'HEIF_images_triple_sameQP_csv/{image}_*')
        
        second_largeQP2_path = osp.join(rootpath, f'HEIF_images_second_largeQP_csv/{image}_*')
        second_recompress_largeQP2_path = osp.join(rootpath, f'HEIF_images_triple_largeQP_csv/{image}_*')
        
        for path in sorted(glob.glob(single_path)):
            single_list.append(path)
        for path in sorted(glob.glob(single_recompress_path)):
            single_recompress_list.append(path)
        for path in sorted(glob.glob(second_largeQP1_path)):
            second_largeQP1_list.append(path)
        for path in sorted(glob.glob(second_recompress_largeQP1_path)):
            second_recompress_largeQP1_list.append(path)
        for path in sorted(glob.glob(second_sameQP_path)):
            second_sameQP_list.append(path)
        for path in sorted(glob.glob(second_recompress_sameQP_path)):
            second_recompress_sameQP_list.append(path)
        for path in sorted(glob.glob(second_largeQP2_path)):
            second_largeQP2_list.append(path)
        for path in sorted(glob.glob(second_recompress_largeQP2_path)):
            second_recompress_largeQP2_list.append(path)

# 各カテゴリのCSVリストを生成
for train_list, single_list, single_recompress_list in zip(train_lists, 
                                                           csv_single_listsA,
                                                           csv_single_recompress_listsA):
    process_csv_lists(rootpath_csv, train_list, single_list, single_recompress_list, 
                      [], [], [], [], [], [])


for train_list, second_largeQP1_list, second_recompress_largeQP1_list, second_sameQP_list, second_recompress_sameQP_list, second_largeQP2_list, second_recompress_largeQP2_list in zip(train_lists, 
                                                                                                                                                                                                                   csv_second_largeQP1_listsA,
                                                                                                                                                                                                                   csv_second_recompress_largeQP1_listsA,
                                                                                                                                                                                                                   csv_second_sameQP_listsA,
                                                                                                                                                                                                                   csv_second_recompress_sameQP_listsA,
                                                                                                                                                                                                                   csv_second_largeQP2_listsA,
                                                                                                                                                                                                                   csv_second_recompress_largeQP2_listsA):
    process_csv_lists(rootpath_csv, train_list, [], [], 
                      second_largeQP1_list, second_recompress_largeQP1_list, 
                      second_sameQP_list, second_recompress_sameQP_list,
                      second_largeQP2_list, second_recompress_largeQP2_list)

    

    
# 出力リストを初期化
pkl_single_listsA = [[] for _ in range(10)]
pkl_single_recompress_listsA = [[] for _ in range(10)]
pkl_second_largeQP1_listsA = [[] for _ in range(10)]
pkl_second_recompress_largeQP1_listsA = [[] for _ in range(10)]
pkl_second_sameQP_listsA = [[] for _ in range(10)]
pkl_second_recompress_sameQP_listsA = [[] for _ in range(10)]
pkl_second_largeQP2_listsA = [[] for _ in range(10)]
pkl_second_recompress_largeQP2_listsA = [[] for _ in range(10)]    

def process_train_lists_pkl(rootpath, train_list, single_list, single_recompress_list, 
                            second_largeQP1_list, second_recompress_largeQP1_list, 
                            second_sameQP_list, second_recompress_sameQP_list,
                            second_largeQP2_list, second_recompress_largeQP2_list):
    
    for image in train_list:
        single_path = osp.join(rootpath, f'pkl_single/{image}_*')
        single_recompress_path = osp.join(rootpath, f'pkl_second_sameQP/{image}_*')
        
        second_largeQP1_path = osp.join(rootpath, f'pkl_second/{image}_*')
        second_recompress_largeQP1_path = osp.join(rootpath, f'pkl_triple/{image}_*')
        
        second_sameQP_path = osp.join(rootpath, f'pkl_second_sameQP/{image}_*')
        second_recompress_sameQP_path = osp.join(rootpath, f'pkl_triple_sameQP/{image}_*')
        
        second_largeQP2_path = osp.join(rootpath, f'pkl_second_largeQP/{image}_*')
        second_recompress_largeQP2_path = osp.join(rootpath, f'pkl_triple_largeQP/{image}_*')
        

        for path in sorted(glob.glob(single_path)):
            single_list.append(path)
        for path in sorted(glob.glob(single_recompress_path)):
            single_recompress_list.append(path)
            
        for path in sorted(glob.glob(second_largeQP1_path)):
            second_largeQP1_list.append(path)
        for path in sorted(glob.glob(second_recompress_largeQP1_path)):
            second_recompress_largeQP1_list.append(path)
                
        for path in sorted(glob.glob(second_sameQP_path)):
            second_sameQP_list.append(path)
        for path in sorted(glob.glob(second_recompress_sameQP_path)):
            second_recompress_sameQP_list.append(path)
            
        for path in sorted(glob.glob(second_largeQP2_path)):
            second_largeQP2_list.append(path)
        for path in sorted(glob.glob(second_recompress_largeQP2_path)):
            second_recompress_largeQP2_list.append(path)

# 各カテゴリのリストを生成
for train_list, single_list, single_recompress_list in zip(train_lists, 
                                                           pkl_single_listsA,
                                                           pkl_single_recompress_listsA):
    process_train_lists_pkl(rootpath_pkl, train_list, single_list, single_recompress_list, 
                            [], [], [], [], [], [])


for train_list, second_largeQP1_list, second_recompress_largeQP1_list, second_sameQP_list, second_recompress_sameQP_list, second_largeQP2_list, second_recompress_largeQP2_list in zip(train_lists, 
                                                                                                                                                                                                                   pkl_second_largeQP1_listsA,
                                                                                                                                                                                                                   pkl_second_recompress_largeQP1_listsA,
                                                                                                                                                                                                                   pkl_second_sameQP_listsA,
                                                                                                                                                                                                                   pkl_second_recompress_sameQP_listsA,
                                                                                                                                                                                                                   pkl_second_largeQP2_listsA,
                                                                                                                                                                                                                   pkl_second_recompress_largeQP2_listsA):
    process_train_lists_pkl(rootpath_pkl, train_list, [], [], 
                            second_largeQP1_list, second_recompress_largeQP1_list, 
                            second_sameQP_list, second_recompress_sameQP_list,
                            second_largeQP2_list, second_recompress_largeQP2_list)


print("\nCSV Single ListsA:")
for i, lst in enumerate(csv_single_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Single Recompress ListsA:")
for i, lst in enumerate(csv_single_recompress_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Large QP1 ListsA:")
for i, lst in enumerate(csv_second_largeQP1_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Recompress Large QP1 ListsA:")
for i, lst in enumerate(csv_second_recompress_largeQP1_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Same QP ListsA:")
for i, lst in enumerate(csv_second_sameQP_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Recompress Same QP ListsA:")
for i, lst in enumerate(csv_second_recompress_sameQP_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Large QP2 ListsA:")
for i, lst in enumerate(csv_second_largeQP2_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Recompress Large QP2 ListsA:")
for i, lst in enumerate(csv_second_recompress_largeQP2_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

# 出力リストを表示
print("\nPKL Single ListsA:")
for i, lst in enumerate(pkl_single_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Single Recompress ListsA:")
for i, lst in enumerate(pkl_single_recompress_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Large QP1 ListsA:")
for i, lst in enumerate(pkl_second_largeQP1_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Recompress Large QP1 ListsA:")
for i, lst in enumerate(pkl_second_recompress_largeQP1_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Same QP ListsA:")
for i, lst in enumerate(pkl_second_sameQP_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Recompress Same QP ListsA:")
for i, lst in enumerate(pkl_second_recompress_sameQP_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Large QP2 ListsA:")
for i, lst in enumerate(pkl_second_largeQP2_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Recompress Large QP2 ListsA:")
for i, lst in enumerate(pkl_second_recompress_largeQP2_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

[['150', '284', '241', '207', '8', '244', '16', '208', '271', '228', '191', '297', '135', '82', '230', '108', '105', '264', '102', '287', '213', '34', '248', '42', '107', '209'], ['192', '269', '144', '20', '219', '64', '250', '113', '115', '60', '141', '295', '237', '84', '240', '54', '288', '146', '225', '69', '111', '68', '290', '246', '101', '65'], ['95', '272', '46', '37', '266', '21', '291', '93', '263', '124', '2', '91', '55', '114', '294', '258', '52', '118', '88', '94', '273', '74', '62', '270', '4', '106'], ['25', '45', '90', '202', '283', '222', '56', '195', '224', '7', '262', '40', '127', '38', '86', '79', '280', '132', '23', '242', '44', '253', '298', '220', '214', '53'], ['238', '234', '235', '61', '80', '77', '204', '19', '299', '139', '116', '211', '293', '268', '121', '217', '142', '261', '215', '232', '140', '148', '267', '199', '66', '110'], ['17', '117', '125', '120', '226', '85', '33', '71', '103', '15', '260', '193', '205', '254', '92', '76', '104', '203', '14', '

In [4]:
# single_listsおよびsingle_recompress_listsは初期化されている前提
single_csv1 = list(zip(csv_single_listsA[0], pkl_single_listsA[0], csv_single_recompress_listsA[0], pkl_single_recompress_listsA[0]))
single_csv2 = list(zip(csv_single_listsA[1], pkl_single_listsA[1], csv_single_recompress_listsA[1], pkl_single_recompress_listsA[1]))
single_csv3 = list(zip(csv_single_listsA[2], pkl_single_listsA[2], csv_single_recompress_listsA[2], pkl_single_recompress_listsA[2]))
single_csv4 = list(zip(csv_single_listsA[3], pkl_single_listsA[3], csv_single_recompress_listsA[3], pkl_single_recompress_listsA[3]))
single_csv5 = list(zip(csv_single_listsA[4], pkl_single_listsA[4], csv_single_recompress_listsA[4], pkl_single_recompress_listsA[4]))
single_csv6 = list(zip(csv_single_listsA[5], pkl_single_listsA[5], csv_single_recompress_listsA[5], pkl_single_recompress_listsA[5]))
single_csv7 = list(zip(csv_single_listsA[6], pkl_single_listsA[6], csv_single_recompress_listsA[6], pkl_single_recompress_listsA[6]))
single_csv8 = list(zip(csv_single_listsA[7], pkl_single_listsA[7], csv_single_recompress_listsA[7], pkl_single_recompress_listsA[7]))
single_csv9 = list(zip(csv_single_listsA[8], pkl_single_listsA[8], csv_single_recompress_listsA[8], pkl_single_recompress_listsA[8]))
single_csv10 = list(zip(csv_single_listsA[9], pkl_single_listsA[9], csv_single_recompress_listsA[9], pkl_single_recompress_listsA[9]))

single_csv1 = random.sample(single_csv1, 240)
single_csv2 = random.sample(single_csv2, 240)
single_csv3 = random.sample(single_csv3, 240)
single_csv4 = random.sample(single_csv4, 240)
single_csv5 = random.sample(single_csv5, 240)
single_csv6 = random.sample(single_csv6, 240)
single_csv7 = random.sample(single_csv7, 240)
single_csv8 = random.sample(single_csv8, 240)
single_csv9 = random.sample(single_csv9, 240)
single_csv10 = random.sample(single_csv10, 240)
print(len(single_csv1))


240


In [5]:
# Large_QP1
second_largeQP1_csv1 = list(zip(csv_second_largeQP1_listsA[0], pkl_second_largeQP1_listsA[0], csv_second_recompress_largeQP1_listsA[0], pkl_second_recompress_largeQP1_listsA[0]))
second_largeQP1_csv2 = list(zip(csv_second_largeQP1_listsA[1], pkl_second_largeQP1_listsA[1], csv_second_recompress_largeQP1_listsA[1], pkl_second_recompress_largeQP1_listsA[1]))
second_largeQP1_csv3 = list(zip(csv_second_largeQP1_listsA[2], pkl_second_largeQP1_listsA[2], csv_second_recompress_largeQP1_listsA[2], pkl_second_recompress_largeQP1_listsA[2]))
second_largeQP1_csv4 = list(zip(csv_second_largeQP1_listsA[3], pkl_second_largeQP1_listsA[3], csv_second_recompress_largeQP1_listsA[3], pkl_second_recompress_largeQP1_listsA[3]))
second_largeQP1_csv5 = list(zip(csv_second_largeQP1_listsA[4], pkl_second_largeQP1_listsA[4], csv_second_recompress_largeQP1_listsA[4], pkl_second_recompress_largeQP1_listsA[4]))
second_largeQP1_csv6 = list(zip(csv_second_largeQP1_listsA[5], pkl_second_largeQP1_listsA[5], csv_second_recompress_largeQP1_listsA[5], pkl_second_recompress_largeQP1_listsA[5]))
second_largeQP1_csv7 = list(zip(csv_second_largeQP1_listsA[6], pkl_second_largeQP1_listsA[6], csv_second_recompress_largeQP1_listsA[6], pkl_second_recompress_largeQP1_listsA[6]))
second_largeQP1_csv8 = list(zip(csv_second_largeQP1_listsA[7], pkl_second_largeQP1_listsA[7], csv_second_recompress_largeQP1_listsA[7], pkl_second_recompress_largeQP1_listsA[7]))
second_largeQP1_csv9 = list(zip(csv_second_largeQP1_listsA[8], pkl_second_largeQP1_listsA[8], csv_second_recompress_largeQP1_listsA[8], pkl_second_recompress_largeQP1_listsA[8]))
second_largeQP1_csv10 = list(zip(csv_second_largeQP1_listsA[9], pkl_second_largeQP1_listsA[9], csv_second_recompress_largeQP1_listsA[9], pkl_second_recompress_largeQP1_listsA[9]))
print(len(second_largeQP1_csv1))


second_largeQP1_csv1 = random.sample(second_largeQP1_csv1, 80)
second_largeQP1_csv2 = random.sample(second_largeQP1_csv2, 80)
second_largeQP1_csv3 = random.sample(second_largeQP1_csv3, 80)
second_largeQP1_csv4 = random.sample(second_largeQP1_csv4, 80)
second_largeQP1_csv5 = random.sample(second_largeQP1_csv5, 80)
second_largeQP1_csv6 = random.sample(second_largeQP1_csv6, 80)
second_largeQP1_csv7 = random.sample(second_largeQP1_csv7, 80)
second_largeQP1_csv8 = random.sample(second_largeQP1_csv8, 80)
second_largeQP1_csv9 = random.sample(second_largeQP1_csv9, 80)
second_largeQP1_csv10 = random.sample(second_largeQP1_csv10, 80)
# second_largeQP1_csv10 = selected_data[9]
print('\ndouble images train by QP1>QP2: ', len(second_largeQP1_csv10))


1482

double images train by QP1>QP2:  80


In [6]:
# sameQP
second_sameQP_csv1 = list(zip(csv_second_sameQP_listsA[0], pkl_second_sameQP_listsA[0], csv_second_recompress_sameQP_listsA[0], pkl_second_recompress_sameQP_listsA[0]))
second_sameQP_csv2 = list(zip(csv_second_sameQP_listsA[1], pkl_second_sameQP_listsA[1], csv_second_recompress_sameQP_listsA[1], pkl_second_recompress_sameQP_listsA[1]))
second_sameQP_csv3 = list(zip(csv_second_sameQP_listsA[2], pkl_second_sameQP_listsA[2], csv_second_recompress_sameQP_listsA[2], pkl_second_recompress_sameQP_listsA[2]))
second_sameQP_csv4 = list(zip(csv_second_sameQP_listsA[3], pkl_second_sameQP_listsA[3], csv_second_recompress_sameQP_listsA[3], pkl_second_recompress_sameQP_listsA[3]))
second_sameQP_csv5 = list(zip(csv_second_sameQP_listsA[4], pkl_second_sameQP_listsA[4], csv_second_recompress_sameQP_listsA[4], pkl_second_recompress_sameQP_listsA[4]))
second_sameQP_csv6 = list(zip(csv_second_sameQP_listsA[5], pkl_second_sameQP_listsA[5], csv_second_recompress_sameQP_listsA[5], pkl_second_recompress_sameQP_listsA[5]))
second_sameQP_csv7 = list(zip(csv_second_sameQP_listsA[6], pkl_second_sameQP_listsA[6], csv_second_recompress_sameQP_listsA[6], pkl_second_recompress_sameQP_listsA[6]))
second_sameQP_csv8 = list(zip(csv_second_sameQP_listsA[7], pkl_second_sameQP_listsA[7], csv_second_recompress_sameQP_listsA[7], pkl_second_recompress_sameQP_listsA[7]))
second_sameQP_csv9 = list(zip(csv_second_sameQP_listsA[8], pkl_second_sameQP_listsA[8], csv_second_recompress_sameQP_listsA[8], pkl_second_recompress_sameQP_listsA[8]))
second_sameQP_csv10 = list(zip(csv_second_sameQP_listsA[9], pkl_second_sameQP_listsA[9], csv_second_recompress_sameQP_listsA[9], pkl_second_recompress_sameQP_listsA[9]))
print(len(second_sameQP_csv10))

second_sameQP_csv1 = random.sample(second_sameQP_csv1, 80)
second_sameQP_csv2 = random.sample(second_sameQP_csv2, 80)
second_sameQP_csv3 = random.sample(second_sameQP_csv3, 80)
second_sameQP_csv4 = random.sample(second_sameQP_csv4, 80)
second_sameQP_csv5 = random.sample(second_sameQP_csv5, 80)
second_sameQP_csv6 = random.sample(second_sameQP_csv6, 80)
second_sameQP_csv7 = random.sample(second_sameQP_csv7, 80)
second_sameQP_csv8 = random.sample(second_sameQP_csv8, 80)
second_sameQP_csv9 = random.sample(second_sameQP_csv9, 80)
second_sameQP_csv10 = random.sample(second_sameQP_csv10, 80)
print('\ndouble images train by QP1=QP2: ',len(second_sameQP_csv10))


260

double images train by QP1=QP2:  80


In [7]:
# Large_QP2
second_largeQP2_csv1 = list(zip(csv_second_largeQP2_listsA[0], pkl_second_largeQP2_listsA[0], csv_second_recompress_largeQP2_listsA[0], pkl_second_recompress_largeQP2_listsA[0]))
second_largeQP2_csv2 = list(zip(csv_second_largeQP2_listsA[1], pkl_second_largeQP2_listsA[1], csv_second_recompress_largeQP2_listsA[1], pkl_second_recompress_largeQP2_listsA[1]))
second_largeQP2_csv3 = list(zip(csv_second_largeQP2_listsA[2], pkl_second_largeQP2_listsA[2], csv_second_recompress_largeQP2_listsA[2], pkl_second_recompress_largeQP2_listsA[2]))
second_largeQP2_csv4 = list(zip(csv_second_largeQP2_listsA[3], pkl_second_largeQP2_listsA[3], csv_second_recompress_largeQP2_listsA[3], pkl_second_recompress_largeQP2_listsA[3]))
second_largeQP2_csv5 = list(zip(csv_second_largeQP2_listsA[4], pkl_second_largeQP2_listsA[4], csv_second_recompress_largeQP2_listsA[4], pkl_second_recompress_largeQP2_listsA[4]))
second_largeQP2_csv6 = list(zip(csv_second_largeQP2_listsA[5], pkl_second_largeQP2_listsA[5], csv_second_recompress_largeQP2_listsA[5], pkl_second_recompress_largeQP2_listsA[5]))
second_largeQP2_csv7 = list(zip(csv_second_largeQP2_listsA[6], pkl_second_largeQP2_listsA[6], csv_second_recompress_largeQP2_listsA[6], pkl_second_recompress_largeQP2_listsA[6]))
second_largeQP2_csv8 = list(zip(csv_second_largeQP2_listsA[7], pkl_second_largeQP2_listsA[7], csv_second_recompress_largeQP2_listsA[7], pkl_second_recompress_largeQP2_listsA[7]))
second_largeQP2_csv9 = list(zip(csv_second_largeQP2_listsA[8], pkl_second_largeQP2_listsA[8], csv_second_recompress_largeQP2_listsA[8], pkl_second_recompress_largeQP2_listsA[8]))
second_largeQP2_csv10 = list(zip(csv_second_largeQP2_listsA[9], pkl_second_largeQP2_listsA[9], csv_second_recompress_largeQP2_listsA[9], pkl_second_recompress_largeQP2_listsA[9]))
print(len(second_largeQP2_csv1))

second_largeQP2_csv1 = random.sample(second_largeQP2_csv1, 80)
second_largeQP2_csv2 = random.sample(second_largeQP2_csv2, 80)
second_largeQP2_csv3 = random.sample(second_largeQP2_csv3, 80)
second_largeQP2_csv4 = random.sample(second_largeQP2_csv4, 80)
second_largeQP2_csv5 = random.sample(second_largeQP2_csv5, 80)
second_largeQP2_csv6 = random.sample(second_largeQP2_csv6, 80)
second_largeQP2_csv7 = random.sample(second_largeQP2_csv7, 80)
second_largeQP2_csv8 = random.sample(second_largeQP2_csv8, 80)
second_largeQP2_csv9 = random.sample(second_largeQP2_csv9, 80)
second_largeQP2_csv10 = random.sample(second_largeQP2_csv10, 80)
# second_largeQP2_csv10 = selected_data[9]
print('\ndouble images train by QP1<QP2: ', len(second_largeQP2_csv10))


1014

double images train by QP1<QP2:  80


In [8]:
QP2 = ["_1stQP2_"]
QP4 = ["_1stQP4_"]
QP12 = ["_1stQP12_"]

In [9]:
QP4_QP2 = ["_1stQP4_2ndQP2_"]
QP12_QP2 = ["_1stQP12_2ndQP2_"]
QP12_QP4 = ["_1stQP12_2ndQP4_"]

In [10]:
QP2_QP2 = ["_1stQP2_2ndQP2"]
QP4_QP4 = ["_1stQP4_2ndQP4"]
QP12_QP12 = ["_1stQP12_2ndQP12"]

In [11]:
QP2_QP4 = ["_1stQP2_2ndQP4"]
QP2_QP12 = ["_1stQP2_2ndQP12"]
QP4_QP12 = ["_1stQP4_2ndQP12"]

In [12]:
rootpath2 = "/Prove/Yoshihisa/HEIF_ghost/EXPERIMENT_DIFFERENT_SOFTWARE/"

# SINGLE
GIMP_path1 = os.path.join(rootpath2, 'GIMP_csv')
GIMP_path2 = os.path.join(rootpath2, 'GIMP_RECOMPRESSED_csv')

GIMP_path1_csv = [os.path.join(GIMP_path1, file) for file in sorted(os.listdir(GIMP_path1))]
GIMP_path2_csv = [os.path.join(GIMP_path2, file) for file in sorted(os.listdir(GIMP_path2))]

LIBHEIF_path1 = os.path.join(rootpath2, 'LIBHEIF_csv')
LIBHEIF_path2 = os.path.join(rootpath2, 'LIBHEIF_RECOMPRESSED_csv')

LIBHEIF_path1_csv = [os.path.join(LIBHEIF_path1, file) for file in sorted(os.listdir(LIBHEIF_path1))]
LIBHEIF_path2_csv = [os.path.join(LIBHEIF_path2, file) for file in sorted(os.listdir(LIBHEIF_path2))]


# DOUBLE
GIMP_GIMP_path1 = os.path.join(rootpath2, 'GIMP_GIMP_csv')
GIMP_GIMP_path2 = os.path.join(rootpath2, 'GIMP_GIMP_RECOMPRESSED_csv')

GIMP_GIMP_path1_csv = [os.path.join(GIMP_GIMP_path1, file) for file in sorted(os.listdir(GIMP_GIMP_path1))]
GIMP_GIMP_path2_csv = [os.path.join(GIMP_GIMP_path2, file) for file in sorted(os.listdir(GIMP_GIMP_path2))]

LIBHEIF_GIMP_path1 = os.path.join(rootpath2, 'LIBHEIF_GIMP_csv')
LIBHEIF_GIMP_path2 = os.path.join(rootpath2, 'LIBHEIF_GIMP_RECOMPRESSED_csv')

LIBHEIF_GIMP_path1_csv = [os.path.join(LIBHEIF_GIMP_path1, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path1))]
LIBHEIF_GIMP_path2_csv = [os.path.join(LIBHEIF_GIMP_path2, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path2))]

GIMP_LIBHEIF_path1 = os.path.join(rootpath2, 'GIMP_LIBHEIF_csv')
GIMP_LIBHEIF_path2 = os.path.join(rootpath2, 'GIMP_LIBHEIF_RECOMPRESSED_csv')

GIMP_LIBHEIF_path1_csv = [os.path.join(GIMP_LIBHEIF_path1, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path1))]
GIMP_LIBHEIF_path2_csv = [os.path.join(GIMP_LIBHEIF_path2, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path2))]


In [13]:
rootpath2 = "/Prove/Yoshihisa/HEIF_ghost/EXPERIMENT_DIFFERENT_SOFTWARE/"

# SINGLE
GIMP_path1 = os.path.join(rootpath2, 'GIMP_csv')
GIMP_path2 = os.path.join(rootpath2, 'GIMP_RECOMPRESSED_csv')

GIMP_path1_csv = [os.path.join(GIMP_path1, file) for file in sorted(os.listdir(GIMP_path1))]
GIMP_path2_csv = [os.path.join(GIMP_path2, file) for file in sorted(os.listdir(GIMP_path2))]

LIBHEIF_path1 = os.path.join(rootpath2, 'LIBHEIF_csv')
LIBHEIF_path2 = os.path.join(rootpath2, 'LIBHEIF_RECOMPRESSED_csv')

LIBHEIF_path1_csv = [os.path.join(LIBHEIF_path1, file) for file in sorted(os.listdir(LIBHEIF_path1))]
LIBHEIF_path2_csv = [os.path.join(LIBHEIF_path2, file) for file in sorted(os.listdir(LIBHEIF_path2))]


# DOUBLE
GIMP_GIMP_path1 = os.path.join(rootpath2, 'GIMP_GIMP_csv')
GIMP_GIMP_path2 = os.path.join(rootpath2, 'GIMP_GIMP_RECOMPRESSED_csv')

GIMP_GIMP_path1_csv = [os.path.join(GIMP_GIMP_path1, file) for file in sorted(os.listdir(GIMP_GIMP_path1))]
GIMP_GIMP_path2_csv = [os.path.join(GIMP_GIMP_path2, file) for file in sorted(os.listdir(GIMP_GIMP_path2))]

LIBHEIF_GIMP_path1 = os.path.join(rootpath2, 'LIBHEIF_GIMP_csv')
LIBHEIF_GIMP_path2 = os.path.join(rootpath2, 'LIBHEIF_GIMP_RECOMPRESSED_csv')

LIBHEIF_GIMP_path1_csv = [os.path.join(LIBHEIF_GIMP_path1, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path1))]
LIBHEIF_GIMP_path2_csv = [os.path.join(LIBHEIF_GIMP_path2, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path2))]

GIMP_LIBHEIF_path1 = os.path.join(rootpath2, 'GIMP_LIBHEIF_csv')
GIMP_LIBHEIF_path2 = os.path.join(rootpath2, 'GIMP_LIBHEIF_RECOMPRESSED_csv')

GIMP_LIBHEIF_path1_csv = [os.path.join(GIMP_LIBHEIF_path1, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path1))]
GIMP_LIBHEIF_path2_csv = [os.path.join(GIMP_LIBHEIF_path2, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path2))]

In [14]:
rootpath3 = "/Prove/Yoshihisa/HEIF_ghost/EXPERIMENT_DIFFERENT_SOFTWARE/PKL/"

# SINGLE
GIMP_path1 = os.path.join(rootpath3, 'pkl_GIMP')
GIMP_path2 = os.path.join(rootpath3, 'pkl_GIMP_RECOMPRESSED')

GIMP_path1_pkl = [os.path.join(GIMP_path1, file) for file in sorted(os.listdir(GIMP_path1))]
GIMP_path2_pkl = [os.path.join(GIMP_path2, file) for file in sorted(os.listdir(GIMP_path2))]

LIBHEIF_path1 = os.path.join(rootpath3, 'pkl_LIBHEIF')
LIBHEIF_path2 = os.path.join(rootpath3, 'pkl_LIBHEIF_RECOMPRESSED')

LIBHEIF_path1_pkl = [os.path.join(LIBHEIF_path1, file) for file in sorted(os.listdir(LIBHEIF_path1))]
LIBHEIF_path2_pkl = [os.path.join(LIBHEIF_path2, file) for file in sorted(os.listdir(LIBHEIF_path2))]


# DOUBLE
GIMP_GIMP_path1 = os.path.join(rootpath3, 'pkl_GIMP_GIMP')
GIMP_GIMP_path2 = os.path.join(rootpath3, 'pkl_GIMP_GIMP_RECOMPRESSED')

GIMP_GIMP_path1_pkl = [os.path.join(GIMP_GIMP_path1, file) for file in sorted(os.listdir(GIMP_GIMP_path1))]
GIMP_GIMP_path2_pkl = [os.path.join(GIMP_GIMP_path2, file) for file in sorted(os.listdir(GIMP_GIMP_path2))]

LIBHEIF_GIMP_path1 = os.path.join(rootpath3, 'pkl_LIBHEIF_GIMP')
LIBHEIF_GIMP_path2 = os.path.join(rootpath3, 'pkl_LIBHEIF_GIMP_RECOMPRESSED')

LIBHEIF_GIMP_path1_pkl = [os.path.join(LIBHEIF_GIMP_path1, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path1))]
LIBHEIF_GIMP_path2_pkl = [os.path.join(LIBHEIF_GIMP_path2, file) for file in sorted(os.listdir(LIBHEIF_GIMP_path2))]

GIMP_LIBHEIF_path1 = os.path.join(rootpath3, 'pkl_GIMP_LIBHEIF')
GIMP_LIBHEIF_path2 = os.path.join(rootpath3, 'pkl_GIMP_LIBHEIF_RECOMPRESSED')

GIMP_LIBHEIF_path1_pkl = [os.path.join(GIMP_LIBHEIF_path1, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path1))]
GIMP_LIBHEIF_path2_pkl = [os.path.join(GIMP_LIBHEIF_path2, file) for file in sorted(os.listdir(GIMP_LIBHEIF_path2))]

In [15]:
GIMP_csv = list(zip(GIMP_path1_csv, GIMP_path1_pkl, GIMP_path2_csv, GIMP_path2_pkl))


single_QP2_GIMP = [item for item in GIMP_csv if any(qp in item[0] for qp in QP2)]
single_QP4_GIMP = [item for item in GIMP_csv if any(qp in item[0] for qp in QP4)]
single_QP12_GIMP = [item for item in GIMP_csv if any(qp in item[0] for qp in QP12)]

single_QP2_GIMP = random.sample(single_QP2_GIMP, 10)
single_QP4_GIMP = random.sample(single_QP4_GIMP, 10)
single_QP12_GIMP = random.sample(single_QP12_GIMP, 10)

LIBHEIF_csv = list(zip(LIBHEIF_path1_csv, LIBHEIF_path1_pkl, LIBHEIF_path2_csv, LIBHEIF_path2_pkl))
# LIBHEIF_csv1 = random.sample(LIBHEIF_csv, 10)

single_QP2_LIBHEIF = [item for item in LIBHEIF_csv if any(qp in item[0] for qp in QP2)]
single_QP4_LIBHEIF = [item for item in LIBHEIF_csv if any(qp in item[0] for qp in QP4)]
single_QP12_LIBHEIF = [item for item in LIBHEIF_csv if any(qp in item[0] for qp in QP12)]

single_QP2_LIBHEIF = random.sample(single_QP2_LIBHEIF, 10)
single_QP4_LIBHEIF = random.sample(single_QP4_LIBHEIF, 10)
single_QP12_LIBHEIF = random.sample(single_QP12_LIBHEIF, 10)


GIMP_GIMP_csv = list(zip(GIMP_GIMP_path1_csv, GIMP_GIMP_path1_pkl, GIMP_GIMP_path2_csv, GIMP_GIMP_path2_pkl))

second_QP4_QP2_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP4_QP2)]
second_QP12_QP2_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP12_QP2)]
second_QP12_QP4_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP12_QP4)]

second_QP2_QP2_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP2_QP2)]
second_QP4_QP4_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP4_QP4)]
second_QP12_QP12_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP12_QP12)]

second_QP2_QP4_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP2_QP4)]
second_QP2_QP12_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP2_QP12)]
second_QP4_QP12_GG = [item for item in GIMP_GIMP_csv if any(qp in item[0] for qp in QP4_QP12)]

LIBHEIF_GIMP_csv = list(zip(LIBHEIF_GIMP_path1_csv, LIBHEIF_GIMP_path1_pkl, LIBHEIF_GIMP_path2_csv, LIBHEIF_GIMP_path2_pkl))

second_QP4_QP2_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP4_QP2)]
second_QP12_QP2_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP12_QP2)]
second_QP12_QP4_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP12_QP4)]

second_QP2_QP2_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP2_QP2)]
second_QP4_QP4_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP4_QP4)]
second_QP12_QP12_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP12_QP12)]

second_QP2_QP4_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP2_QP4)]
second_QP2_QP12_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP2_QP12)]
second_QP4_QP12_LG = [item for item in LIBHEIF_GIMP_csv if any(qp in item[0] for qp in QP4_QP12)]

GIMP_LIBHEIF_csv = list(zip(GIMP_LIBHEIF_path1_csv, GIMP_LIBHEIF_path1_pkl, GIMP_LIBHEIF_path2_csv, GIMP_LIBHEIF_path2_pkl))

second_QP4_QP2_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP4_QP2)]
second_QP12_QP2_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP12_QP2)]
second_QP12_QP4_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP12_QP4)]

second_QP2_QP2_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP2_QP2)]
second_QP4_QP4_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP4_QP4)]
second_QP12_QP12_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP12_QP12)]

second_QP2_QP4_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP2_QP4)]
second_QP2_QP12_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP2_QP12)]
second_QP4_QP12_GL = [item for item in GIMP_LIBHEIF_csv if any(qp in item[0] for qp in QP4_QP12)]

# print(len(single_QP2_GIMP))
# print(len(single_QP2_LIBHEIF))
# print(len(second_QP4_QP2_GG))
# print(len(second_QP4_QP2_LG))
# print(len(second_QP4_QP2_GL))

In [16]:
# Training_data
train_csv_list1 = single_csv1 + second_largeQP1_csv1 + second_sameQP_csv1 + second_largeQP2_csv1
train_csv_list2 = single_csv2 + second_largeQP1_csv2 + second_sameQP_csv2 + second_largeQP2_csv2
train_csv_list3 = single_csv3 + second_largeQP1_csv3 + second_sameQP_csv3 + second_largeQP2_csv3
train_csv_list4 = single_csv4 + second_largeQP1_csv4 + second_sameQP_csv4 + second_largeQP2_csv4
train_csv_list5 = single_csv5 + second_largeQP1_csv5 + second_sameQP_csv5 + second_largeQP2_csv5
train_csv_list6 = single_csv6 + second_largeQP1_csv6 + second_sameQP_csv6 + second_largeQP2_csv6
train_csv_list7 = single_csv7 + second_largeQP1_csv7 + second_sameQP_csv7 + second_largeQP2_csv7
train_csv_list8 = single_csv8 + second_largeQP1_csv8 + second_sameQP_csv8 + second_largeQP2_csv8
train_csv_list9 = single_csv9 + second_largeQP1_csv9 + second_sameQP_csv9 + second_largeQP2_csv9
train_csv_list10 = single_csv10 + second_largeQP1_csv10 + second_sameQP_csv10 + second_largeQP2_csv10
print("train_csv_list: ", len(train_csv_list10))

train_csv_list:  480


In [17]:
test_QP4_QP2_GG = second_QP4_QP2_GG + single_QP2_GIMP
test_QP12_QP2_GG = second_QP12_QP2_GG + single_QP2_GIMP
test_QP12_QP4_GG = second_QP12_QP4_GG + single_QP4_GIMP

test_QP4_QP2_LG = second_QP4_QP2_LG + single_QP2_GIMP
test_QP12_QP2_LG = second_QP12_QP2_LG + single_QP2_GIMP
test_QP12_QP4_LG = second_QP12_QP4_LG + single_QP4_GIMP

test_QP4_QP2_GL = second_QP4_QP2_GL + single_QP2_LIBHEIF
test_QP12_QP2_GL = second_QP12_QP2_GL + single_QP2_LIBHEIF
test_QP12_QP4_GL = second_QP12_QP4_GL + single_QP4_LIBHEIF

print('test_QP12_QP4_GG: ', len(test_QP12_QP4_GG))
print('test_QP12_QP4_LG: ', len(test_QP12_QP4_LG))
print('test_QP12_QP4_GL: ', len(test_QP12_QP4_GL))

test_QP12_QP4_GG:  20
test_QP12_QP4_LG:  20
test_QP12_QP4_GL:  20


In [18]:
test_QP2_QP2_GG = second_QP2_QP2_GG + single_QP2_GIMP
test_QP4_QP4_GG = second_QP4_QP4_GG + single_QP4_GIMP
test_QP12_QP12_GG = second_QP12_QP12_GG + single_QP12_GIMP

test_QP2_QP2_LG = second_QP2_QP2_LG + single_QP2_GIMP
test_QP4_QP4_LG = second_QP4_QP4_LG + single_QP4_GIMP
test_QP12_QP12_LG = second_QP12_QP12_LG + single_QP12_GIMP

test_QP2_QP2_GL = second_QP2_QP2_GL + single_QP2_LIBHEIF
test_QP4_QP4_GL = second_QP4_QP4_GL + single_QP4_LIBHEIF
test_QP12_QP12_GL = second_QP12_QP12_GL + single_QP12_LIBHEIF

print('test_QP12_QP12_GG: ', len(test_QP12_QP12_GG))
print('test_QP12_QP12_LG: ', len(test_QP12_QP12_LG))
print('test_QP12_QP12_GL: ', len(test_QP12_QP12_GL))

test_QP12_QP12_GG:  20
test_QP12_QP12_LG:  20
test_QP12_QP12_GL:  20


In [19]:
test_QP2_QP4_GG = second_QP2_QP4_GG + single_QP4_GIMP
test_QP2_QP12_GG = second_QP2_QP12_GG + single_QP12_GIMP
test_QP4_QP12_GG = second_QP4_QP12_GG + single_QP12_GIMP

test_QP2_QP4_LG = second_QP2_QP4_LG + single_QP4_GIMP
test_QP2_QP12_LG = second_QP2_QP12_LG + single_QP12_GIMP
test_QP4_QP12_LG = second_QP4_QP12_LG + single_QP12_GIMP

test_QP2_QP4_GL = second_QP2_QP4_GL + single_QP4_LIBHEIF
test_QP2_QP12_GL = second_QP2_QP12_GL + single_QP12_LIBHEIF
test_QP4_QP12_GL = second_QP4_QP12_GL + single_QP12_LIBHEIF

print('test_QP4_QP12_GG: ', len(test_QP4_QP12_GG))
print('test_QP4_QP12_LG: ', len(test_QP4_QP12_LG))
print('test_QP4_QP12_GL: ', len(test_QP4_QP12_GL))

test_QP4_QP12_GG:  20
test_QP4_QP12_LG:  20
test_QP4_QP12_GL:  20


In [20]:
def laplace_smoothing(probabilities, alpha=1):
    """
    ラプラス平滑化を行う関数
    
    Args:
    probabilities (list): 平滑化する確率分布のリスト
    alpha (float): 平滑化パラメータ
    
    Returns:
    smoothed_probabilities (list): 平滑化された確率分布のリスト
    """
    total_count = sum(probabilities)
    num_elements = len(probabilities)
    
    smoothed_probabilities = [(count + alpha) / (total_count + alpha * num_elements) for count in probabilities]
    
    return smoothed_probabilities


def process_train_csv_lists(train_csv_list):
    pu_columns = ["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  
                  "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"]

#     luminance_columns = ["LU1_0","LU1_1","LU1_2","LU1_3",
#                          "LU1_4","LU1_5","LU1_6","LU1_7",
#                          "LU1_8","LU1_9","LU1_10","LU1_11",
#                          "LU1_12","LU1_13","LU1_14","LU1_15",
#                          "LU1_16","LU1_17","LU1_18","LU1_19",
#                          "LU1_20","LU1_21","LU1_22","LU1_23",
#                          "LU1_24","LU1_25","LU1_26","LU1_27",
#                          "LU1_28","LU1_29","LU1_30","LU1_31",
#                          "LU1_32","LU1_33","LU1_34",
                         
#                          "LU2_0","LU2_1","LU2_2","LU2_3",
#                          "LU2_4","LU2_5","LU2_6","LU2_7",
#                          "LU2_8","LU2_9","LU2_10","LU2_11",
#                          "LU2_12","LU2_13","LU2_14","LU2_15",
#                          "LU2_16","LU2_17","LU2_18","LU2_19",
#                          "LU2_20","LU2_21","LU2_22","LU2_23",
#                          "LU2_24","LU2_25","LU2_26","LU2_27",
#                          "LU2_28","LU2_29","LU2_30","LU2_31",
#                          "LU2_32","LU2_33","LU2_34"]
    
    luminance_columns = ["LU1_0","LU1_1","LU1_9","LU1_10","LU1_11","LU1_25","LU1_26","LU1_27",
                         "LU2_0","LU2_1","LU2_9","LU2_10","LU2_11", "LU2_25","LU2_26","LU2_27"]

    chrominance_columns = ["CH1_0", "CH1_1", "CH1_10", "CH1_26", "CH1_34", "CH1_36", 
                           "CH2_0", "CH2_1", "CH2_10", "CH2_26", "CH2_34", "CH2_36"]
    
    
    
    label_columns = ["LABEL"]
    mae1_columns = [f"MAE1_{i}" for i in range(52)]
    mae2_columns = [f"MAE2_{i}" for i in range(52)]
    mae_columns = ["MAE"]
    final_qp_columns = ["FINAL_QP"]
    kl_divergence1 = ["KLD_PU"]
    kl_divergence2 = ["KLD_LUMA"]
    kl_divergence3 = ["KLD_CHROMA"]
    ratio_columns1 = ["RATIO1"]
    ratio_columns2 = ["RATIO2"]
    
    train_df1_1 = pd.DataFrame(columns=pu_columns)
    train_df1_2 = pd.DataFrame(columns=luminance_columns)
    train_df1_3 = pd.DataFrame(columns=chrominance_columns)
    LABEL = pd.DataFrame(columns=label_columns)
    RATIO1 = pd.DataFrame(columns=ratio_columns1)
    RATIO2 = pd.DataFrame(columns=ratio_columns2)
    train_df3 = pd.DataFrame(columns=mae1_columns)
    train_df4 = pd.DataFrame(columns=mae2_columns)
    MAE = pd.DataFrame(columns=mae_columns)
    FINAL_QP = pd.DataFrame(columns=final_qp_columns)
    kl_divergence_df1 = pd.DataFrame(columns=kl_divergence1)
    kl_divergence_df2 = pd.DataFrame(columns=kl_divergence2)
    kl_divergence_df3 = pd.DataFrame(columns=kl_divergence3)

    for path1, path2, path3, path4 in train_csv_list:
        label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
        train_pkl_list = [path2, path4]
        df1 = pd.read_csv(path1)
        df2 = pd.read_csv(path3)
        
        # 平滑化を行う
        probabilities_df1 = laplace_smoothing([df1.loc[i, "pu_counts"] for i in [0,1,2,3,4]])
        probabilities_df2 = laplace_smoothing([df2.loc[i, "pu_counts"] for i in [0,1,2,3,4]])
        kl_divergence1 = entropy(probabilities_df1, probabilities_df2)
        
        probabilities_df3 = laplace_smoothing([df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]])
        probabilities_df4 = laplace_smoothing([df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]])
        kl_divergence2 = entropy(probabilities_df3, probabilities_df4)
        
        probabilities_df5 = laplace_smoothing([df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]])
        probabilities_df6 = laplace_smoothing([df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]])
        kl_divergence3 = entropy(probabilities_df5, probabilities_df6)
        
        
        pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
        # lu_values = [df1.loc[i, "luminance_counts"] for i in range(35)] + [df2.loc[i, "luminance_counts"] for i in range(35)]
        lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
        ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]
        
        train_df1_1 = pd.concat([train_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
        train_df1_2= pd.concat([train_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
        train_df1_3 = pd.concat([train_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)
        
        kl_divergence_df1 = pd.concat([kl_divergence_df1, pd.DataFrame({"KLD_PU": [kl_divergence1]})], ignore_index=True)
        kl_divergence_df2 = pd.concat([kl_divergence_df2, pd.DataFrame({"KLD_LUMA": [kl_divergence2]})], ignore_index=True)
        kl_divergence_df3 = pd.concat([kl_divergence_df3, pd.DataFrame({"KLD_CHROMA": [kl_divergence3]})], ignore_index=True)


        LABEL = pd.concat([LABEL, pd.DataFrame({"LABEL": [label]})], ignore_index=True)

        final_QP = extract_finalQP(train_pkl_list[0])

        mae_d1 = calculate_mae(train_pkl_list[0])
        mae_d2 = calculate_mae(train_pkl_list[1])
        ratio1 = ratio_double_compressed(mae_d1, final_QP)
        ratio2 = ratio_double_compressed(mae_d2, final_QP)

        RATIO1 = pd.concat([RATIO1, pd.DataFrame({"RATIO1": [ratio1]})], ignore_index=True)
        RATIO2 = pd.concat([RATIO2, pd.DataFrame({"RATIO2": [ratio2]})], ignore_index=True)

        train_df3 = pd.concat([train_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1[i]] for i in range(52)})], ignore_index=True)
        train_df4 = pd.concat([train_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2[i]] for i in range(52)})], ignore_index=True)
        MAE = pd.concat([MAE, pd.DataFrame({"MAE": [mae_d1]})], ignore_index=True)
        FINAL_QP = pd.concat([FINAL_QP, pd.DataFrame({"FINAL_QP": [final_QP]})], ignore_index=True)

    train_df1_1.reset_index(drop=True, inplace=True)
    train_df1_2.reset_index(drop=True, inplace=True)
    train_df1_3.reset_index(drop=True, inplace=True)
    LABEL.reset_index(drop=True, inplace=True)
    RATIO1.reset_index(drop=True, inplace=True)
    RATIO2.reset_index(drop=True, inplace=True)
    kl_divergence_df1.reset_index(drop=True, inplace=True)
    kl_divergence_df2.reset_index(drop=True, inplace=True)
    kl_divergence_df3.reset_index(drop=True, inplace=True)

    # train_df = pd.concat([train_df1_1, train_df1_2, train_df1_3, train_df3, train_df4], axis=1)
    train_df = pd.concat([FINAL_QP, train_df1_1, train_df1_2, train_df1_3, kl_divergence_df1, kl_divergence_df2, kl_divergence_df3, RATIO1, RATIO2], axis=1)
    train_df_OG = pd.concat([FINAL_QP, train_df1_1, train_df1_2, train_df1_3, kl_divergence_df1, kl_divergence_df2, kl_divergence_df3, RATIO1, RATIO2], axis=1)

    return train_df, LABEL, MAE, FINAL_QP


In [21]:
train_df1, LABEL1, MAE1, FINAL_QP1 = process_train_csv_lists(train_csv_list1)
train_df2, LABEL2, MAE2, FINAL_QP2 = process_train_csv_lists(train_csv_list2)
train_df3, LABEL3, MAE3, FINAL_QP3 = process_train_csv_lists(train_csv_list3)
train_df4, LABEL4, MAE4, FINAL_QP4 = process_train_csv_lists(train_csv_list4)
train_df5, LABEL5, MAE5, FINAL_QP5 = process_train_csv_lists(train_csv_list5)
train_df6, LABEL6, MAE6, FINAL_QP6 = process_train_csv_lists(train_csv_list6)
train_df7, LABEL7, MAE7, FINAL_QP7 = process_train_csv_lists(train_csv_list7)
train_df8, LABEL8, MAE8, FINAL_QP8 = process_train_csv_lists(train_csv_list8)
train_df9, LABEL9, MAE9, FINAL_QP9 = process_train_csv_lists(train_csv_list9)
train_df10, LABEL10, MAE10, FINAL_QP10 = process_train_csv_lists(train_csv_list10)

In [22]:
# 1番目のCSVファイルを処理する
test_df1, LABEL_t1, MAE_t1, FINAL_QP_t1 = process_train_csv_lists(test_QP4_QP2_GG)

# 2番目のCSVファイルを処理する
test_df2, LABEL_t2, MAE_t2, FINAL_QP_t2 = process_train_csv_lists(test_QP12_QP2_GG)

# 3番目のCSVファイルを処理する
test_df3, LABEL_t3, MAE_t3, FINAL_QP_t3 = process_train_csv_lists(test_QP12_QP4_GG)

# 4番目のCSVファイルを処理する
test_df4, LABEL_t4, MAE_t4, FINAL_QP_t4 = process_train_csv_lists(test_QP4_QP2_LG)

# 5番目のCSVファイルを処理する
test_df5, LABEL_t5, MAE_t5, FINAL_QP_t5 = process_train_csv_lists(test_QP12_QP2_LG)

# 6番目のCSVファイルを処理する
test_df6, LABEL_t6, MAE_t6, FINAL_QP_t6 = process_train_csv_lists(test_QP12_QP4_LG)

# 7番目のCSVファイルを処理する
test_df7, LABEL_t7, MAE_t7, FINAL_QP_t7 = process_train_csv_lists(test_QP4_QP2_GL)

# 8番目のCSVファイルを処理する
test_df8, LABEL_t8, MAE_t8, FINAL_QP_t8 = process_train_csv_lists(test_QP12_QP2_GL)

# 9番目のCSVファイルを処理する
test_df9, LABEL_t9, MAE_t9, FINAL_QP_t9 = process_train_csv_lists(test_QP12_QP4_GL)


# 10番目のCSVファイルを処理する
test_df10, LABEL_t10, MAE_t10, FINAL_QP_t10 = process_train_csv_lists(test_QP2_QP2_GG)

# 11番目のCSVファイルを処理する
test_df11, LABEL_t11, MAE_t11, FINAL_QP_t11 = process_train_csv_lists(test_QP4_QP4_GG)

# 12番目のCSVファイルを処理する
test_df12, LABEL_t12, MAE_t12, FINAL_QP_t12 = process_train_csv_lists(test_QP12_QP12_GG)

# 13番目のCSVファイルを処理する
test_df13, LABEL_t13, MAE_t13, FINAL_QP_t13 = process_train_csv_lists(test_QP2_QP2_LG)

# 14番目のCSVファイルを処理する
test_df14, LABEL_t14, MAE_t14, FINAL_QP_t14 = process_train_csv_lists(test_QP4_QP4_LG)

# 15番目のCSVファイルを処理する
test_df15, LABEL_t15, MAE_t15, FINAL_QP_t15 = process_train_csv_lists(test_QP12_QP12_LG)

# 16番目のCSVファイルを処理する
test_df16, LABEL_t16, MAE_t16, FINAL_QP_t16 = process_train_csv_lists(test_QP2_QP2_GL)

# 17番目のCSVファイルを処理する
test_df17, LABEL_t17, MAE_t17, FINAL_QP_t17 = process_train_csv_lists(test_QP4_QP4_GL)

# 18番目のCSVファイルを処理する
test_df18, LABEL_t18, MAE_t18, FINAL_QP_t18 = process_train_csv_lists(test_QP12_QP12_GL)


# 19番目のCSVファイルを処理する
test_df19, LABEL_t19, MAE_t19, FINAL_QP_t19 = process_train_csv_lists(test_QP2_QP4_GG)

# 20番目のCSVファイルを処理する
test_df20, LABEL_t20, MAE_t20, FINAL_QP_t20 = process_train_csv_lists(test_QP2_QP12_GG)

# 21番目のCSVファイルを処理する
test_df21, LABEL_t21, MAE_t21, FINAL_QP_t21 = process_train_csv_lists(test_QP4_QP12_GG)

# 22番目のCSVファイルを処理する
test_df22, LABEL_t22, MAE_t22, FINAL_QP_t22 = process_train_csv_lists(test_QP2_QP4_LG)

# 23番目のCSVファイルを処理する
test_df23, LABEL_t23, MAE_t23, FINAL_QP_t23 = process_train_csv_lists(test_QP2_QP12_LG)

# 24番目のCSVファイルを処理する
test_df24, LABEL_t24, MAE_t24, FINAL_QP_t24 = process_train_csv_lists(test_QP4_QP12_LG)

# 25番目のCSVファイルを処理する
test_df25, LABEL_t25, MAE_t25, FINAL_QP_t25 = process_train_csv_lists(test_QP2_QP4_GL)

# 26番目のCSVファイルを処理する
test_df26, LABEL_t26, MAE_t26, FINAL_QP_t26 = process_train_csv_lists(test_QP2_QP12_GL)

# 27番目のCSVファイルを処理する
test_df27, LABEL_t27, MAE_t27, FINAL_QP_t27 = process_train_csv_lists(test_QP4_QP12_GL)


In [23]:
print(train_df1)
print(test_df1)

    FINAL_QP PU1_64 PU1_32 PU1_16  PU1_8  PU1_4 PU2_64 PU2_32 PU2_16  PU2_8  PU2_4  LU1_0  LU1_1 LU1_9 LU1_10 LU1_11 LU1_25 LU1_26 LU1_27  LU2_0  LU2_1 LU2_9 LU2_10 LU2_11 LU2_25 LU2_26 LU2_27  CH1_0  CH1_1 CH1_10 CH1_26 CH1_34 CH1_36  CH2_0  CH2_1 CH2_10 CH2_26 CH2_34 CH2_36    KLD_PU  KLD_LUMA KLD_CHROMA    RATIO1    RATIO2
0         24      0  14016  13120  15268  17596      0  13952  13216  15100  17732  11121  11392  3541   3186   2414   2144   2599   1179  11371  12496  3577   3224   2518   2020   2703   1141   7752   2860   4252   3476   1180  40480   7216   3120   4328   3024   1156  41156  0.000032    0.0009   0.001154  0.081029  0.064401
1          5      0      0   3856  26576  29568      0      0   3872  25432  30696   9965   9263  1414   1557   1738   2215   2321   2058  10306  11039  1325   1558   1682   2261   2399   2042  16444   9580   8800  10620   3556  11000  16464   9836   9052  11156   3164  10328  0.000773  0.003102   0.001079  0.927031  0.700492
2         39    

In [24]:
def process_results_to_lists(train_df, LABEL, MAE, FINAL_QP):
    scaler = MinMaxScaler()

    # スケーラーを使って結合したデータをスケーリング
    X_train = scaler.fit_transform(train_df)

    # pandasをndarrayに変換
    MAE_array = MAE.values
    FINAL_QP_array = FINAL_QP.values

    # ラベルの準備
    Y_train = LABEL['LABEL'].astype(int)

    return X_train, MAE_array, FINAL_QP_array, Y_train

def append_results_to_lists(train_df, LABEL, MAE, FINAL_QP, X_train_list, MAE_list, FINAL_QP_list, Y_train_list):
    X_train, MAE_array, FINAL_QP_array, Y_train = process_results_to_lists(train_df, LABEL, MAE, FINAL_QP)
    X_train_list.append(X_train)
    # X_train_onlyGhost_list.append(X_train_onlyGhost)
    MAE_list.append(MAE_array)
    FINAL_QP_list.append(FINAL_QP_array)
    Y_train_list.append(Y_train)

# リストを初期化
X_train_list = []
MAE_list = []
FINAL_QP_list = []
Y_train_list = []

for i in range(1, 28):
    globals()[f'X_test_list{i}'] = []
    globals()[f'MAE_list_t{i}'] = []
    globals()[f'FINAL_QP_list_t{i}'] = []
    globals()[f'Y_test_list{i}'] = []


In [25]:
# データを処理してリストに追加
append_results_to_lists(train_df1, LABEL1, MAE1, FINAL_QP1, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df2, LABEL2, MAE2, FINAL_QP2, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df3, LABEL3, MAE3, FINAL_QP3, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df4, LABEL4, MAE4, FINAL_QP4, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df5, LABEL5, MAE5, FINAL_QP5, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df6, LABEL6, MAE6, FINAL_QP6, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df7, LABEL7, MAE7, FINAL_QP7, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df8, LABEL8, MAE8, FINAL_QP8, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df9, LABEL9, MAE9, FINAL_QP9, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df10, LABEL10, MAE10, FINAL_QP10, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)

In [26]:
for i in range(1, 28):
    eval(f'append_results_to_lists(test_df{i}, LABEL_t{i}, MAE_t{i}, FINAL_QP_t{i}, X_test_list{i}, MAE_list_t{i}, FINAL_QP_list_t{i}, Y_test_list{i})')

In [27]:
# Cの範囲を指定
# C_values = {'C': [0.01]}
C_values = {'C': [0.01, 0.1, 1, 10, 100, 1000, 2000, 3000, 4000, 5000]}

kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# データフレームを初期化
results = pd.DataFrame()

# 1から106までの列名を作成し、データフレームに追加
columns = []
for i in range(1, 28):
    columns.extend([
        f'C_RBF{i}', f'Score_RBF{i}', f'tnr_rbf{i}', f'tpr_rbf{i}', f'AUC_RBF{i}',
        f'C_LINEAR{i}', f'Score_LINEAR{i}', f'tnr_linear{i}', f'tpr_linear{i}', f'AUC_LINEAR{i}',
        f'Threshold{i}', f'Score_old{i}', f'tnr_old{i}', f'tpr_old{i}', f'AUC_old{i}'
    ])
results = pd.DataFrame(columns=columns)

X_index = np.arange(10)  # インデックスとして0から9までの数字を用意

# ループで各分割のtrain_idsとtest_idsを取得
for fold, (train_ids, test_ids) in enumerate(kfold.split(X_index)):
    print(f"<Fold-{fold+1}>")
    print("Train indices:", train_ids)
    print("Test indices:", test_ids)
    
    train_data = [X_train_list[i] for i in train_ids]
    train_label = [Y_train_list[i] for i in train_ids]
    
    val_data = [X_train_list[i] for i in test_ids]
    val_label = [Y_train_list[i] for i in test_ids]
        
    X_train = [item for data in train_data for item in data]
    Y_train = [item for data in train_label for item in data]
    
    X_val = [item for data in val_data for item in data]
    Y_val = [item for data in val_label for item in data]
    
    print(len(Y_train))
    print(len(Y_val))
    
    # リストの作成（1から106まで）
    for i in range(1, 28):
        globals()[f'test_data{i}'] = [item for data in globals()[f'X_test_list{i}'] for item in data]
        globals()[f'test_label{i}'] = [item for data in globals()[f'Y_test_list{i}'] for item in data]
        globals()[f'MAE_data{i}'] = [item for data in globals()[f'MAE_list_t{i}'] for item in data]
        globals()[f'FINAL_QP_data{i}'] = [item for data in globals()[f'FINAL_QP_list_t{i}'] for item in data]

        globals()[f'best_threshold{i}'] = 0
        globals()[f'best_accuracy{i}'] = 0
        globals()[f'best_predicted_labels{i}'] = []
        globals()[f'best_ground_truth_labels{i}'] = []
        globals()[f'tnr_old{i}'] = 0
        globals()[f'tpr_old{i}'] = 0
        
        for threshold in np.arange(0.00, 1.01, 0.01):
            test_old = np.array([is_double_compressed(globals()[f'MAE_data{i}'][j], globals()[f'FINAL_QP_data{i}'][j], threshold) for j in range(20)])
            predicted_labels = test_old.astype(int)
            ground_truth_labels = np.array(globals()[f'test_label{i}'])
            accuracy = np.sum(ground_truth_labels == predicted_labels) / len(ground_truth_labels)
    
            if accuracy > globals()[f'best_accuracy{i}']:
                globals()[f'best_accuracy{i}'] = accuracy
                globals()[f'best_threshold{i}'] = threshold
                globals()[f'best_predicted_labels{i}'] = predicted_labels
                globals()[f'best_ground_truth_labels{i}'] = ground_truth_labels


    best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = 0, None, None    
    best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = 0, None, None

        
    for C_value in C_values['C']:    
        # SVMモデルのインスタンスを作成
        svm_model_RBF = SVC(kernel='rbf', C=C_value, probability=True)
        svm_model_LINEAR = SVC(kernel='linear', C=C_value, probability=True)

        # 訓練データで訓練
        svm_model_RBF.fit(X_train, Y_train)        
        svm_model_LINEAR.fit(X_train, Y_train)

        val_accuracy_RBF = accuracy_score(Y_val, svm_model_RBF.predict(X_val))        
        val_accuracy_LINEAR = accuracy_score(Y_val, svm_model_LINEAR.predict(X_val))

        # 検証データでの精度が最も高かった場合、そのモデルを保存
        if val_accuracy_RBF > best_val_score_RBF:
            best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = val_accuracy_RBF, svm_model_RBF, C_value
            
        if val_accuracy_LINEAR > best_val_score_LINEAR:
            best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = val_accuracy_LINEAR, svm_model_LINEAR, C_value

    
    fold_results = {}
    for i in range(1, 28):
        # RBFモデルの評価
        predictions_RBF = best_svm_model_RBF.predict(globals()[f'test_data{i}'])
        predictions_prob_RBF = best_svm_model_RBF.predict_proba(globals()[f'test_data{i}'])[:, 1]  # ROCカーブ用のスコア
        accuracy_RBF = accuracy_score(globals()[f'test_label{i}'], predictions_RBF)
        globals()[f'accuracy_RBF{i}'] = accuracy_RBF
        report_RBF = classification_report(globals()[f'test_label{i}'], predictions_RBF, digits=4, zero_division=1)
        conf_matrix = confusion_matrix(globals()[f'test_label{i}'], predictions_RBF)
        globals()[f'tnr_rbf{i}'] = conf_matrix[0, 0] / (conf_matrix[0, 0] + conf_matrix[0, 1])
        globals()[f'tpr_rbf{i}'] = conf_matrix[1, 1] / (conf_matrix[1, 0] + conf_matrix[1, 1])
        fpr_rbf, tpr_rbf, _ = roc_curve(globals()[f'test_label{i}'], predictions_prob_RBF)
        auc_rbf = auc(fpr_rbf, tpr_rbf)
        globals()[f'auc_rbf{i}'] = auc_rbf
        # print(report_RBF)

        # LINEARモデルの評価
        predictions_LINEAR = best_svm_model_LINEAR.predict(globals()[f'test_data{i}'])
        predictions_prob_LINEAR = best_svm_model_LINEAR.predict_proba(globals()[f'test_data{i}'])[:, 1]  # ROCカーブ用のスコア
        accuracy_LINEAR = accuracy_score(globals()[f'test_label{i}'], predictions_LINEAR)
        globals()[f'accuracy_LINEAR{i}'] = accuracy_LINEAR
        report_LINEAR = classification_report(globals()[f'test_label{i}'], predictions_LINEAR, digits=4, zero_division=1)
        conf_matrix = confusion_matrix(globals()[f'test_label{i}'], predictions_LINEAR)
        globals()[f'tnr_linear{i}'] = conf_matrix[0, 0] / (conf_matrix[0, 0] + conf_matrix[0, 1])
        globals()[f'tpr_linear{i}'] = conf_matrix[1, 1] / (conf_matrix[1, 0] + conf_matrix[1, 1])
        fpr_linear, tpr_linear, _ = roc_curve(globals()[f'test_label{i}'], predictions_prob_LINEAR)
        auc_linear = auc(fpr_linear, tpr_linear)
        globals()[f'auc_linear{i}'] = auc_linear
        # print(report_LINEAR)

        # Old modelの評価
        thresholds = np.arange(0.00, 1.01, 0.01)
        tpr_old_list = []
        fpr_old_list = []
        for threshold in thresholds:
            predicted_labels_old = np.array([is_double_compressed(globals()[f'MAE_data{i}'][j], globals()[f'FINAL_QP_data{i}'][j], threshold) for j in range(20)])
            tn, fp, fn, tp = confusion_matrix(globals()[f'test_label{i}'], predicted_labels_old).ravel()
            tpr_old = tp / (tp + fn)
            fpr_old = fp / (fp + tn)
            tpr_old_list.append(tpr_old)
            fpr_old_list.append(fpr_old)
        
        auc_old = auc(fpr_old_list, tpr_old_list)
        globals()[f'auc_old{i}'] = auc_old

        # fold_resultsに保存
        fold_results[f'C_RBF{i}'] = best_c_value_RBF
        fold_results[f'Score_RBF{i}'] = globals()[f'accuracy_RBF{i}']
        fold_results[f'tnr_rbf{i}'] = globals()[f'tnr_rbf{i}']
        fold_results[f'tpr_rbf{i}'] = globals()[f'tpr_rbf{i}']
        fold_results[f'AUC_RBF{i}'] = globals()[f'auc_rbf{i}']

        fold_results[f'C_LINEAR{i}'] = best_c_value_LINEAR
        fold_results[f'Score_LINEAR{i}'] = globals()[f'accuracy_LINEAR{i}']
        fold_results[f'tnr_linear{i}'] = globals()[f'tnr_linear{i}']
        fold_results[f'tpr_linear{i}'] = globals()[f'tpr_linear{i}']
        fold_results[f'AUC_LINEAR{i}'] = globals()[f'auc_linear{i}']

        fold_results[f'Threshold{i}'] = globals()[f'best_threshold{i}']
        fold_results[f'Score_old{i}'] = globals()[f'best_accuracy{i}']
        fold_results[f'tnr_old{i}'] = globals()[f'tnr_old{i}']
        fold_results[f'tpr_old{i}'] = globals()[f'tpr_old{i}']
        fold_results[f'AUC_old{i}'] = globals()[f'auc_old{i}']

    # 結果をデータフレームに追加
    results = pd.concat([results, pd.DataFrame(fold_results, index=[fold])], axis=0)



<Fold-1>
Train indices: [0 1 2 3 4 5 6 7 9]
Test indices: [8]
4320
480
<Fold-2>
Train indices: [0 2 3 4 5 6 7 8 9]
Test indices: [1]
4320
480
<Fold-3>
Train indices: [0 1 2 3 4 6 7 8 9]
Test indices: [5]
4320
480
<Fold-4>
Train indices: [1 2 3 4 5 6 7 8 9]
Test indices: [0]
4320
480
<Fold-5>
Train indices: [0 1 2 3 4 5 6 8 9]
Test indices: [7]
4320
480
<Fold-6>
Train indices: [0 1 3 4 5 6 7 8 9]
Test indices: [2]
4320
480
<Fold-7>
Train indices: [0 1 2 3 4 5 6 7 8]
Test indices: [9]
4320
480
<Fold-8>
Train indices: [0 1 2 3 5 6 7 8 9]
Test indices: [4]
4320
480
<Fold-9>
Train indices: [0 1 2 4 5 6 7 8 9]
Test indices: [3]
4320
480
<Fold-10>
Train indices: [0 1 2 3 4 5 7 8 9]
Test indices: [6]
4320
480


In [28]:
# 各統計情報を100倍して小数点第2位までの表記に変更
statistics_data = {
    'Model': [f'RBF{i}' for i in range(1, 28)] + [f'LINEAR{i}' for i in range(1, 28)] + [f'OLD{i}' for i in range(1, 28)],
    'Average TNR': [
        round(results[f'tnr_rbf{i}'].mean() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'tnr_linear{i}'].mean() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'tnr_old{i}'].mean() * 100, 2) for i in range(1, 28)
    ],
    'Average TPR': [
        round(results[f'tpr_rbf{i}'].mean() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'tpr_linear{i}'].mean() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'tpr_old{i}'].mean() * 100, 2) for i in range(1, 28)
    ],
    'Average Test Score': [
        round(results[f'Score_RBF{i}'].mean() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'Score_LINEAR{i}'].mean() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'Score_old{i}'].mean() * 100, 2) for i in range(1, 28)
    ],
    'Standard Deviation': [
        round(results[f'Score_RBF{i}'].std() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'Score_LINEAR{i}'].std() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'Score_old{i}'].std() * 100, 2) for i in range(1, 28)
    ],
    'Max Test Score': [
        round(results[f'Score_RBF{i}'].max() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'Score_LINEAR{i}'].max() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'Score_old{i}'].max() * 100, 2) for i in range(1, 28)
    ],
    'Min Test Score': [
        round(results[f'Score_RBF{i}'].min() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'Score_LINEAR{i}'].min() * 100, 2) for i in range(1, 28)
    ] + [
        round(results[f'Score_old{i}'].min() * 100, 2) for i in range(1, 28)
    ],
    'Average AUC': [
        round(results[f'AUC_RBF{i}'].mean(), 2) for i in range(1, 28)
    ] + [
        round(results[f'AUC_LINEAR{i}'].mean(), 2) for i in range(1, 28)
    ] + [
        round(results[f'AUC_old{i}'].mean(), 2) for i in range(1, 28)
    ],
    'AUC STD': [
        round(results[f'AUC_RBF{i}'].std(), 2) for i in range(1, 28)
    ] + [
        round(results[f'AUC_LINEAR{i}'].std(), 2) for i in range(1, 28)
    ] + [
        round(results[f'AUC_old{i}'].std(), 2) for i in range(1, 28)
    ],
    'Max AUC': [
        round(results[f'AUC_RBF{i}'].max(), 2) for i in range(1, 28)
    ] + [
        round(results[f'AUC_LINEAR{i}'].max(), 2) for i in range(1, 28)
    ] + [
        round(results[f'AUC_old{i}'].max(), 2) for i in range(1, 28)
    ],
    'Min AUC': [
        round(results[f'AUC_RBF{i}'].min(), 2) for i in range(1, 28)
    ] + [
        round(results[f'AUC_LINEAR{i}'].min(), 2) for i in range(1, 28)
    ] + [
        round(results[f'AUC_old{i}'].min(), 2) for i in range(1, 28)
    ],
}

# DataFrameを作成
statistics_df = pd.DataFrame(statistics_data)

# 表示
print(statistics_df)



# 関数を定義して、各セグメントの統計情報を計算
def calculate_statistics(segment, prefix):
    # モデル番号を抽出してフラットなリストに変換
    model_numbers = statistics_df['Model'].str.extract(r'(\d+)').astype(int)[0]
    is_in_segment = model_numbers.isin(segment)
    is_correct_prefix = statistics_df['Model'].str.startswith(prefix)
    
    tnr_mean = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average TNR'].mean(), 2)
    tpr_mean = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average TPR'].mean(), 2)
    acc_mean = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average Test Score'].mean(), 2)
    acc_std = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Standard Deviation'].std(), 2)
    
    acc_max = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Max Test Score'].max(), 2)
    acc_min = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Min Test Score'].min(), 2)

    auc_mean = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average AUC'].mean(), 2)
    auc_std = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'AUC STD'].std(), 2)
    auc_max = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Max AUC'].max(), 2)
    auc_min = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Min AUC'].min(), 2)
    
    return tnr_mean, tpr_mean, acc_mean, acc_std, acc_max, acc_min, auc_mean, auc_std, auc_max, auc_min

# セグメントを定義
segments = {
    '1_10': list(range(1, 10)),
    '10_19': list(range(10, 19)),
    '19_28': list(range(19, 28))
}

# 結果を保存するリスト
results_summary = []

# 統計情報を計算して表示
for model in ['RBF', 'LINEAR', 'OLD']:
    for segment_name, segment in segments.items():
        tnr_mean, tpr_mean, acc_mean, acc_std, acc_max, acc_min, auc_mean, auc_std, auc_max, auc_min = calculate_statistics(segment, model)
        results_summary.append({
            'Model': f'{model}_{segment_name}',
            'Average TNR': tnr_mean,
            'Average TPR': tpr_mean,
            'Average Test Score': acc_mean,
            'Test Score STD': acc_std,
            'Test Score MAX': acc_max,
            'Test Score MIN': acc_min,
            'Average AUC': auc_mean,
            'AUC STD': auc_std,
            'Max AUC': auc_max,
            'Min AUC': auc_min
        })

# DataFrameに変換
summary_df = pd.DataFrame(results_summary)

# 表示
print(summary_df)


    Model  Average TNR  Average TPR  Average Test Score  Standard Deviation  Max Test Score  Min Test Score  Average AUC  AUC STD  Max AUC  Min AUC
0    RBF1         67.0         67.0                67.0                5.87            75.0            60.0         0.74     0.03     0.78     0.67
1    RBF2         94.0         73.0                83.5                5.30            90.0            75.0         0.96     0.02     0.99     0.92
2    RBF3         88.0         83.0                85.5                5.99            95.0            75.0         0.94     0.03     0.98     0.90
3    RBF4         76.0         70.0                73.0                4.22            80.0            65.0         0.80     0.03     0.83     0.74
4    RBF5         89.0        100.0                94.5                2.84           100.0            90.0         1.00     0.00     1.00     0.99
..    ...          ...          ...                 ...                 ...             ...             ...     

In [29]:
print(results['C_RBF1'])
print(results['C_LINEAR1'])

0    100
1    100
2    100
3     10
4    100
5     10
6    100
7     10
8     10
9     10
Name: C_RBF1, dtype: object
0     100
1    1000
2    1000
3       1
4     100
5     100
6    2000
7      10
8       1
9    2000
Name: C_LINEAR1, dtype: object


In [30]:
statistics_df.to_csv('statistics_data8.csv', index=False)