In [1]:
import random
import os
import os.path as osp
import re
import pickle
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import MinMaxScaler
from scipy.signal import find_peaks
import seaborn as sns
import pickle
import torch
import glob
from scipy.stats import entropy
from collections import defaultdict, Counter

pd.set_option('display.expand_frame_repr', False)  # DataFrameを改行せずに表示
pd.set_option('display.max_columns', None)  # すべての列を表示

In [2]:
def extract_finalQP(filename):
    match = re.search(r'2ndQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None


def extract_1stQP(filename):
    match = re.search(r'1stQP(\d+)', filename)
    if match:
        return int(match.group(1))
    
    return None


def ratio_double_compressed(mean_difference, final_QP):
    # mean_difference = mean_difference[0]
    # final_QP = final_QP[0]
    clamped_mean_difference = np.maximum(mean_difference, -0.01)
    
    #全体のエネルギーを計算
    energy = np.sum(np.square(clamped_mean_difference))
    # energy = np.sum(np.square(mean_difference))
    
    #QP2より右側のエネルギーを計算
    right_energy = np.sum(np.square(clamped_mean_difference[final_QP+1:52]))

        
    # エネルギー比を計算して閾値と比較
    if energy > 0:
        return right_energy / energy
    
    else:
        return 0

    
def is_double_compressed(mean_difference, final_QP, threshold):
    mean_difference = mean_difference[0]
    final_QP = final_QP[0]
    clamped_mean_difference = np.maximum(mean_difference, -0.01)
    
    #全体のエネルギーを計算
    energy = np.sum(np.square(clamped_mean_difference))
    # energy = np.sum(np.square(mean_difference))
    
    #QP2より右側のエネルギーを計算
    right_energy = np.sum(np.square(clamped_mean_difference[final_QP+1:52]))
    # right_energy = np.sum(np.square(mean_difference[final_QP+1:52]))
    
    # print('energy: ', energy)
    # print('R-energy: ', right_energy)
    # print('Ratio: ', right_energy / energy)
    
    
    # エネルギー比を計算して閾値と比較
    if energy <= 0:
        return -1
    
    elif (right_energy / energy) != 0 and (right_energy / energy) > threshold:
        return True
    
    elif (right_energy / energy) != 0 and (right_energy / energy) <= threshold:
        return False
    
    else:
        return -1

def calculate_mae(file_path):
    try:
        with open(file_path, 'rb') as file:
            loaded_data, loaded_data_shifted = pickle.load(file)
    except Exception as e:
        print(f"Error occurred while loading {file_path}: {e}")
        return None

    # タプル内のリストを抽出
    original_mae = np.array(loaded_data)
    shifted_mae = np.array(loaded_data_shifted)

    # Coding ghostを計算してリストに格納する
    mae_difference = shifted_mae - original_mae
    
    # mae_differenceの各要素においてマイナスの値を0に変換
    # mae_difference_positive = np.maximum(mae_difference, 0)
    
    return mae_difference

In [3]:

rootpath_csv = "/Prove/Yoshihisa/HEIF_ghost/HEIF_IMAGES_CSV/"
rootpath_pkl = "/Prove/Yoshihisa/HEIF_ghost/PKL/"

train_list1 = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30"]
train_list2 = ["31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60"]
train_list3 = ["61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90"]
train_list4 = ["91", "92", "93", "94", "95", "96", "97", "98", "99", "100", "101", "102", "103", "104", "105", "106", "107", "108", "109", "110", "111", "112", "113", "114", "115", "116", "117", "118", "119", "120"]
train_list5 = ["121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131", "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142", "143", "144", "145", "146", "147", "148", "149", "150"]
train_list6 = ["151", "152", "153", "154", "155", "156", "157", "158", "159", "160", "161", "162", "163", "164", "165", "166", "167", "168", "169", "170", "171", "172", "173", "174", "175", "176", "177", "178", "179", "180"]
train_list7 = ["181", "182", "183", "184", "185", "186", "187", "188", "189", "190", "191", "192", "193", "194", "195", "196", "197", "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208", "209", "210"]
train_list8 = ["211", "212", "213", "214", "215", "216", "217", "218", "219", "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230", "231", "232", "233", "234", "235", "236", "237", "238", "239", "240"]
train_list9 = ["241", "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252", "253", "254", "255", "256", "257", "258", "259", "260", "261", "262", "263", "264", "265", "266", "267", "268", "269", "270"]
train_list10 = ["271", "272", "273", "274", "275", "276", "277", "278", "279", "280", "281", "282", "283", "284", "285", "286", "287", "288", "289", "290", "291", "292", "293", "294", "295", "296", "297", "298", "299", "300"]

all_train_lists = [train_list1, train_list2, train_list3, train_list4, train_list5,
                   train_list6, train_list7, train_list8, train_list9, train_list10]

# すべてのリストを1つのリストに結合する
combined_train_list = sum(all_train_lists, [])

# リストの順序をランダムにシャッフルする
random.shuffle(combined_train_list)

# シャッフルされたリストを10個のグループに分割する
train_lists = [combined_train_list[i:i+30] for i in range(0, len(combined_train_list), 30)]
print(train_lists)



# CSV関連のリストを生成
csv_single_listsA = [[] for _ in range(10)]
csv_single_recompress_listsA = [[] for _ in range(10)]
csv_second_largeQP1_listsA = [[] for _ in range(10)]
csv_second_recompress_largeQP1_listsA = [[] for _ in range(10)]
csv_second_sameQP_listsA = [[] for _ in range(10)]
csv_second_recompress_sameQP_listsA = [[] for _ in range(10)]
csv_second_largeQP2_listsA = [[] for _ in range(10)]
csv_second_recompress_largeQP2_listsA = [[] for _ in range(10)]

def process_csv_lists(rootpath, train_list, single_list, single_recompress_list, 
                      second_largeQP1_list, second_recompress_largeQP1_list, 
                      second_sameQP_list, second_recompress_sameQP_list,
                      second_largeQP2_list, second_recompress_largeQP2_list):
    
    for image in train_list:
        single_path = osp.join(rootpath, f'HEIF_images_single_csv/{image}_*')
        single_recompress_path = osp.join(rootpath, f'HEIF_images_second_sameQP_csv/{image}_*')
        
        second_largeQP1_path = osp.join(rootpath, f'HEIF_images_second_csv/{image}_*')
        second_recompress_largeQP1_path = osp.join(rootpath, f'HEIF_images_triple_csv/{image}_*')
        
        second_sameQP_path = osp.join(rootpath, f'HEIF_images_second_sameQP_csv/{image}_*')
        second_recompress_sameQP_path = osp.join(rootpath, f'HEIF_images_triple_sameQP_csv/{image}_*')
        
        second_largeQP2_path = osp.join(rootpath, f'HEIF_images_second_largeQP_csv/{image}_*')
        second_recompress_largeQP2_path = osp.join(rootpath, f'HEIF_images_triple_largeQP_csv/{image}_*')
        
        for path in sorted(glob.glob(single_path)):
            single_list.append(path)
        for path in sorted(glob.glob(single_recompress_path)):
            single_recompress_list.append(path)
        for path in sorted(glob.glob(second_largeQP1_path)):
            second_largeQP1_list.append(path)
        for path in sorted(glob.glob(second_recompress_largeQP1_path)):
            second_recompress_largeQP1_list.append(path)
        for path in sorted(glob.glob(second_sameQP_path)):
            second_sameQP_list.append(path)
        for path in sorted(glob.glob(second_recompress_sameQP_path)):
            second_recompress_sameQP_list.append(path)
        for path in sorted(glob.glob(second_largeQP2_path)):
            second_largeQP2_list.append(path)
        for path in sorted(glob.glob(second_recompress_largeQP2_path)):
            second_recompress_largeQP2_list.append(path)

# 各カテゴリのCSVリストを生成
for train_list, single_list, single_recompress_list in zip(train_lists, 
                                                           csv_single_listsA,
                                                           csv_single_recompress_listsA):
    process_csv_lists(rootpath_csv, train_list, single_list, single_recompress_list, 
                      [], [], [], [], [], [])


for train_list, second_largeQP1_list, second_recompress_largeQP1_list, second_sameQP_list, second_recompress_sameQP_list, second_largeQP2_list, second_recompress_largeQP2_list in zip(train_lists, 
                                                                                                                                                                                                                   csv_second_largeQP1_listsA,
                                                                                                                                                                                                                   csv_second_recompress_largeQP1_listsA,
                                                                                                                                                                                                                   csv_second_sameQP_listsA,
                                                                                                                                                                                                                   csv_second_recompress_sameQP_listsA,
                                                                                                                                                                                                                   csv_second_largeQP2_listsA,
                                                                                                                                                                                                                   csv_second_recompress_largeQP2_listsA):
    process_csv_lists(rootpath_csv, train_list, [], [], 
                      second_largeQP1_list, second_recompress_largeQP1_list, 
                      second_sameQP_list, second_recompress_sameQP_list,
                      second_largeQP2_list, second_recompress_largeQP2_list)

    

    
# 出力リストを初期化
pkl_single_listsA = [[] for _ in range(10)]
pkl_single_recompress_listsA = [[] for _ in range(10)]
pkl_second_largeQP1_listsA = [[] for _ in range(10)]
pkl_second_recompress_largeQP1_listsA = [[] for _ in range(10)]
pkl_second_sameQP_listsA = [[] for _ in range(10)]
pkl_second_recompress_sameQP_listsA = [[] for _ in range(10)]
pkl_second_largeQP2_listsA = [[] for _ in range(10)]
pkl_second_recompress_largeQP2_listsA = [[] for _ in range(10)]    

def process_train_lists_pkl(rootpath, train_list, single_list, single_recompress_list, 
                            second_largeQP1_list, second_recompress_largeQP1_list, 
                            second_sameQP_list, second_recompress_sameQP_list,
                            second_largeQP2_list, second_recompress_largeQP2_list):
    
    for image in train_list:
        single_path = osp.join(rootpath, f'pkl_single/{image}_*')
        single_recompress_path = osp.join(rootpath, f'pkl_second_sameQP/{image}_*')
        
        second_largeQP1_path = osp.join(rootpath, f'pkl_second/{image}_*')
        second_recompress_largeQP1_path = osp.join(rootpath, f'pkl_triple/{image}_*')
        
        second_sameQP_path = osp.join(rootpath, f'pkl_second_sameQP/{image}_*')
        second_recompress_sameQP_path = osp.join(rootpath, f'pkl_triple_sameQP/{image}_*')
        
        second_largeQP2_path = osp.join(rootpath, f'pkl_second_largeQP/{image}_*')
        second_recompress_largeQP2_path = osp.join(rootpath, f'pkl_triple_largeQP/{image}_*')
        

        for path in sorted(glob.glob(single_path)):
            single_list.append(path)
        for path in sorted(glob.glob(single_recompress_path)):
            single_recompress_list.append(path)
            
        for path in sorted(glob.glob(second_largeQP1_path)):
            second_largeQP1_list.append(path)
        for path in sorted(glob.glob(second_recompress_largeQP1_path)):
            second_recompress_largeQP1_list.append(path)
                
        for path in sorted(glob.glob(second_sameQP_path)):
            second_sameQP_list.append(path)
        for path in sorted(glob.glob(second_recompress_sameQP_path)):
            second_recompress_sameQP_list.append(path)
            
        for path in sorted(glob.glob(second_largeQP2_path)):
            second_largeQP2_list.append(path)
        for path in sorted(glob.glob(second_recompress_largeQP2_path)):
            second_recompress_largeQP2_list.append(path)

# 各カテゴリのリストを生成
for train_list, single_list, single_recompress_list in zip(train_lists, 
                                                           pkl_single_listsA,
                                                           pkl_single_recompress_listsA):
    process_train_lists_pkl(rootpath_pkl, train_list, single_list, single_recompress_list, 
                            [], [], [], [], [], [])


for train_list, second_largeQP1_list, second_recompress_largeQP1_list, second_sameQP_list, second_recompress_sameQP_list, second_largeQP2_list, second_recompress_largeQP2_list in zip(train_lists, 
                                                                                                                                                                                                                   pkl_second_largeQP1_listsA,
                                                                                                                                                                                                                   pkl_second_recompress_largeQP1_listsA,
                                                                                                                                                                                                                   pkl_second_sameQP_listsA,
                                                                                                                                                                                                                   pkl_second_recompress_sameQP_listsA,
                                                                                                                                                                                                                   pkl_second_largeQP2_listsA,
                                                                                                                                                                                                                   pkl_second_recompress_largeQP2_listsA):
    process_train_lists_pkl(rootpath_pkl, train_list, [], [], 
                            second_largeQP1_list, second_recompress_largeQP1_list, 
                            second_sameQP_list, second_recompress_sameQP_list,
                            second_largeQP2_list, second_recompress_largeQP2_list)


print("\nCSV Single ListsA:")
for i, lst in enumerate(csv_single_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Single Recompress ListsA:")
for i, lst in enumerate(csv_single_recompress_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Large QP1 ListsA:")
for i, lst in enumerate(csv_second_largeQP1_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Recompress Large QP1 ListsA:")
for i, lst in enumerate(csv_second_recompress_largeQP1_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Same QP ListsA:")
for i, lst in enumerate(csv_second_sameQP_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Recompress Same QP ListsA:")
for i, lst in enumerate(csv_second_recompress_sameQP_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Large QP2 ListsA:")
for i, lst in enumerate(csv_second_largeQP2_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

print("\nCSV Second Recompress Large QP2 ListsA:")
for i, lst in enumerate(csv_second_recompress_largeQP2_listsA, 1):
    print(f"CSV List {i}A: {len(lst)}")

# 出力リストを表示
print("\nPKL Single ListsA:")
for i, lst in enumerate(pkl_single_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Single Recompress ListsA:")
for i, lst in enumerate(pkl_single_recompress_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Large QP1 ListsA:")
for i, lst in enumerate(pkl_second_largeQP1_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Recompress Large QP1 ListsA:")
for i, lst in enumerate(pkl_second_recompress_largeQP1_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Same QP ListsA:")
for i, lst in enumerate(pkl_second_sameQP_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Recompress Same QP ListsA:")
for i, lst in enumerate(pkl_second_recompress_sameQP_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Large QP2 ListsA:")
for i, lst in enumerate(pkl_second_largeQP2_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

print("\nPKL Second Recompress Large QP2 ListsA:")
for i, lst in enumerate(pkl_second_recompress_largeQP2_listsA, 1):
    print(f"PKL List {i}A: {len(lst)}")

[['141', '89', '281', '272', '95', '172', '159', '238', '283', '198', '74', '266', '278', '226', '106', '212', '231', '124', '274', '113', '75', '295', '166', '25', '285', '249', '40', '279', '225', '144'], ['93', '13', '241', '254', '255', '103', '284', '230', '282', '180', '2', '184', '114', '83', '164', '299', '214', '208', '232', '52', '123', '20', '292', '49', '43', '55', '218', '91', '143', '3'], ['171', '29', '268', '267', '64', '162', '222', '98', '202', '41', '261', '121', '125', '110', '191', '44', '163', '220', '66', '280', '119', '259', '104', '177', '59', '147', '86', '107', '203', '32'], ['51', '189', '78', '5', '130', '216', '201', '38', '243', '30', '157', '60', '63', '152', '15', '9', '199', '140', '28', '288', '291', '167', '138', '37', '176', '117', '296', '73', '101', '210'], ['206', '193', '11', '205', '185', '187', '287', '137', '79', '111', '118', '175', '173', '276', '99', '135', '229', '197', '109', '156', '82', '161', '251', '252', '271', '213', '211', '286', 

In [4]:
QP5 = ["_1stQP5_"]
QP10 = ["_1stQP10_"]
QP16 = ["_1stQP16_"]
QP20 = ["_1stQP20_"]
QP24 = ["_1stQP24_"]
QP27 = ["_1stQP27_"]
QP32 = ["_1stQP32_"]
QP39 = ["_1stQP39_"]
QP42 = ["_1stQP42_"]
QP45 = ["_1stQP45_"]

In [5]:
QP10_QP5 = ["_1stQP10_2ndQP5_"]

QP15_QP5 = ["_1stQP15_2ndQP5_"]
QP15_QP10 = ["_1stQP15_2ndQP10_"]

QP20_QP5 = ["_1stQP20_2ndQP5_"]
QP20_QP10 = ["_1stQP20_2ndQP10_"]
QP20_QP16 = ["_1stQP20_2ndQP16_"]

QP25_QP5 = ["_1stQP25_2ndQP5_"]
QP25_QP10 = ["_1stQP25_2ndQP10_"]
QP25_QP16 = ["_1stQP25_2ndQP16_"]
QP25_QP20 = ["_1stQP25_2ndQP20_"]
QP25_QP24 = ["_1stQP25_2ndQP24_"]

QP30_QP5 = ["_1stQP30_2ndQP5_"]
QP30_QP10 = ["_1stQP30_2ndQP10_"]
QP30_QP16 = ["_1stQP30_2ndQP16_"]
QP30_QP20 = ["_1stQP30_2ndQP20_"]
QP30_QP24 = ["_1stQP30_2ndQP24_"]
QP30_QP27 = ["_1stQP30_2ndQP27_"]

QP32_QP5 = ["_1stQP32_2ndQP5_"]
QP32_QP10 = ["_1stQP32_2ndQP10_"]
QP32_QP16 = ["_1stQP32_2ndQP16_"]
QP32_QP20 = ["_1stQP32_2ndQP20_"]
QP32_QP24 = ["_1stQP32_2ndQP24_"]
QP32_QP27 = ["_1stQP32_2ndQP27_"]

QP35_QP5 = ["_1stQP35_2ndQP5_"]
QP35_QP10 = ["_1stQP35_2ndQP10_"]
QP35_QP16 = ["_1stQP35_2ndQP16_"]
QP35_QP20 = ["_1stQP35_2ndQP20_"]
QP35_QP24 = ["_1stQP35_2ndQP24_"]
QP35_QP27 = ["_1stQP35_2ndQP27_"]
QP35_QP32 = ["_1stQP35_2ndQP32_"]

QP40_QP5 = ["_1stQP40_2ndQP5_"]
QP40_QP10 = ["_1stQP40_2ndQP10_"]
QP40_QP16 = ["_1stQP40_2ndQP16_"]
QP40_QP20 = ["_1stQP40_2ndQP20_"]
QP40_QP24 = ["_1stQP40_2ndQP24_"]
QP40_QP27 = ["_1stQP40_2ndQP27_"]
QP40_QP32 = ["_1stQP40_2ndQP32_"]
QP40_QP39 = ["_1stQP40_2ndQP39_"]

QP45_QP5 = ["_1stQP45_2ndQP5_"]
QP45_QP10 = ["_1stQP45_2ndQP10_"]
QP45_QP16 = ["_1stQP45_2ndQP16_"]
QP45_QP20 = ["_1stQP45_2ndQP20_"]
QP45_QP24 = ["_1stQP45_2ndQP24_"]
QP45_QP27 = ["_1stQP45_2ndQP27_"]
QP45_QP32 = ["_1stQP45_2ndQP32_"]
QP45_QP39 = ["_1stQP45_2ndQP39_"]
QP45_QP42 = ["_1stQP45_2ndQP42_"]

QP50_QP5 = ["_1stQP50_2ndQP5_"]
QP50_QP10 = ["_1stQP50_2ndQP10_"]
QP50_QP16 = ["_1stQP50_2ndQP16_"]
QP50_QP20 = ["_1stQP50_2ndQP20_"]
QP50_QP24 = ["_1stQP50_2ndQP24_"]
QP50_QP27 = ["_1stQP50_2ndQP27_"]
QP50_QP32 = ["_1stQP50_2ndQP32_"]
QP50_QP39 = ["_1stQP50_2ndQP39_"]
QP50_QP42 = ["_1stQP50_2ndQP42_"]
QP50_QP45 = ["_1stQP50_2ndQP45_"]

In [6]:
QP5_QP5 = ["_1stQP5_2ndQP5"]
QP10_QP10 = ["_1stQP10_2ndQP10"]
QP16_QP16 = ["_1stQP16_2ndQP16"]
QP20_QP20 = ["_1stQP20_2ndQP20"]
QP24_QP24 = ["_1stQP24_2ndQP24"]
QP27_QP27 = ["_1stQP27_2ndQP27"]
QP32_QP32 = ["_1stQP32_2ndQP32"]
QP39_QP39 = ["_1stQP39_2ndQP39"]
QP42_QP42 = ["_1stQP42_2ndQP42"]
QP45_QP45 = ["_1stQP45_2ndQP45"]

In [7]:
QP10_QP16 = ["_1stQP10_2ndQP16"]
QP10_QP20 = ["_1stQP10_2ndQP20"]
QP10_QP24 = ["_1stQP10_2ndQP24"]
QP10_QP27 = ["_1stQP10_2ndQP27"]
QP10_QP32 = ["_1stQP10_2ndQP32"]
QP10_QP39 = ["_1stQP10_2ndQP39"]
QP10_QP42 = ["_1stQP10_2ndQP42"]
QP10_QP45 = ["_1stQP10_2ndQP45"]

QP15_QP16 = ["_1stQP15_2ndQP16"]
QP15_QP20 = ["_1stQP15_2ndQP20"]
QP15_QP24 = ["_1stQP15_2ndQP24"]
QP15_QP27 = ["_1stQP15_2ndQP27"]
QP15_QP32 = ["_1stQP15_2ndQP32"]
QP15_QP39 = ["_1stQP15_2ndQP39"]
QP15_QP42 = ["_1stQP15_2ndQP42"]
QP15_QP45 = ["_1stQP15_2ndQP45"]

QP20_QP24 = ["_1stQP20_2ndQP24"]
QP20_QP27 = ["_1stQP20_2ndQP27"]
QP20_QP32 = ["_1stQP20_2ndQP32"]
QP20_QP39 = ["_1stQP20_2ndQP39"]
QP20_QP42 = ["_1stQP20_2ndQP42"]
QP20_QP45 = ["_1stQP20_2ndQP45"]

QP25_QP27 = ["_1stQP25_2ndQP27"]
QP25_QP32 = ["_1stQP25_2ndQP32"]
QP25_QP39 = ["_1stQP25_2ndQP39"]
QP25_QP42 = ["_1stQP25_2ndQP42"]
QP25_QP45 = ["_1stQP25_2ndQP45"]

QP30_QP32 = ["_1stQP30_2ndQP32"]
QP30_QP39 = ["_1stQP30_2ndQP39"]
QP30_QP42 = ["_1stQP30_2ndQP42"]
QP30_QP45 = ["_1stQP30_2ndQP45"]

QP32_QP39 = ["_1stQP32_2ndQP39"]
QP32_QP42 = ["_1stQP32_2ndQP42"]
QP32_QP45 = ["_1stQP32_2ndQP45"]

QP35_QP39 = ["_1stQP35_2ndQP39"]
QP35_QP42 = ["_1stQP35_2ndQP42"]
QP35_QP45 = ["_1stQP35_2ndQP45"]

QP40_QP42 = ["_1stQP40_2ndQP42"]
QP40_QP45 = ["_1stQP40_2ndQP45"]

In [8]:
# single_listsおよびsingle_recompress_listsは初期化されている前提
single_csv1 = list(zip(csv_single_listsA[0], pkl_single_listsA[0], csv_single_recompress_listsA[0], pkl_single_recompress_listsA[0]))
single_csv2 = list(zip(csv_single_listsA[1], pkl_single_listsA[1], csv_single_recompress_listsA[1], pkl_single_recompress_listsA[1]))
single_csv3 = list(zip(csv_single_listsA[2], pkl_single_listsA[2], csv_single_recompress_listsA[2], pkl_single_recompress_listsA[2]))
single_csv4 = list(zip(csv_single_listsA[3], pkl_single_listsA[3], csv_single_recompress_listsA[3], pkl_single_recompress_listsA[3]))
single_csv5 = list(zip(csv_single_listsA[4], pkl_single_listsA[4], csv_single_recompress_listsA[4], pkl_single_recompress_listsA[4]))
single_csv6 = list(zip(csv_single_listsA[5], pkl_single_listsA[5], csv_single_recompress_listsA[5], pkl_single_recompress_listsA[5]))
single_csv7 = list(zip(csv_single_listsA[6], pkl_single_listsA[6], csv_single_recompress_listsA[6], pkl_single_recompress_listsA[6]))
single_csv8 = list(zip(csv_single_listsA[7], pkl_single_listsA[7], csv_single_recompress_listsA[7], pkl_single_recompress_listsA[7]))
single_csv9 = list(zip(csv_single_listsA[8], pkl_single_listsA[8], csv_single_recompress_listsA[8], pkl_single_recompress_listsA[8]))
single_csv10 = list(zip(csv_single_listsA[9], pkl_single_listsA[9], csv_single_recompress_listsA[9], pkl_single_recompress_listsA[9]))
print(len(single_csv9))


single_QP5 = [item for item in single_csv10 if any(qp in item[0] for qp in QP5)]
single_QP10 = [item for item in single_csv10 if any(qp in item[0] for qp in QP10)]
single_QP16 = [item for item in single_csv10 if any(qp in item[0] for qp in QP16)]
single_QP20 = [item for item in single_csv10 if any(qp in item[0] for qp in QP20)]
single_QP24 = [item for item in single_csv10 if any(qp in item[0] for qp in QP24)]
single_QP27 = [item for item in single_csv10 if any(qp in item[0] for qp in QP27)]
single_QP32 = [item for item in single_csv10 if any(qp in item[0] for qp in QP32)]
single_QP39 = [item for item in single_csv10 if any(qp in item[0] for qp in QP39)]
single_QP42 = [item for item in single_csv10 if any(qp in item[0] for qp in QP42)]
single_QP45 = [item for item in single_csv10 if any(qp in item[0] for qp in QP45)]
print(len(single_QP45))

300
30


In [9]:
# Large_QP1
second_largeQP1_csv1 = list(zip(csv_second_largeQP1_listsA[0], pkl_second_largeQP1_listsA[0], csv_second_recompress_largeQP1_listsA[0], pkl_second_recompress_largeQP1_listsA[0]))
second_largeQP1_csv2 = list(zip(csv_second_largeQP1_listsA[1], pkl_second_largeQP1_listsA[1], csv_second_recompress_largeQP1_listsA[1], pkl_second_recompress_largeQP1_listsA[1]))
second_largeQP1_csv3 = list(zip(csv_second_largeQP1_listsA[2], pkl_second_largeQP1_listsA[2], csv_second_recompress_largeQP1_listsA[2], pkl_second_recompress_largeQP1_listsA[2]))
second_largeQP1_csv4 = list(zip(csv_second_largeQP1_listsA[3], pkl_second_largeQP1_listsA[3], csv_second_recompress_largeQP1_listsA[3], pkl_second_recompress_largeQP1_listsA[3]))
second_largeQP1_csv5 = list(zip(csv_second_largeQP1_listsA[4], pkl_second_largeQP1_listsA[4], csv_second_recompress_largeQP1_listsA[4], pkl_second_recompress_largeQP1_listsA[4]))
second_largeQP1_csv6 = list(zip(csv_second_largeQP1_listsA[5], pkl_second_largeQP1_listsA[5], csv_second_recompress_largeQP1_listsA[5], pkl_second_recompress_largeQP1_listsA[5]))
second_largeQP1_csv7 = list(zip(csv_second_largeQP1_listsA[6], pkl_second_largeQP1_listsA[6], csv_second_recompress_largeQP1_listsA[6], pkl_second_recompress_largeQP1_listsA[6]))
second_largeQP1_csv8 = list(zip(csv_second_largeQP1_listsA[7], pkl_second_largeQP1_listsA[7], csv_second_recompress_largeQP1_listsA[7], pkl_second_recompress_largeQP1_listsA[7]))
second_largeQP1_csv9 = list(zip(csv_second_largeQP1_listsA[8], pkl_second_largeQP1_listsA[8], csv_second_recompress_largeQP1_listsA[8], pkl_second_recompress_largeQP1_listsA[8]))
second_largeQP1_csv10 = list(zip(csv_second_largeQP1_listsA[9], pkl_second_largeQP1_listsA[9], csv_second_recompress_largeQP1_listsA[9], pkl_second_recompress_largeQP1_listsA[9]))
print(len(second_largeQP1_csv1))


second_largeQP1_csv1 = random.sample(second_largeQP1_csv1, 100)
second_largeQP1_csv2 = random.sample(second_largeQP1_csv2, 100)
second_largeQP1_csv3 = random.sample(second_largeQP1_csv3, 100)
second_largeQP1_csv4 = random.sample(second_largeQP1_csv4, 100)
second_largeQP1_csv5 = random.sample(second_largeQP1_csv5, 100)
second_largeQP1_csv6 = random.sample(second_largeQP1_csv6, 100)
second_largeQP1_csv7 = random.sample(second_largeQP1_csv7, 100)
second_largeQP1_csv8 = random.sample(second_largeQP1_csv8, 100)
second_largeQP1_csv9 = random.sample(second_largeQP1_csv9, 100)
# second_largeQP1_csv10 = selected_data[9]
print('\ndouble images train by QP1>QP2: ', len(second_largeQP1_csv1))

second_QP10_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP10_QP5)]
second_QP15_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP15_QP5)]
second_QP15_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP15_QP10)]
second_QP20_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP20_QP5)]
second_QP20_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP20_QP10)]
second_QP20_QP16 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP20_QP16)]
second_QP25_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP25_QP5)]
second_QP25_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP25_QP10)]
second_QP25_QP16 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP25_QP16)]
second_QP25_QP20 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP25_QP20)]
second_QP25_QP24 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP25_QP24)]
second_QP30_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP30_QP5)]
second_QP30_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP30_QP10)]
second_QP30_QP16 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP30_QP16)]
second_QP30_QP20 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP30_QP20)]
second_QP30_QP24 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP30_QP24)]
second_QP30_QP27 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP30_QP27)]
second_QP32_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP32_QP5)]
second_QP32_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP32_QP10)]
second_QP32_QP16 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP32_QP16)]
second_QP32_QP20 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP32_QP20)]
second_QP32_QP24 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP32_QP24)]
second_QP32_QP27 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP32_QP27)]
second_QP35_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP35_QP5)]
second_QP35_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP35_QP10)]
second_QP35_QP16 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP35_QP16)]
second_QP35_QP20 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP35_QP20)]
second_QP35_QP24 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP35_QP24)]
second_QP35_QP27 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP35_QP27)]
second_QP35_QP32 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP35_QP32)]
second_QP40_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP40_QP5)]
second_QP40_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP40_QP10)]
second_QP40_QP16 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP40_QP16)]
second_QP40_QP20 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP40_QP20)]
second_QP40_QP24 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP40_QP24)]
second_QP40_QP27 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP40_QP27)]
second_QP40_QP32 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP40_QP32)]
second_QP40_QP39 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP40_QP39)]
second_QP45_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP5)]
second_QP45_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP10)]
second_QP45_QP16 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP16)]
second_QP45_QP20 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP20)]
second_QP45_QP24 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP24)]
second_QP45_QP27 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP27)]
second_QP45_QP32 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP32)]
second_QP45_QP39 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP39)]
second_QP45_QP42 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP45_QP42)]
second_QP50_QP5 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP5)]
second_QP50_QP10 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP10)]
second_QP50_QP16 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP16)]
second_QP50_QP20 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP20)]
second_QP50_QP24 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP24)]
second_QP50_QP27 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP27)]
second_QP50_QP32 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP32)]
second_QP50_QP39 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP39)]
second_QP50_QP42 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP42)]
second_QP50_QP45 = [item for item in second_largeQP1_csv10 if any(qp in item[0] for qp in QP50_QP45)]
print('\ndouble images test by QP1>QP2: ', len(second_QP50_QP45))

# second_largeQP1_csv1 = random.sample(second_largeQP1_csv1, 100)
# second_largeQP1_csv2 = random.sample(second_largeQP1_csv2, 100)
# second_largeQP1_csv3 = random.sample(second_largeQP1_csv3, 100)
# second_largeQP1_csv4 = random.sample(second_largeQP1_csv4, 100)
# second_largeQP1_csv5 = random.sample(second_largeQP1_csv5, 100)
# second_largeQP1_csv6 = random.sample(second_largeQP1_csv6, 100)
# second_largeQP1_csv7 = random.sample(second_largeQP1_csv7, 100)
# second_largeQP1_csv8 = random.sample(second_largeQP1_csv8, 100)
# second_largeQP1_csv9 = random.sample(second_largeQP1_csv9, 100)
# second_largeQP1_csv10 = random.sample(second_largeQP1_csv10, 30)

1710

double images train by QP1>QP2:  100

double images test by QP1>QP2:  30


In [10]:
# sameQP
# sameQP
second_sameQP_csv1 = list(zip(csv_second_sameQP_listsA[0], pkl_second_sameQP_listsA[0], csv_second_recompress_sameQP_listsA[0], pkl_second_recompress_sameQP_listsA[0]))
second_sameQP_csv2 = list(zip(csv_second_sameQP_listsA[1], pkl_second_sameQP_listsA[1], csv_second_recompress_sameQP_listsA[1], pkl_second_recompress_sameQP_listsA[1]))
second_sameQP_csv3 = list(zip(csv_second_sameQP_listsA[2], pkl_second_sameQP_listsA[2], csv_second_recompress_sameQP_listsA[2], pkl_second_recompress_sameQP_listsA[2]))
second_sameQP_csv4 = list(zip(csv_second_sameQP_listsA[3], pkl_second_sameQP_listsA[3], csv_second_recompress_sameQP_listsA[3], pkl_second_recompress_sameQP_listsA[3]))
second_sameQP_csv5 = list(zip(csv_second_sameQP_listsA[4], pkl_second_sameQP_listsA[4], csv_second_recompress_sameQP_listsA[4], pkl_second_recompress_sameQP_listsA[4]))
second_sameQP_csv6 = list(zip(csv_second_sameQP_listsA[5], pkl_second_sameQP_listsA[5], csv_second_recompress_sameQP_listsA[5], pkl_second_recompress_sameQP_listsA[5]))
second_sameQP_csv7 = list(zip(csv_second_sameQP_listsA[6], pkl_second_sameQP_listsA[6], csv_second_recompress_sameQP_listsA[6], pkl_second_recompress_sameQP_listsA[6]))
second_sameQP_csv8 = list(zip(csv_second_sameQP_listsA[7], pkl_second_sameQP_listsA[7], csv_second_recompress_sameQP_listsA[7], pkl_second_recompress_sameQP_listsA[7]))
second_sameQP_csv9 = list(zip(csv_second_sameQP_listsA[8], pkl_second_sameQP_listsA[8], csv_second_recompress_sameQP_listsA[8], pkl_second_recompress_sameQP_listsA[8]))
second_sameQP_csv10 = list(zip(csv_second_sameQP_listsA[9], pkl_second_sameQP_listsA[9], csv_second_recompress_sameQP_listsA[9], pkl_second_recompress_sameQP_listsA[9]))
print(len(second_sameQP_csv10))

second_sameQP_csv1 = random.sample(second_sameQP_csv1, 100)
second_sameQP_csv2 = random.sample(second_sameQP_csv2, 100)
second_sameQP_csv3 = random.sample(second_sameQP_csv3, 100)
second_sameQP_csv4 = random.sample(second_sameQP_csv4, 100)
second_sameQP_csv5 = random.sample(second_sameQP_csv5, 100)
second_sameQP_csv6 = random.sample(second_sameQP_csv6, 100)
second_sameQP_csv7 = random.sample(second_sameQP_csv7, 100)
second_sameQP_csv8 = random.sample(second_sameQP_csv8, 100)
second_sameQP_csv9 = random.sample(second_sameQP_csv9, 100)
print('\ndouble images train by QP1=QP2: ',len(second_sameQP_csv9))


second_QP5_QP5 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP5_QP5)]
second_QP10_QP10 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP10_QP10)]
second_QP16_QP16 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP16_QP16)]
second_QP20_QP20 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP20_QP20)]
second_QP24_QP24 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP24_QP24)]
second_QP27_QP27 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP27_QP27)]
second_QP32_QP32 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP32_QP32)]
second_QP39_QP39 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP39_QP39)]
second_QP42_QP42 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP42_QP42)]
second_QP45_QP45 = [item for item in second_sameQP_csv10 if any(qp in item[0] for qp in QP45_QP45)]
print('\ndouble images test by QP1 = QP2: ', len(second_QP5_QP5))

300

double images train by QP1=QP2:  100

double images test by QP1 = QP2:  30


In [11]:
# Large_QP2
second_largeQP2_csv1 = list(zip(csv_second_largeQP2_listsA[0], pkl_second_largeQP2_listsA[0], csv_second_recompress_largeQP2_listsA[0], pkl_second_recompress_largeQP2_listsA[0]))
second_largeQP2_csv2 = list(zip(csv_second_largeQP2_listsA[1], pkl_second_largeQP2_listsA[1], csv_second_recompress_largeQP2_listsA[1], pkl_second_recompress_largeQP2_listsA[1]))
second_largeQP2_csv3 = list(zip(csv_second_largeQP2_listsA[2], pkl_second_largeQP2_listsA[2], csv_second_recompress_largeQP2_listsA[2], pkl_second_recompress_largeQP2_listsA[2]))
second_largeQP2_csv4 = list(zip(csv_second_largeQP2_listsA[3], pkl_second_largeQP2_listsA[3], csv_second_recompress_largeQP2_listsA[3], pkl_second_recompress_largeQP2_listsA[3]))
second_largeQP2_csv5 = list(zip(csv_second_largeQP2_listsA[4], pkl_second_largeQP2_listsA[4], csv_second_recompress_largeQP2_listsA[4], pkl_second_recompress_largeQP2_listsA[4]))
second_largeQP2_csv6 = list(zip(csv_second_largeQP2_listsA[5], pkl_second_largeQP2_listsA[5], csv_second_recompress_largeQP2_listsA[5], pkl_second_recompress_largeQP2_listsA[5]))
second_largeQP2_csv7 = list(zip(csv_second_largeQP2_listsA[6], pkl_second_largeQP2_listsA[6], csv_second_recompress_largeQP2_listsA[6], pkl_second_recompress_largeQP2_listsA[6]))
second_largeQP2_csv8 = list(zip(csv_second_largeQP2_listsA[7], pkl_second_largeQP2_listsA[7], csv_second_recompress_largeQP2_listsA[7], pkl_second_recompress_largeQP2_listsA[7]))
second_largeQP2_csv9 = list(zip(csv_second_largeQP2_listsA[8], pkl_second_largeQP2_listsA[8], csv_second_recompress_largeQP2_listsA[8], pkl_second_recompress_largeQP2_listsA[8]))
second_largeQP2_csv10 = list(zip(csv_second_largeQP2_listsA[9], pkl_second_largeQP2_listsA[9], csv_second_recompress_largeQP2_listsA[9], pkl_second_recompress_largeQP2_listsA[9]))
print(len(second_largeQP2_csv1))

second_largeQP2_csv1 = random.sample(second_largeQP2_csv1, 100)
second_largeQP2_csv2 = random.sample(second_largeQP2_csv2, 100)
second_largeQP2_csv3 = random.sample(second_largeQP2_csv3, 100)
second_largeQP2_csv4 = random.sample(second_largeQP2_csv4, 100)
second_largeQP2_csv5 = random.sample(second_largeQP2_csv5, 100)
second_largeQP2_csv6 = random.sample(second_largeQP2_csv6, 100)
second_largeQP2_csv7 = random.sample(second_largeQP2_csv7, 100)
second_largeQP2_csv8 = random.sample(second_largeQP2_csv8, 100)
second_largeQP2_csv9 = random.sample(second_largeQP2_csv9, 100)
# second_largeQP2_csv10 = selected_data[9]
print('\ndouble images train by QP1<QP2: ', len(second_largeQP2_csv1))


second_QP10_QP16 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP10_QP16)]
second_QP10_QP20 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP10_QP20)]
second_QP10_QP24 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP10_QP24)]
second_QP10_QP27 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP10_QP27)]
second_QP10_QP32 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP10_QP32)]
second_QP10_QP39 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP10_QP39)]
second_QP10_QP42 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP10_QP42)]
second_QP10_QP45 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP10_QP45)]
second_QP15_QP16 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP15_QP16)]
second_QP15_QP20 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP15_QP20)]
second_QP15_QP24 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP15_QP24)]
second_QP15_QP27 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP15_QP27)]
second_QP15_QP32 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP15_QP32)]
second_QP15_QP39 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP15_QP39)]
second_QP15_QP42 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP15_QP42)]
second_QP15_QP45 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP15_QP45)]
second_QP20_QP24 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP20_QP24)]
second_QP20_QP27 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP20_QP27)]
second_QP20_QP32 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP20_QP32)]
second_QP20_QP39 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP20_QP39)]
second_QP20_QP42 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP20_QP42)]
second_QP20_QP45 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP20_QP45)]
second_QP25_QP27 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP25_QP27)]
second_QP25_QP32 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP25_QP32)]
second_QP25_QP39 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP25_QP39)]
second_QP25_QP42 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP25_QP42)]
second_QP25_QP45 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP25_QP45)]
second_QP30_QP32 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP30_QP32)]
second_QP30_QP39 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP30_QP39)]
second_QP30_QP42 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP30_QP42)]
second_QP30_QP45 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP30_QP45)]
second_QP32_QP39 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP32_QP39)]
second_QP32_QP42 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP32_QP42)]
second_QP32_QP45 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP32_QP45)]
second_QP35_QP39 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP35_QP39)]
second_QP35_QP42 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP35_QP42)]
second_QP35_QP45 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP35_QP45)]
second_QP40_QP42 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP40_QP42)]
second_QP40_QP45 = [item for item in second_largeQP2_csv10 if any(qp in item[0] for qp in QP40_QP45)]
print('\nDouble images by QP (QP1 < QP2): ', len(second_QP40_QP45))




1170

double images train by QP1<QP2:  100

Double images by QP (QP1 < QP2):  30


In [12]:
# Training_data
train_csv_list1 = single_csv1 + second_largeQP1_csv1 + second_sameQP_csv1 + second_largeQP2_csv1
train_csv_list2 = single_csv2 + second_largeQP1_csv2 + second_sameQP_csv2 + second_largeQP2_csv2
train_csv_list3 = single_csv3 + second_largeQP1_csv3 + second_sameQP_csv3 + second_largeQP2_csv3
train_csv_list4 = single_csv4 + second_largeQP1_csv4 + second_sameQP_csv4 + second_largeQP2_csv4
train_csv_list5 = single_csv5 + second_largeQP1_csv5 + second_sameQP_csv5 + second_largeQP2_csv5
train_csv_list6 = single_csv6 + second_largeQP1_csv6 + second_sameQP_csv6 + second_largeQP2_csv6
train_csv_list7 = single_csv7 + second_largeQP1_csv7 + second_sameQP_csv7 + second_largeQP2_csv7
train_csv_list8 = single_csv8 + second_largeQP1_csv8 + second_sameQP_csv8 + second_largeQP2_csv8
train_csv_list9 = single_csv9 + second_largeQP1_csv9 + second_sameQP_csv9 + second_largeQP2_csv9
print("train_csv_list: ", len(train_csv_list9))

train_csv_list:  600


In [13]:
test_QP10_QP5 = second_QP10_QP5 + single_QP5
test_QP15_QP5 = second_QP15_QP5 + single_QP5
test_QP15_QP10 = second_QP15_QP10 + single_QP10
test_QP20_QP5 = second_QP20_QP5 + single_QP5
test_QP20_QP10 = second_QP20_QP10 + single_QP10
test_QP20_QP16 = second_QP20_QP16 + single_QP16
test_QP25_QP5 = second_QP25_QP5 + single_QP5
test_QP25_QP10 = second_QP25_QP10 + single_QP10
test_QP25_QP16 = second_QP25_QP16 + single_QP16
test_QP25_QP20 = second_QP25_QP20 + single_QP20
test_QP25_QP24 = second_QP25_QP24 + single_QP24
test_QP30_QP5 = second_QP30_QP5 + single_QP5
test_QP30_QP10 = second_QP30_QP10 + single_QP10
test_QP30_QP16 = second_QP30_QP16 + single_QP16
test_QP30_QP20 = second_QP30_QP20 + single_QP20
test_QP30_QP24 = second_QP30_QP24 + single_QP24
test_QP30_QP27 = second_QP30_QP27 + single_QP27
test_QP32_QP5 = second_QP32_QP5 + single_QP5
test_QP32_QP10 = second_QP32_QP10 + single_QP10
test_QP32_QP16 = second_QP32_QP16 + single_QP16
test_QP32_QP20 = second_QP32_QP20 + single_QP20
test_QP32_QP24 = second_QP32_QP24 + single_QP24
test_QP32_QP27 = second_QP32_QP27 + single_QP27
test_QP35_QP5 = second_QP35_QP5 + single_QP5
test_QP35_QP10 = second_QP35_QP10 + single_QP10
test_QP35_QP16 = second_QP35_QP16 + single_QP16
test_QP35_QP20 = second_QP35_QP20 + single_QP20
test_QP35_QP24 = second_QP35_QP24 + single_QP24
test_QP35_QP27 = second_QP35_QP27 + single_QP27
test_QP35_QP32 = second_QP35_QP32 + single_QP32
test_QP40_QP5 = second_QP40_QP5 + single_QP5
test_QP40_QP10 = second_QP40_QP10 + single_QP10
test_QP40_QP16 = second_QP40_QP16 + single_QP16
test_QP40_QP20 = second_QP40_QP20 + single_QP20
test_QP40_QP24 = second_QP40_QP24 + single_QP24
test_QP40_QP27 = second_QP40_QP27 + single_QP27
test_QP40_QP32 = second_QP40_QP32 + single_QP32
test_QP40_QP39 = second_QP40_QP39 + single_QP39
test_QP45_QP5 = second_QP45_QP5 + single_QP5
test_QP45_QP10 = second_QP45_QP10 + single_QP10
test_QP45_QP16 = second_QP45_QP16 + single_QP16
test_QP45_QP20 = second_QP45_QP20 + single_QP20
test_QP45_QP24 = second_QP45_QP24 + single_QP24
test_QP45_QP27 = second_QP45_QP27 + single_QP27
test_QP45_QP32 = second_QP45_QP32 + single_QP32
test_QP45_QP39 = second_QP45_QP39 + single_QP39
test_QP45_QP42 = second_QP45_QP42 + single_QP42
test_QP50_QP5 = second_QP50_QP5 + single_QP5
test_QP50_QP10 = second_QP50_QP10 + single_QP10
test_QP50_QP16 = second_QP50_QP16 + single_QP16
test_QP50_QP20 = second_QP50_QP20 + single_QP20
test_QP50_QP24 = second_QP50_QP24 + single_QP24
test_QP50_QP27 = second_QP50_QP27 + single_QP27
test_QP50_QP32 = second_QP50_QP32 + single_QP32
test_QP50_QP39 = second_QP50_QP39 + single_QP39
test_QP50_QP42 = second_QP50_QP42 + single_QP42
test_QP50_QP45 = second_QP50_QP45 + single_QP45

print('test_QP50_QP45: ', len(test_QP50_QP45))

test_QP50_QP45:  60


In [14]:
test_QP5_QP5 = second_QP5_QP5 + single_QP5
test_QP10_QP10 = second_QP10_QP10 + single_QP10
test_QP16_QP16 = second_QP16_QP16 + single_QP16
test_QP20_QP20 = second_QP20_QP20 + single_QP20
test_QP24_QP24 = second_QP24_QP24 + single_QP24
test_QP27_QP27 = second_QP27_QP27 + single_QP27
test_QP32_QP32 = second_QP32_QP32 + single_QP32
test_QP39_QP39 = second_QP39_QP39 + single_QP39
test_QP42_QP42 = second_QP42_QP42 + single_QP42
test_QP45_QP45 = second_QP45_QP45 + single_QP45

print('test_QP45_QP45: ', len(test_QP45_QP45))

test_QP45_QP45:  60


In [15]:
test_QP10_QP16 = second_QP10_QP16 + single_QP16
test_QP10_QP20 = second_QP10_QP20 + single_QP20
test_QP10_QP24 = second_QP10_QP24 + single_QP24
test_QP10_QP27 = second_QP10_QP27 + single_QP27
test_QP10_QP32 = second_QP10_QP32 + single_QP32
test_QP10_QP39 = second_QP10_QP39 + single_QP39
test_QP10_QP42 = second_QP10_QP42 + single_QP42
test_QP10_QP45 = second_QP10_QP45 + single_QP45
test_QP15_QP16 = second_QP15_QP16 + single_QP16
test_QP15_QP20 = second_QP15_QP20 + single_QP20
test_QP15_QP24 = second_QP15_QP24 + single_QP24
test_QP15_QP27 = second_QP15_QP27 + single_QP27
test_QP15_QP32 = second_QP15_QP32 + single_QP32
test_QP15_QP39 = second_QP15_QP39 + single_QP39
test_QP15_QP42 = second_QP15_QP42 + single_QP42
test_QP15_QP45 = second_QP15_QP45 + single_QP45
test_QP20_QP24 = second_QP20_QP24 + single_QP24
test_QP20_QP27 = second_QP20_QP27 + single_QP27
test_QP20_QP32 = second_QP20_QP32 + single_QP32
test_QP20_QP39 = second_QP20_QP39 + single_QP39
test_QP20_QP42 = second_QP20_QP42 + single_QP42
test_QP20_QP45 = second_QP20_QP45 + single_QP45
test_QP25_QP27 = second_QP25_QP27 + single_QP27
test_QP25_QP32 = second_QP25_QP32 + single_QP32
test_QP25_QP39 = second_QP25_QP39 + single_QP39
test_QP25_QP42 = second_QP25_QP42 + single_QP42
test_QP25_QP45 = second_QP25_QP45 + single_QP45
test_QP30_QP32 = second_QP30_QP32 + single_QP32
test_QP30_QP39 = second_QP30_QP39 + single_QP39
test_QP30_QP42 = second_QP30_QP42 + single_QP42
test_QP30_QP45 = second_QP30_QP45 + single_QP45
test_QP32_QP39 = second_QP32_QP39 + single_QP39
test_QP32_QP42 = second_QP32_QP42 + single_QP42
test_QP32_QP45 = second_QP32_QP45 + single_QP45
test_QP35_QP39 = second_QP35_QP39 + single_QP39
test_QP35_QP42 = second_QP35_QP42 + single_QP42
test_QP35_QP45 = second_QP35_QP45 + single_QP45
test_QP40_QP42 = second_QP40_QP42 + single_QP42
test_QP40_QP45 = second_QP40_QP45 + single_QP45


print('test_QP40_QP45: ', len(test_QP40_QP45))

test_QP40_QP45:  60


In [16]:
def laplace_smoothing(probabilities, alpha=1):
    """
    ラプラス平滑化を行う関数
    
    Args:
    probabilities (list): 平滑化する確率分布のリスト
    alpha (float): 平滑化パラメータ
    
    Returns:
    smoothed_probabilities (list): 平滑化された確率分布のリスト
    """
    total_count = sum(probabilities)
    num_elements = len(probabilities)
    
    smoothed_probabilities = [(count + alpha) / (total_count + alpha * num_elements) for count in probabilities]
    
    return smoothed_probabilities


def process_train_csv_lists(train_csv_list):
    pu_columns = ["PU1_64", "PU1_32", "PU1_16", "PU1_8", "PU1_4",  
                  "PU2_64","PU2_32", "PU2_16", "PU2_8", "PU2_4"]

#     luminance_columns = ["LU1_0","LU1_1","LU1_2","LU1_3",
#                          "LU1_4","LU1_5","LU1_6","LU1_7",
#                          "LU1_8","LU1_9","LU1_10","LU1_11",
#                          "LU1_12","LU1_13","LU1_14","LU1_15",
#                          "LU1_16","LU1_17","LU1_18","LU1_19",
#                          "LU1_20","LU1_21","LU1_22","LU1_23",
#                          "LU1_24","LU1_25","LU1_26","LU1_27",
#                          "LU1_28","LU1_29","LU1_30","LU1_31",
#                          "LU1_32","LU1_33","LU1_34",
                         
#                          "LU2_0","LU2_1","LU2_2","LU2_3",
#                          "LU2_4","LU2_5","LU2_6","LU2_7",
#                          "LU2_8","LU2_9","LU2_10","LU2_11",
#                          "LU2_12","LU2_13","LU2_14","LU2_15",
#                          "LU2_16","LU2_17","LU2_18","LU2_19",
#                          "LU2_20","LU2_21","LU2_22","LU2_23",
#                          "LU2_24","LU2_25","LU2_26","LU2_27",
#                          "LU2_28","LU2_29","LU2_30","LU2_31",
#                          "LU2_32","LU2_33","LU2_34"]
    
    luminance_columns = ["LU1_0","LU1_1","LU1_9","LU1_10","LU1_11","LU1_25","LU1_26","LU1_27",
                         "LU2_0","LU2_1","LU2_9","LU2_10","LU2_11", "LU2_25","LU2_26","LU2_27"]

    chrominance_columns = ["CH1_0", "CH1_1", "CH1_10", "CH1_26", "CH1_34", "CH1_36", 
                           "CH2_0", "CH2_1", "CH2_10", "CH2_26", "CH2_34", "CH2_36"]
    
    
    
    label_columns = ["LABEL"]
    mae1_columns = [f"MAE1_{i}" for i in range(52)]
    mae2_columns = [f"MAE2_{i}" for i in range(52)]
    mae_columns = ["MAE"]
    final_qp_columns = ["FINAL_QP"]
    kl_divergence1 = ["KLD_PU"]
    kl_divergence2 = ["KLD_LUMA"]
    kl_divergence3 = ["KLD_CHROMA"]
    ratio_columns1 = ["RATIO1"]
    ratio_columns2 = ["RATIO2"]
    
    train_df1_1 = pd.DataFrame(columns=pu_columns)
    train_df1_2 = pd.DataFrame(columns=luminance_columns)
    train_df1_3 = pd.DataFrame(columns=chrominance_columns)
    LABEL = pd.DataFrame(columns=label_columns)
    RATIO1 = pd.DataFrame(columns=ratio_columns1)
    RATIO2 = pd.DataFrame(columns=ratio_columns2)
    train_df3 = pd.DataFrame(columns=mae1_columns)
    train_df4 = pd.DataFrame(columns=mae2_columns)
    MAE = pd.DataFrame(columns=mae_columns)
    FINAL_QP = pd.DataFrame(columns=final_qp_columns)
    kl_divergence_df1 = pd.DataFrame(columns=kl_divergence1)
    kl_divergence_df2 = pd.DataFrame(columns=kl_divergence2)
    kl_divergence_df3 = pd.DataFrame(columns=kl_divergence3)

    for path1, path2, path3, path4 in train_csv_list:
        label = 1 if ("2ndQP" in path1) and ("3rdQP" in path3) else 0
        train_pkl_list = [path2, path4]
        df1 = pd.read_csv(path1)
        df2 = pd.read_csv(path3)
        
        # 平滑化を行う
        probabilities_df1 = laplace_smoothing([df1.loc[i, "pu_counts"] for i in [0,1,2,3,4]])
        probabilities_df2 = laplace_smoothing([df2.loc[i, "pu_counts"] for i in [0,1,2,3,4]])
        kl_divergence1 = entropy(probabilities_df1, probabilities_df2)
        
        probabilities_df3 = laplace_smoothing([df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]])
        probabilities_df4 = laplace_smoothing([df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]])
        kl_divergence2 = entropy(probabilities_df3, probabilities_df4)
        
        probabilities_df5 = laplace_smoothing([df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]])
        probabilities_df6 = laplace_smoothing([df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]])
        kl_divergence3 = entropy(probabilities_df5, probabilities_df6)
        
        
        pu_values = [df1.loc[i, "pu_counts"] for i in range(5)] + [df2.loc[i, "pu_counts"] for i in range(5)]
        # lu_values = [df1.loc[i, "luminance_counts"] for i in range(35)] + [df2.loc[i, "luminance_counts"] for i in range(35)]
        lu_values = [df1.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]] + [df2.loc[i, "luminance_counts"] for i in [0,1,9,10,11,25,26,27]]
        ch_values = [df1.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]] + [df2.loc[i, "chroma_counts"] for i in [0,1,10,26,34,36]]
        
        train_df1_1 = pd.concat([train_df1_1, pd.DataFrame([pu_values], columns=pu_columns)], ignore_index=True)
        train_df1_2= pd.concat([train_df1_2, pd.DataFrame([lu_values], columns=luminance_columns)], ignore_index=True)
        train_df1_3 = pd.concat([train_df1_3, pd.DataFrame([ch_values], columns=chrominance_columns)], ignore_index=True)
        
        kl_divergence_df1 = pd.concat([kl_divergence_df1, pd.DataFrame({"KLD_PU": [kl_divergence1]})], ignore_index=True)
        kl_divergence_df2 = pd.concat([kl_divergence_df2, pd.DataFrame({"KLD_LUMA": [kl_divergence2]})], ignore_index=True)
        kl_divergence_df3 = pd.concat([kl_divergence_df3, pd.DataFrame({"KLD_CHROMA": [kl_divergence3]})], ignore_index=True)


        LABEL = pd.concat([LABEL, pd.DataFrame({"LABEL": [label]})], ignore_index=True)

        final_QP = extract_finalQP(train_pkl_list[0])

        mae_d1 = calculate_mae(train_pkl_list[0])
        mae_d2 = calculate_mae(train_pkl_list[1])
        ratio1 = ratio_double_compressed(mae_d1, final_QP)
        ratio2 = ratio_double_compressed(mae_d2, final_QP)

        RATIO1 = pd.concat([RATIO1, pd.DataFrame({"RATIO1": [ratio1]})], ignore_index=True)
        RATIO2 = pd.concat([RATIO2, pd.DataFrame({"RATIO2": [ratio2]})], ignore_index=True)

        train_df3 = pd.concat([train_df3, pd.DataFrame({f"MAE1_{i}": [mae_d1[i]] for i in range(52)})], ignore_index=True)
        train_df4 = pd.concat([train_df4, pd.DataFrame({f"MAE2_{i}": [mae_d2[i]] for i in range(52)})], ignore_index=True)
        MAE = pd.concat([MAE, pd.DataFrame({"MAE": [mae_d1]})], ignore_index=True)
        FINAL_QP = pd.concat([FINAL_QP, pd.DataFrame({"FINAL_QP": [final_QP]})], ignore_index=True)

    train_df1_1.reset_index(drop=True, inplace=True)
    train_df1_2.reset_index(drop=True, inplace=True)
    train_df1_3.reset_index(drop=True, inplace=True)
    LABEL.reset_index(drop=True, inplace=True)
    RATIO1.reset_index(drop=True, inplace=True)
    RATIO2.reset_index(drop=True, inplace=True)
    kl_divergence_df1.reset_index(drop=True, inplace=True)
    kl_divergence_df2.reset_index(drop=True, inplace=True)
    kl_divergence_df3.reset_index(drop=True, inplace=True)

    # train_df = pd.concat([train_df1_1, train_df1_2, train_df1_3, train_df3, train_df4], axis=1)
    train_df = pd.concat([FINAL_QP, kl_divergence_df1, kl_divergence_df2, kl_divergence_df3, RATIO1, RATIO2], axis=1)
    train_df_onlyGhost = pd.concat([FINAL_QP, kl_divergence_df1, kl_divergence_df2, kl_divergence_df3, RATIO1, RATIO2], axis=1)

    return train_df, LABEL, MAE, FINAL_QP


In [17]:
train_df1, LABEL1, MAE1, FINAL_QP1 = process_train_csv_lists(train_csv_list1)
train_df2, LABEL2, MAE2, FINAL_QP2 = process_train_csv_lists(train_csv_list2)
train_df3, LABEL3, MAE3, FINAL_QP3 = process_train_csv_lists(train_csv_list3)
train_df4, LABEL4, MAE4, FINAL_QP4 = process_train_csv_lists(train_csv_list4)
train_df5, LABEL5, MAE5, FINAL_QP5 = process_train_csv_lists(train_csv_list5)
train_df6, LABEL6, MAE6, FINAL_QP6 = process_train_csv_lists(train_csv_list6)
train_df7, LABEL7, MAE7, FINAL_QP7 = process_train_csv_lists(train_csv_list7)
train_df8, LABEL8, MAE8, FINAL_QP8 = process_train_csv_lists(train_csv_list8)
train_df9, LABEL9, MAE9, FINAL_QP9 = process_train_csv_lists(train_csv_list9)

In [18]:
# 1番目のCSVファイルを処理する
test_df1, LABEL_t1, MAE_t1, FINAL_QP_t1 = process_train_csv_lists(test_QP10_QP5)

# 2番目のCSVファイルを処理する
test_df2, LABEL_t2, MAE_t2, FINAL_QP_t2 = process_train_csv_lists(test_QP15_QP5)

# 3番目のCSVファイルを処理する
test_df3, LABEL_t3, MAE_t3, FINAL_QP_t3 = process_train_csv_lists(test_QP15_QP10)

# 4番目のCSVファイルを処理する
test_df4, LABEL_t4, MAE_t4, FINAL_QP_t4 = process_train_csv_lists(test_QP20_QP5)

# 5番目のCSVファイルを処理する
test_df5, LABEL_t5, MAE_t5, FINAL_QP_t5 = process_train_csv_lists(test_QP20_QP10)

# 6番目のCSVファイルを処理する
test_df6, LABEL_t6, MAE_t6, FINAL_QP_t6 = process_train_csv_lists(test_QP20_QP16)

# 7番目のCSVファイルを処理する
test_df7, LABEL_t7, MAE_t7, FINAL_QP_t7 = process_train_csv_lists(test_QP25_QP5)

# 8番目のCSVファイルを処理する
test_df8, LABEL_t8, MAE_t8, FINAL_QP_t8 = process_train_csv_lists(test_QP25_QP10)

# 9番目のCSVファイルを処理する
test_df9, LABEL_t9, MAE_t9, FINAL_QP_t9 = process_train_csv_lists(test_QP25_QP16)

# 10番目のCSVファイルを処理する
test_df10, LABEL_t10, MAE_t10, FINAL_QP_t10 = process_train_csv_lists(test_QP25_QP20)

# 11番目のCSVファイルを処理する
test_df11, LABEL_t11, MAE_t11, FINAL_QP_t11 = process_train_csv_lists(test_QP25_QP24)

# 12番目のCSVファイルを処理する
test_df12, LABEL_t12, MAE_t12, FINAL_QP_t12 = process_train_csv_lists(test_QP30_QP5)

# 13番目のCSVファイルを処理する
test_df13, LABEL_t13, MAE_t13, FINAL_QP_t13 = process_train_csv_lists(test_QP30_QP10)

# 14番目のCSVファイルを処理する
test_df14, LABEL_t14, MAE_t14, FINAL_QP_t14 = process_train_csv_lists(test_QP30_QP16)

# 15番目のCSVファイルを処理する
test_df15, LABEL_t15, MAE_t15, FINAL_QP_t15 = process_train_csv_lists(test_QP30_QP20)

# 16番目のCSVファイルを処理する
test_df16, LABEL_t16, MAE_t16, FINAL_QP_t16 = process_train_csv_lists(test_QP30_QP24)

# 17番目のCSVファイルを処理する
test_df17, LABEL_t17, MAE_t17, FINAL_QP_t17 = process_train_csv_lists(test_QP30_QP27)

# 18番目のCSVファイルを処理する
test_df18, LABEL_t18, MAE_t18, FINAL_QP_t18 = process_train_csv_lists(test_QP32_QP5)

# 19番目のCSVファイルを処理する
test_df19, LABEL_t19, MAE_t19, FINAL_QP_t19 = process_train_csv_lists(test_QP32_QP10)

# 20番目のCSVファイルを処理する
test_df20, LABEL_t20, MAE_t20, FINAL_QP_t20 = process_train_csv_lists(test_QP32_QP16)

# 21番目のCSVファイルを処理する
test_df21, LABEL_t21, MAE_t21, FINAL_QP_t21 = process_train_csv_lists(test_QP32_QP20)

# 22番目のCSVファイルを処理する
test_df22, LABEL_t22, MAE_t22, FINAL_QP_t22 = process_train_csv_lists(test_QP32_QP24)

# 23番目のCSVファイルを処理する
test_df23, LABEL_t23, MAE_t23, FINAL_QP_t23 = process_train_csv_lists(test_QP32_QP27)

# 24番目のCSVファイルを処理する
test_df24, LABEL_t24, MAE_t24, FINAL_QP_t24 = process_train_csv_lists(test_QP35_QP5)

# 25番目のCSVファイルを処理する
test_df25, LABEL_t25, MAE_t25, FINAL_QP_t25 = process_train_csv_lists(test_QP35_QP10)

# 26番目のCSVファイルを処理する
test_df26, LABEL_t26, MAE_t26, FINAL_QP_t26 = process_train_csv_lists(test_QP35_QP16)

# 27番目のCSVファイルを処理する
test_df27, LABEL_t27, MAE_t27, FINAL_QP_t27 = process_train_csv_lists(test_QP35_QP20)

# 28番目のCSVファイルを処理する
test_df28, LABEL_t28, MAE_t28, FINAL_QP_t28 = process_train_csv_lists(test_QP35_QP24)

# 29番目のCSVファイルを処理する
test_df29, LABEL_t29, MAE_t29, FINAL_QP_t29 = process_train_csv_lists(test_QP35_QP27)

# 30番目のCSVファイルを処理する
test_df30, LABEL_t30, MAE_t30, FINAL_QP_t30 = process_train_csv_lists(test_QP35_QP32)

# 31番目のCSVファイルを処理する
test_df31, LABEL_t31, MAE_t31, FINAL_QP_t31 = process_train_csv_lists(test_QP40_QP5)

# 32番目のCSVファイルを処理する
test_df32, LABEL_t32, MAE_t32, FINAL_QP_t32 = process_train_csv_lists(test_QP40_QP10)

# 33番目のCSVファイルを処理する
test_df33, LABEL_t33, MAE_t33, FINAL_QP_t33 = process_train_csv_lists(test_QP40_QP16)

# 34番目のCSVファイルを処理する
test_df34, LABEL_t34, MAE_t34, FINAL_QP_t34 = process_train_csv_lists(test_QP40_QP20)

# 35番目のCSVファイルを処理する
test_df35, LABEL_t35, MAE_t35, FINAL_QP_t35 = process_train_csv_lists(test_QP40_QP24)

# 36番目のCSVファイルを処理する
test_df36, LABEL_t36, MAE_t36, FINAL_QP_t36 = process_train_csv_lists(test_QP40_QP27)

# 37番目のCSVファイルを処理する
test_df37, LABEL_t37, MAE_t37, FINAL_QP_t37 = process_train_csv_lists(test_QP40_QP32)

# 38番目のCSVファイルを処理する
test_df38, LABEL_t38, MAE_t38, FINAL_QP_t38 = process_train_csv_lists(test_QP40_QP39)

# 39番目のCSVファイルを処理する
test_df39, LABEL_t39, MAE_t39, FINAL_QP_t39 = process_train_csv_lists(test_QP45_QP5)

# 40番目のCSVファイルを処理する
test_df40, LABEL_t40, MAE_t40, FINAL_QP_t40 = process_train_csv_lists(test_QP45_QP10)

# 41番目のCSVファイルを処理する
test_df41, LABEL_t41, MAE_t41, FINAL_QP_t41 = process_train_csv_lists(test_QP45_QP16)

# 42番目のCSVファイルを処理する
test_df42, LABEL_t42, MAE_t42, FINAL_QP_t42 = process_train_csv_lists(test_QP45_QP20)

# 43番目のCSVファイルを処理する
test_df43, LABEL_t43, MAE_t43, FINAL_QP_t43 = process_train_csv_lists(test_QP45_QP24)

# 44番目のCSVファイルを処理する
test_df44, LABEL_t44, MAE_t44, FINAL_QP_t44 = process_train_csv_lists(test_QP45_QP27)

# 45番目のCSVファイルを処理する
test_df45, LABEL_t45, MAE_t45, FINAL_QP_t45 = process_train_csv_lists(test_QP45_QP32)

# 46番目のCSVファイルを処理する
test_df46, LABEL_t46, MAE_t46, FINAL_QP_t46 = process_train_csv_lists(test_QP45_QP39)

# 47番目のCSVファイルを処理する
test_df47, LABEL_t47, MAE_t47, FINAL_QP_t47 = process_train_csv_lists(test_QP45_QP42)

# 48番目のCSVファイルを処理する
test_df48, LABEL_t48, MAE_t48, FINAL_QP_t48 = process_train_csv_lists(test_QP50_QP5)

# 49番目のCSVファイルを処理する
test_df49, LABEL_t49, MAE_t49, FINAL_QP_t49 = process_train_csv_lists(test_QP50_QP10)

# 50番目のCSVファイルを処理する
test_df50, LABEL_t50, MAE_t50, FINAL_QP_t50 = process_train_csv_lists(test_QP50_QP16)

# 51番目のCSVファイルを処理する
test_df51, LABEL_t51, MAE_t51, FINAL_QP_t51 = process_train_csv_lists(test_QP50_QP20)

# 52番目のCSVファイルを処理する
test_df52, LABEL_t52, MAE_t52, FINAL_QP_t52 = process_train_csv_lists(test_QP50_QP24)

# 53番目のCSVファイルを処理する
test_df53, LABEL_t53, MAE_t53, FINAL_QP_t53 = process_train_csv_lists(test_QP50_QP27)

# 54番目のCSVファイルを処理する
test_df54, LABEL_t54, MAE_t54, FINAL_QP_t54 = process_train_csv_lists(test_QP50_QP32)

# 55番目のCSVファイルを処理する
test_df55, LABEL_t55, MAE_t55, FINAL_QP_t55 = process_train_csv_lists(test_QP50_QP39)

# 56番目のCSVファイルを処理する
test_df56, LABEL_t56, MAE_t56, FINAL_QP_t56 = process_train_csv_lists(test_QP50_QP42)

# 57番目のCSVファイルを処理する
test_df57, LABEL_t57, MAE_t57, FINAL_QP_t57 = process_train_csv_lists(test_QP50_QP45)


In [19]:
# 58番目のCSVファイルを処理する
test_df58, LABEL_t58, MAE_t58, FINAL_QP_t58 = process_train_csv_lists(test_QP5_QP5)

# 59番目のCSVファイルを処理する
test_df59, LABEL_t59, MAE_t59, FINAL_QP_t59 = process_train_csv_lists(test_QP10_QP10)

# 60番目のCSVファイルを処理する
test_df60, LABEL_t60, MAE_t60, FINAL_QP_t60 = process_train_csv_lists(test_QP16_QP16)

# 61番目のCSVファイルを処理する
test_df61, LABEL_t61, MAE_t61, FINAL_QP_t61 = process_train_csv_lists(test_QP20_QP20)

# 62番目のCSVファイルを処理する
test_df62, LABEL_t62, MAE_t62, FINAL_QP_t62 = process_train_csv_lists(test_QP24_QP24)

# 63番目のCSVファイルを処理する
test_df63, LABEL_t63, MAE_t63, FINAL_QP_t63 = process_train_csv_lists(test_QP27_QP27)

# 64番目のCSVファイルを処理する
test_df64, LABEL_t64, MAE_t64, FINAL_QP_t64 = process_train_csv_lists(test_QP32_QP32)

# 65番目のCSVファイルを処理する
test_df65, LABEL_t65, MAE_t65, FINAL_QP_t65 = process_train_csv_lists(test_QP39_QP39)

# 66番目のCSVファイルを処理する
test_df66, LABEL_t66, MAE_t66, FINAL_QP_t66 = process_train_csv_lists(test_QP42_QP42)

# 67番目のCSVファイルを処理する
test_df67, LABEL_t67, MAE_t67, FINAL_QP_t67 = process_train_csv_lists(test_QP45_QP45)


In [20]:
# 68番目のCSVファイルを処理する
test_df68, LABEL_t68, MAE_t68, FINAL_QP_t68 = process_train_csv_lists(test_QP10_QP16)

# 69番目のCSVファイルを処理する
test_df69, LABEL_t69, MAE_t69, FINAL_QP_t69 = process_train_csv_lists(test_QP10_QP20)

# 70番目のCSVファイルを処理する
test_df70, LABEL_t70, MAE_t70, FINAL_QP_t70 = process_train_csv_lists(test_QP10_QP24)

# 71番目のCSVファイルを処理する
test_df71, LABEL_t71, MAE_t71, FINAL_QP_t71 = process_train_csv_lists(test_QP10_QP27)

# 72番目のCSVファイルを処理する
test_df72, LABEL_t72, MAE_t72, FINAL_QP_t72 = process_train_csv_lists(test_QP10_QP32)

# 73番目のCSVファイルを処理する
test_df73, LABEL_t73, MAE_t73, FINAL_QP_t73 = process_train_csv_lists(test_QP10_QP39)

# 74番目のCSVファイルを処理する
test_df74, LABEL_t74, MAE_t74, FINAL_QP_t74 = process_train_csv_lists(test_QP10_QP42)

# 75番目のCSVファイルを処理する
test_df75, LABEL_t75, MAE_t75, FINAL_QP_t75 = process_train_csv_lists(test_QP10_QP45)

# 76番目のCSVファイルを処理する
test_df76, LABEL_t76, MAE_t76, FINAL_QP_t76 = process_train_csv_lists(test_QP15_QP16)

# 77番目のCSVファイルを処理する
test_df77, LABEL_t77, MAE_t77, FINAL_QP_t77 = process_train_csv_lists(test_QP15_QP20)

# 78番目のCSVファイルを処理する
test_df78, LABEL_t78, MAE_t78, FINAL_QP_t78 = process_train_csv_lists(test_QP15_QP24)

# 79番目のCSVファイルを処理する
test_df79, LABEL_t79, MAE_t79, FINAL_QP_t79 = process_train_csv_lists(test_QP15_QP27)

# 80番目のCSVファイルを処理する
test_df80, LABEL_t80, MAE_t80, FINAL_QP_t80 = process_train_csv_lists(test_QP15_QP32)

# 81番目のCSVファイルを処理する
test_df81, LABEL_t81, MAE_t81, FINAL_QP_t81 = process_train_csv_lists(test_QP15_QP39)

# 82番目のCSVファイルを処理する
test_df82, LABEL_t82, MAE_t82, FINAL_QP_t82 = process_train_csv_lists(test_QP15_QP42)

# 83番目のCSVファイルを処理する
test_df83, LABEL_t83, MAE_t83, FINAL_QP_t83 = process_train_csv_lists(test_QP15_QP45)

# 84番目のCSVファイルを処理する
test_df84, LABEL_t84, MAE_t84, FINAL_QP_t84 = process_train_csv_lists(test_QP20_QP24)

# 85番目のCSVファイルを処理する
test_df85, LABEL_t85, MAE_t85, FINAL_QP_t85 = process_train_csv_lists(test_QP20_QP27)

# 86番目のCSVファイルを処理する
test_df86, LABEL_t86, MAE_t86, FINAL_QP_t86 = process_train_csv_lists(test_QP20_QP32)

# 87番目のCSVファイルを処理する
test_df87, LABEL_t87, MAE_t87, FINAL_QP_t87 = process_train_csv_lists(test_QP20_QP39)

# 88番目のCSVファイルを処理する
test_df88, LABEL_t88, MAE_t88, FINAL_QP_t88 = process_train_csv_lists(test_QP20_QP42)

# 89番目のCSVファイルを処理する
test_df89, LABEL_t89, MAE_t89, FINAL_QP_t89 = process_train_csv_lists(test_QP20_QP45)

# 90番目のCSVファイルを処理する
test_df90, LABEL_t90, MAE_t90, FINAL_QP_t90 = process_train_csv_lists(test_QP25_QP27)

# 91番目のCSVファイルを処理する
test_df91, LABEL_t91, MAE_t91, FINAL_QP_t91 = process_train_csv_lists(test_QP25_QP32)

# 92番目のCSVファイルを処理する
test_df92, LABEL_t92, MAE_t92, FINAL_QP_t92 = process_train_csv_lists(test_QP25_QP39)

# 93番目のCSVファイルを処理する
test_df93, LABEL_t93, MAE_t93, FINAL_QP_t93 = process_train_csv_lists(test_QP25_QP42)

# 94番目のCSVファイルを処理する
test_df94, LABEL_t94, MAE_t94, FINAL_QP_t94 = process_train_csv_lists(test_QP25_QP45)

# 95番目のCSVファイルを処理する
test_df95, LABEL_t95, MAE_t95, FINAL_QP_t95 = process_train_csv_lists(test_QP30_QP32)

# 96番目のCSVファイルを処理する
test_df96, LABEL_t96, MAE_t96, FINAL_QP_t96 = process_train_csv_lists(test_QP30_QP39)

# 97番目のCSVファイルを処理する
test_df97, LABEL_t97, MAE_t97, FINAL_QP_t97 = process_train_csv_lists(test_QP30_QP42)

# 98番目のCSVファイルを処理する
test_df98, LABEL_t98, MAE_t98, FINAL_QP_t98 = process_train_csv_lists(test_QP30_QP45)

# 99番目のCSVファイルを処理する
test_df99, LABEL_t99, MAE_t99, FINAL_QP_t99 = process_train_csv_lists(test_QP32_QP39)

# 100番目のCSVファイルを処理する
test_df100, LABEL_t100, MAE_t100, FINAL_QP_t100 = process_train_csv_lists(test_QP32_QP42)

# 101番目のCSVファイルを処理する
test_df101, LABEL_t101, MAE_t101, FINAL_QP_t101 = process_train_csv_lists(test_QP32_QP45)

# 102番目のCSVファイルを処理する
test_df102, LABEL_t102, MAE_t102, FINAL_QP_t102 = process_train_csv_lists(test_QP35_QP39)

# 103番目のCSVファイルを処理する
test_df103, LABEL_t103, MAE_t103, FINAL_QP_t103 = process_train_csv_lists(test_QP35_QP42)

# 104番目のCSVファイルを処理する
test_df104, LABEL_t104, MAE_t104, FINAL_QP_t104 = process_train_csv_lists(test_QP35_QP45)

# 105番目のCSVファイルを処理する
test_df105, LABEL_t105, MAE_t105, FINAL_QP_t105 = process_train_csv_lists(test_QP40_QP42)

# 106番目のCSVファイルを処理する
test_df106, LABEL_t106, MAE_t106, FINAL_QP_t106 = process_train_csv_lists(test_QP40_QP45)


In [21]:
print(train_df1)
print(test_df55)

    FINAL_QP    KLD_PU  KLD_LUMA KLD_CHROMA    RATIO1    RATIO2
0         10  0.000668  0.001769    0.00282  0.197189  0.102853
1         16  0.000948  0.001313   0.002801   0.06693  0.029418
2         20  0.000115  0.001618   0.000941  0.026094  0.011189
3         24    0.0001  0.002771   0.000861   0.01108   0.00542
4         27  0.000491   0.00333   0.001836  0.121726  0.106972
..       ...       ...       ...        ...       ...       ...
595       32  0.000187  0.001376   0.000904  0.175257  0.174977
596       32  0.000017  0.003239   0.006368  0.222156  0.210483
597       45  0.002369  0.006054   0.003319   0.15296  0.192275
598       45  0.000982   0.00363   0.018799  0.142455  0.161556
599       24  0.000283  0.002734   0.001622  0.040523  0.038514

[600 rows x 6 columns]
   FINAL_QP    KLD_PU  KLD_LUMA KLD_CHROMA    RATIO1    RATIO2
0        39   0.00007  0.000791   0.000468  0.706337  0.646728
1        39  0.000006  0.007323   0.000999  0.758954  0.710146
2        39  0.0000

In [22]:
def process_results_to_lists(train_df, LABEL, MAE, FINAL_QP):
    scaler = MinMaxScaler()

    # スケーラーを使って結合したデータをスケーリング
    X_train = scaler.fit_transform(train_df)

    # pandasをndarrayに変換
    MAE_array = MAE.values
    FINAL_QP_array = FINAL_QP.values

    # ラベルの準備
    Y_train = LABEL['LABEL'].astype(int)

    return X_train, MAE_array, FINAL_QP_array, Y_train

def append_results_to_lists(train_df, LABEL, MAE, FINAL_QP, X_train_list, MAE_list, FINAL_QP_list, Y_train_list):
    X_train, MAE_array, FINAL_QP_array, Y_train = process_results_to_lists(train_df, LABEL, MAE, FINAL_QP)
    X_train_list.append(X_train)
    # X_train_onlyGhost_list.append(X_train_onlyGhost)
    MAE_list.append(MAE_array)
    FINAL_QP_list.append(FINAL_QP_array)
    Y_train_list.append(Y_train)

# リストを初期化
X_train_list = []
MAE_list = []
FINAL_QP_list = []
Y_train_list = []

for i in range(1, 107):
    globals()[f'X_test_list{i}'] = []
    globals()[f'MAE_list_t{i}'] = []
    globals()[f'FINAL_QP_list_t{i}'] = []
    globals()[f'Y_test_list{i}'] = []


In [23]:
# データを処理してリストに追加
append_results_to_lists(train_df1, LABEL1, MAE1, FINAL_QP1, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df2, LABEL2, MAE2, FINAL_QP2, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df3, LABEL3, MAE3, FINAL_QP3, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df4, LABEL4, MAE4, FINAL_QP4, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df5, LABEL5, MAE5, FINAL_QP5, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df6, LABEL6, MAE6, FINAL_QP6, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df7, LABEL7, MAE7, FINAL_QP7, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df8, LABEL8, MAE8, FINAL_QP8, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)
append_results_to_lists(train_df9, LABEL9, MAE9, FINAL_QP9, X_train_list, MAE_list, FINAL_QP_list, Y_train_list)


In [24]:
for i in range(1, 107):
    eval(f'append_results_to_lists(test_df{i}, LABEL_t{i}, MAE_t{i}, FINAL_QP_t{i}, X_test_list{i}, MAE_list_t{i}, FINAL_QP_list_t{i}, Y_test_list{i})')

In [25]:
# Cの範囲を指定
# C_values = {'C': [0.01, 0.1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 1000, 2000, 3000, 4000, 5000]}
C_values = {'C': [0.01, 0.1, 1, 10, 100, 1000, 2000, 3000, 4000, 5000]}

kfold = KFold(n_splits=9, shuffle=True, random_state=42)

# データフレームを初期化
results = pd.DataFrame()

# 1から106までの列名を作成し、データフレームに追加
for i in range(1, 107):
    columns = [
        f'C_RBF{i}', f'Score_RBF{i}', f'tnr_rbf{i}', f'tpr_rbf{i}',
        f'C_LINEAR{i}', f'Score_LINEAR{i}', f'tnr_linear{i}', f'tpr_linear{i}',
        f'Threshold{i}', f'Score_old{i}', f'tnr_old{i}', f'tpr_old{i}'
    ]
    results = pd.concat([results, pd.DataFrame(columns=columns)], axis=1)

    
X_index = np.arange(9)  # インデックスとして0から8までの数字を用意

# ループで各分割のtrain_idsとtest_idsを取得
for fold, (train_ids, test_ids) in enumerate(kfold.split(X_index)):
    print(f"<Fold-{fold+1}>")
    print("Train indices:", train_ids)
    print("Test indices:", test_ids)
    
    train_data = [X_train_list[i] for i in train_ids]
    train_label = [Y_train_list[i] for i in train_ids]
    
    val_data = [X_train_list[i] for i in test_ids]
    val_label = [Y_train_list[i] for i in test_ids]
        
    X_train = [item for data in train_data for item in data]
    Y_train = [item for data in train_label for item in data]
    
    X_val = [item for data in val_data for item in data]
    Y_val = [item for data in val_label for item in data]
    
    print(len(Y_train))
    print(len(Y_val))
    
    # リストの作成（1から106まで）
    for i in range(1, 107):
        globals()[f'test_data{i}'] = [item for data in globals()[f'X_test_list{i}'] for item in data]
        globals()[f'test_label{i}'] = [item for data in globals()[f'Y_test_list{i}'] for item in data]
        globals()[f'MAE_data{i}'] = [item for data in globals()[f'MAE_list_t{i}'] for item in data]
        globals()[f'FINAL_QP_data{i}'] = [item for data in globals()[f'FINAL_QP_list_t{i}'] for item in data]

        globals()[f'best_threshold{i}'] = 0
        globals()[f'best_accuracy{i}'] = 0
        globals()[f'best_predicted_labels{i}'] = []
        globals()[f'best_ground_truth_labels{i}'] = []
        
        for threshold in np.arange(0.01, 1.00, 0.01):
            test_old = np.array([is_double_compressed(globals()[f'MAE_data{i}'][j], globals()[f'FINAL_QP_data{i}'][j], threshold) for j in range(60)])
            predicted_labels = test_old.astype(int)
            ground_truth_labels = np.array(globals()[f'test_label{i}'])
            accuracy = np.sum(ground_truth_labels == predicted_labels) / len(ground_truth_labels)
    
            if accuracy > globals()[f'best_accuracy{i}']:
                globals()[f'best_accuracy{i}'] = accuracy
                globals()[f'best_threshold{i}'] = threshold
                globals()[f'best_predicted_labels{i}'] = predicted_labels
                globals()[f'best_ground_truth_labels{i}'] = ground_truth_labels


    best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = 0, None, None    
    best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = 0, None, None

        
    for C_value in C_values['C']:    
        # SVMモデルのインスタンスを作成
        svm_model_RBF = SVC(kernel='rbf', C=C_value)
        svm_model_LINEAR = SVC(kernel='linear', C=C_value)

        # 訓練データで訓練
        svm_model_RBF.fit(X_train, Y_train)        
        svm_model_LINEAR.fit(X_train, Y_train)

        val_accuracy_RBF = accuracy_score(Y_val, svm_model_RBF.predict(X_val))        
        val_accuracy_LINEAR = accuracy_score(Y_val, svm_model_LINEAR.predict(X_val))

        # 検証データでの精度が最も高かった場合、そのモデルを保存
        if val_accuracy_RBF > best_val_score_RBF:
            best_val_score_RBF, best_svm_model_RBF, best_c_value_RBF = val_accuracy_RBF, svm_model_RBF, C_value
            
        if val_accuracy_LINEAR > best_val_score_LINEAR:
            best_val_score_LINEAR, best_svm_model_LINEAR, best_c_value_LINEAR = val_accuracy_LINEAR, svm_model_LINEAR, C_value

    fold_results = {}
    for i in range(1, 107):
        # RBFモデルの評価
        predictions_RBF = best_svm_model_RBF.predict(globals()[f'test_data{i}'])
        accuracy_RBF = accuracy_score(globals()[f'test_label{i}'], predictions_RBF)
        globals()[f'accuracy_RBF{i}'] = accuracy_RBF
        report_RBF = classification_report(globals()[f'test_label{i}'], predictions_RBF, digits=4, zero_division=1)
        conf_matrix = confusion_matrix(globals()[f'test_label{i}'], predictions_RBF)
        globals()[f'tnr_rbf{i}'] = conf_matrix[0, 0] / (conf_matrix[0, 0] + conf_matrix[0, 1])
        globals()[f'tpr_rbf{i}'] = conf_matrix[1, 1] / (conf_matrix[1, 0] + conf_matrix[1, 1])
        print(f'report_RBF for dataset {i}:\n{report_RBF}')

        # LINEARモデルの評価
        predictions_LINEAR = best_svm_model_LINEAR.predict(globals()[f'test_data{i}'])
        accuracy_LINEAR = accuracy_score(globals()[f'test_label{i}'], predictions_LINEAR)
        globals()[f'accuracy_LINEAR{i}'] = accuracy_LINEAR
        report_LINEAR = classification_report(globals()[f'test_label{i}'], predictions_LINEAR, digits=4, zero_division=1)
        conf_matrix = confusion_matrix(globals()[f'test_label{i}'], predictions_LINEAR)
        globals()[f'tnr_linear{i}'] = conf_matrix[0, 0] / (conf_matrix[0, 0] + conf_matrix[0, 1])
        globals()[f'tpr_linear{i}'] = conf_matrix[1, 1] / (conf_matrix[1, 0] + conf_matrix[1, 1])
        print(f'report_LINEAR for dataset {i}:\n{report_LINEAR}')

        # Old modelの評価
        test_old = classification_report(globals()[f'best_ground_truth_labels{i}'], globals()[f'best_predicted_labels{i}'], labels=[0, 1], target_names=['0', '1'], zero_division=0, digits=4)
        conf_matrix = confusion_matrix(globals()[f'best_ground_truth_labels{i}'], globals()[f'best_predicted_labels{i}'])
        globals()[f'tnr_old{i}'] = conf_matrix[0, 0] / (conf_matrix[0, 0] + conf_matrix[0, 1])
        globals()[f'tpr_old{i}'] = conf_matrix[1, 1] / (conf_matrix[1, 0] + conf_matrix[1, 1])
        print(f'Summary old_model for dataset {i}:\n{test_old}')
        

        # fold_resultsに保存
        fold_results[f'C_RBF{i}'] = best_c_value_RBF
        fold_results[f'Score_RBF{i}'] = globals()[f'accuracy_RBF{i}']
        fold_results[f'tnr_rbf{i}'] = globals()[f'tnr_rbf{i}']
        fold_results[f'tpr_rbf{i}'] = globals()[f'tpr_rbf{i}']

        fold_results[f'C_LINEAR{i}'] = best_c_value_LINEAR
        fold_results[f'Score_LINEAR{i}'] = globals()[f'accuracy_LINEAR{i}']
        fold_results[f'tnr_linear{i}'] = globals()[f'tnr_linear{i}']
        fold_results[f'tpr_linear{i}'] = globals()[f'tpr_linear{i}']

        fold_results[f'Threshold{i}'] = globals()[f'best_threshold{i}']
        fold_results[f'Score_old{i}'] = globals()[f'best_accuracy{i}']
        fold_results[f'tnr_old{i}'] = globals()[f'tnr_old{i}']
        fold_results[f'tpr_old{i}'] = globals()[f'tpr_old{i}']

    # 結果をデータフレームに追加
    results = pd.concat([results, pd.DataFrame(fold_results, index=[fold])], axis=0)

# 結果のデータフレームを表示
# print(results)
    

<Fold-1>
Train indices: [0 1 2 3 4 5 6 8]
Test indices: [7]
4800
600
report_RBF for dataset 1:
              precision    recall  f1-score   support

           0     0.5192    0.9000    0.6585        30
           1     0.6250    0.1667    0.2632        30

    accuracy                         0.5333        60
   macro avg     0.5721    0.5333    0.4608        60
weighted avg     0.5721    0.5333    0.4608        60

report_LINEAR for dataset 1:
              precision    recall  f1-score   support

           0     0.5088    0.9667    0.6667        30
           1     0.6667    0.0667    0.1212        30

    accuracy                         0.5167        60
   macro avg     0.5877    0.5167    0.3939        60
weighted avg     0.5877    0.5167    0.3939        60

Summary old_model for dataset 1:
              precision    recall  f1-score   support

           0     0.7273    0.2667    0.3902        30
           1     0.5510    0.9000    0.6835        30

    accuracy             

In [26]:
import pandas as pd
import numpy as np

pd.set_option('display.max_rows', 320)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# 前のコードでresultsデータフレームを作成していると仮定しています
# 各統計情報を100倍して小数点第2位までの表記に変更
statistics_data = {
    'Model': [f'RBF{i}' for i in range(1, 107)] + [f'LINEAR{i}' for i in range(1, 107)] + [f'OLD{i}' for i in range(1, 107)],
    'Average TNR': [
        round(results[f'tnr_rbf{i}'].mean() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'tnr_linear{i}'].mean() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'tnr_old{i}'].mean() * 100, 2) for i in range(1, 107)
    ],
    'Average TPR': [
        round(results[f'tpr_rbf{i}'].mean() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'tpr_linear{i}'].mean() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'tpr_old{i}'].mean() * 100, 2) for i in range(1, 107)
    ],
    'Average Test Score': [
        round(results[f'Score_RBF{i}'].mean() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'Score_LINEAR{i}'].mean() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'Score_old{i}'].mean() * 100, 2) for i in range(1, 107)
    ],
    'Standard Deviation': [
        round(results[f'Score_RBF{i}'].std() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'Score_LINEAR{i}'].std() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'Score_old{i}'].std() * 100, 2) for i in range(1, 107)
    ],
    'Max Test Score': [
        round(results[f'Score_RBF{i}'].max() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'Score_LINEAR{i}'].max() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'Score_old{i}'].max() * 100, 2) for i in range(1, 107)
    ],
    'Min Test Score': [
        round(results[f'Score_RBF{i}'].min() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'Score_LINEAR{i}'].min() * 100, 2) for i in range(1, 107)
    ] + [
        round(results[f'Score_old{i}'].min() * 100, 2) for i in range(1, 107)
    ],
}

# DataFrameを作成
statistics_df = pd.DataFrame(statistics_data)

# 表示
print(statistics_df)

# 関数を定義して、各セグメントの統計情報を計算
def calculate_statistics(segment, prefix):
    # モデル番号を抽出してフラットなリストに変換
    model_numbers = statistics_df['Model'].str.extract(r'(\d+)').astype(int)[0]
    is_in_segment = model_numbers.isin(segment)
    is_correct_prefix = statistics_df['Model'].str.startswith(prefix)
    
    tnr_mean = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average TNR'].mean(), 2)
    tpr_mean = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average TPR'].mean(), 2)
    acc_mean = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average Test Score'].mean(), 2)
    acc_std = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average Test Score'].std(), 2)
    
    acc_max = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average Test Score'].max(), 2)
    acc_min = round(statistics_df.loc[is_correct_prefix & is_in_segment, 'Average Test Score'].min(), 2)
    
    return tnr_mean, tpr_mean, acc_mean, acc_std, acc_max, acc_min

# セグメントを定義
segments = {
    '1_57': list(range(1, 58)),
    '58_67': list(range(58, 68)),
    '68_106': list(range(68, 107))
}

# 結果を保存するリスト
results_summary = []

# 統計情報を計算して表示
for model in ['RBF', 'LINEAR', 'OLD']:
    for segment_name, segment in segments.items():
        tnr_mean, tpr_mean, acc_mean, acc_std, acc_max, acc_min = calculate_statistics(segment, model)
        results_summary.append({
            'Model': f'{model}_{segment_name}',
            'Average TNR': tnr_mean,
            'Average TPR': tpr_mean,
            'Average Test Score': acc_mean,
            'Test Score STD': acc_std,
            'Test Score MAX': acc_max,
            'Test Score MIN': acc_min
        })

# DataFrameに変換
summary_df = pd.DataFrame(results_summary)

# 表示
print(summary_df)



         Model  Average TNR  Average TPR  Average Test Score  Standard Deviation  Max Test Score  Min Test Score
0         RBF1        94.44        15.93               55.19                1.76           56.67           51.67
1         RBF2        94.44        29.63               62.04                2.74           66.67           60.00
2         RBF3        94.44        20.37               57.41                2.06           61.67           55.00
3         RBF4        97.78        44.44               71.11                2.36           75.00           66.67
4         RBF5        97.41        40.37               68.89                3.54           73.33           65.00
5         RBF6        91.11         9.26               50.19                2.94           53.33           45.00
6         RBF7        94.81        54.44               74.63                4.23           83.33           68.33
7         RBF8        97.78        35.56               66.67                6.35           76.67

In [27]:
print(results['C_RBF1'])
print(results['C_LINEAR1'])

0      10
1      10
2    5000
3     100
4    1000
5     100
6    3000
7     100
8      10
Name: C_RBF1, dtype: object
0    0.1
1    0.1
2    0.1
3    0.1
4    0.1
5    0.1
6     10
7    0.1
8    100
Name: C_LINEAR1, dtype: object


In [28]:
statistics_df.to_csv('statistics_data8.csv', index=False)