In [1]:
import os
import shutil

# Define the source directory containing the images
source_dir = "coco-search18-test-images"  # Update this path

# Define the target directories for the different image prefixes
ta_dir = "TA_images"  # Update this path
tp_dir = "TP_images"  # Update this path

# Create the target directories if they do not exist
if not os.path.exists(ta_dir):
    os.makedirs(ta_dir)
if not os.path.exists(tp_dir):
    os.makedirs(tp_dir)

# Initialize counters for the images in each directory
count_source = 0
count_ta = 0
count_tp = 0

# Loop through each file in the source directory
for filename in os.listdir(source_dir):
    if filename.startswith("TA"):
        # Copy file to TA_images folder
        shutil.copy(os.path.join(source_dir, filename), ta_dir)
        count_ta += 1
    elif filename.startswith("TP"):
        # Copy file to TP_images folder
        shutil.copy(os.path.join(source_dir, filename), tp_dir)
        count_tp += 1

    # Increment the source directory count
    count_source += 1

# Print the number of images in each directory
print(f"Total images in source folder: {count_source}")
print(f"Images starting with TA in TA_images folder: {count_ta}")
print(f"Images starting with TP in TP_images folder: {count_tp}")


Total images in source folder: 1224
Images starting with TA in TA_images folder: 612
Images starting with TP in TP_images folder: 612


In [18]:
import csv

# 输入的CSV文件路径
input_csv_file = 'fixdata-bottle-10-resize.csv'

# 输出的TSV文件路径
output_tsv_file = 'fixdata-bottle-10-resize.tsv'

# 使用'with'语句确保文件正确关闭
with open(input_csv_file, mode='r', newline='', encoding='utf-8') as csv_file:
    # 创建CSV读取器，这里假设分隔符为逗号
    csv_reader = csv.reader(csv_file, delimiter=',')

    # 打开输出的TSV文件
    with open(output_tsv_file, mode='w', newline='', encoding='utf-8') as tsv_file:
        # 创建TSV写入器，分隔符为制表符
        tsv_writer = csv.writer(tsv_file, delimiter='\t')

        # 逐行读取CSV文件，并写入到TSV文件中
        for row in csv_reader:
            tsv_writer.writerow(row)

print("转换完成，文件已保存为:", output_tsv_file)


转换完成，文件已保存为: fixdata-bottle-10-resize.tsv


In [19]:
import pandas as pd

# 加载TSV文件
df = pd.read_csv('fixdata-bottle-10-resize.tsv', sep='\t')

# 定义一个函数来处理img_name列，提取数字并去掉开头的0
def extract_image_id(img_name):
    # 假设img_name的格式是一些前导0后接数字，例如 '00012345.jpg'
    # 提取数字部分，并转换为int来自动去除前导0，再转回字符串
    num_part = img_name.split('.')[0]  # 假设文件名中的数字后面跟的是扩展名
    image_id = str(int(num_part))  # 转为整数再转回字符串去除前导零
    return image_id

# 应用这个函数到img_name列，并创建新的image_id列
df['image_id'] = df['img_name'].apply(extract_image_id)

# 保存修改后的DataFrame到新的TSV文件
df.to_csv('fixdata-bottle-10-resize.tsv', sep='\t', index=False)


In [20]:
import pandas as pd

# 读取TSV文件
df = pd.read_csv('fixdata-bottle-10-resize.tsv', sep='\t')

# 根据条件分割数据
present_df = df[df['condition'] == 'present']
absent_df = df[df['condition'] == 'absent']

# 将分割后的数据写入新的TSV文件
present_df.to_csv('TP_fixation.tsv', sep='\t', index=False)
absent_df.to_csv('TA_fixation.tsv', sep='\t', index=False)


In [21]:
import pandas as pd
import os
import shutil

def process_images():
    # 读取TSV文件
    df = pd.read_csv('TA_fixation.tsv', delimiter='\t')
    
    # 获取不重复的image_id集合
    image_ids = set(df['image_id'])
    
    # 创建一个文件夹用于存放重命名的图片
    os.makedirs('TA_images', exist_ok=True)
    
    # 设置源图片文件夹路径
    source_folder = 'coco-search18-test-images'
    
    # 遍历每个image_id
    for image_id in image_ids:
        # 确保image_id是字符串类型
        image_id_str = str(image_id)
        # 在文件夹中搜索符合条件的图片
        for filename in os.listdir(source_folder):
            if image_id_str in filename:
                # 构建源文件的完整路径
                source_file = os.path.join(source_folder, filename)
                # 构建目标文件的完整路径
                target_file = os.path.join('TA_images', f'{image_id_str}.jpg')
                # 复制并重命名图片
                shutil.copy(source_file, target_file)
                # 找到第一个匹配后就跳出循环
                break

if __name__ == '__main__':
    process_images()


In [23]:
import pandas as pd
import os
import shutil

def process_images():
    # 读取TSV文件
    df = pd.read_csv('TP_fixation.tsv', delimiter='\t')
    
    # 获取不重复的image_id集合
    image_ids = set(df['image_id'])
    
    # 创建一个文件夹用于存放重命名的图片
    os.makedirs('TP_images', exist_ok=True)
    
    # 设置源图片文件夹路径
    source_folder = 'coco-search18-test-images'
    
    # 遍历每个image_id
    for image_id in image_ids:
        # 确保image_id是字符串类型
        image_id_str = str(image_id)
        # 在文件夹中搜索符合条件的图片
        for filename in os.listdir(source_folder):
            if image_id_str in filename:
                # 构建源文件的完整路径
                source_file = os.path.join(source_folder, filename)
                # 构建目标文件的完整路径
                target_file = os.path.join('TP_images', f'{image_id_str}.jpg')
                # 复制并重命名图片
                shutil.copy(source_file, target_file)
                # 找到第一个匹配后就跳出循环
                break

if __name__ == '__main__':
    process_images()


In [25]:
import os
import re
import csv

def extract_image_ids(directory_path, output_csv):
    # Traverse all files in the directory
    images = [f for f in os.listdir(directory_path) if f.endswith(('.png', '.jpg', '.jpeg'))]

    # Open the CSV file for writing
    with open(output_csv, 'w', newline='') as file:
        writer = csv.writer(file, delimiter='\t')  # Use tab as the delimiter
        # Write the header
        writer.writerow(['imageid', 'filename'])

        # Iterate over image files
        for image in images:
            # Use regular expression to extract the numeric part of the filename
            match = re.search(r'\d+', image)
            if match:
                # Strip leading zeros
                image_id = match.group(0).lstrip('0')
                # If stripping leads to an empty string, assign '0'
                if not image_id:
                    image_id = '0'
            else:
                image_id = 'Unknown'  # If no numbers, mark as 'Unknown'

            # Write the data row
            writer.writerow([image_id, image])

directory_path1 = 'TA_images'  # Directory where the image files are located
output_csv = 'TA_images/TA_images.tsv'  # Output CSV file name
extract_image_ids(directory_path1, output_csv)

directory_path2 = 'TP_images'  # Directory where the image files are located
output_csv = 'TP_images/TP_images.tsv'  # Output CSV file name
extract_image_ids(directory_path2, output_csv)

directory_path3 = 'TA_reward'
output_csv = 'TA_reward/TA_reward.tsv'
extract_image_ids(directory_path3, output_csv)

directory_path4 = 'TP_reward'
output_csv = 'TP_reward/TP_reward.tsv'
extract_image_ids(directory_path4, output_csv)

directory_path5 = 'TA_saliency'
output_csv = 'TA_saliency/TA_saliency.tsv'
extract_image_ids(directory_path5, output_csv)

directory_path6 = 'TP_saliency'
output_csv = 'TP_saliency/TP_saliency.tsv'
extract_image_ids(directory_path6, output_csv)


In [10]:
import pandas as pd

# 加载TA_images.tsv文件
images_path = 'TA_images/TA_images.tsv'
images_data = pd.read_csv(images_path, sep='\t')
# 获取不重复的imageid集合
unique_image_ids = set(images_data['imageid'])

# 打印集合大小
print(f"Unique Image IDs count: {len(unique_image_ids)}")

# 加载TA_fixations.tsv文件
fixations_path = 'TA_fixations.tsv'
fixations_data = pd.read_csv(fixations_path, sep='\t')
# 删除不在imageid集合中的行
filtered_fixations = fixations_data[fixations_data['image_id'].isin(unique_image_ids)]

# 保存修改后的文件
filtered_fixations.to_csv('TA_fixations.tsv', sep='\t', index=False)


Unique Image IDs count: 41


In [12]:
import os
import pandas as pd

# 步骤1: 读取TSV文件中的imageid列
def read_image_ids(filepath):
    df = pd.read_csv(filepath, sep='\t')
    # 确保imageid是字符串类型
    return set(df['imageid'].astype(str))

# 步骤2: 检查并删除不包含imageid的图片
def delete_unmatched_images(directory, image_ids):
    for filename in os.listdir(directory):
        # 检查文件名是否包含任何有效的imageid，确保image_id是字符串
        if not any(str(image_id) in filename for image_id in image_ids):
            os.remove(os.path.join(directory, filename))
            print(f"Deleted: {filename}")

# 文件路径和目录
tsv_file_path = 'TA_images/TA_images.tsv'
images_directory = 'TA_saliency'

# 执行步骤
image_ids = read_image_ids(tsv_file_path)
delete_unmatched_images(images_directory, image_ids)




Deleted: TP_clock_000000148614.jpg
Deleted: TA_tv_000000487774.jpg
Deleted: TA_fork_000000260486.jpg
Deleted: TP_potted plant_000000513219.jpg
Deleted: TP_tv_000000558673.jpg
Deleted: TP_keyboard_000000175954.jpg
Deleted: TP_chair_000000006608.jpg
Deleted: TA_toilet_000000221659.jpg
Deleted: TA_cup_000000392753.jpg
Deleted: TA_bowl_000000498583.jpg
Deleted: TA_cup_000000259422.jpg
Deleted: TP_toilet_000000240501.jpg
Deleted: TA_oven_000000250210.jpg
Deleted: TP_mouse_000000555009.jpg
Deleted: TP_fork_000000068881.jpg
Deleted: TA_mouse_000000144539.jpg
Deleted: TA_tv_000000568982.jpg
Deleted: TP_tv_000000244571.jpg
Deleted: TP_sink_000000505132.jpg
Deleted: TP_tv_000000325992.jpg
Deleted: TP_tv_000000546934.jpg
Deleted: TA_toilet_000000138549.jpg
Deleted: TA_potted plant_000000453722.jpg
Deleted: TP_sink_000000343466.jpg
Deleted: TA_mouse_000000387482.jpg
Deleted: TP_sink_000000270721.jpg
Deleted: TP_oven_000000541345.jpg
Deleted: TA_cup_000000070626.jpg
Deleted: TP_sink_000000058029.jp

In [14]:
import os
import re

def print_image_ids_grouped(directory):
    # 确保目录存在
    if not os.path.isdir(directory):
        print("指定的目录不存在")
        return
    
    # 创建字典来存储 imageid 与文件名的对应关系
    images = {}
    
    # 遍历目录中的所有文件
    for filename in os.listdir(directory):
        # 使用正则表达式查找文件名中的数字部分
        match = re.search(r'\d+', filename)
        if match:
            image_id = match.group(0)
            # 如果字典中已存在此 imageid，添加文件名到对应的列表中
            if image_id in images:
                images[image_id].append(filename)
            else:
                images[image_id] = [filename]

    # 打印每个 imageid 及其对应的所有文件名
    for image_id, filenames in images.items():
        print(f"Image ID: {image_id}, Files: {', '.join(filenames)}")

# 调用函数
directory = "visualize_softmax_return_TPtoTA"  # 替换为你的图片目录路径
print_image_ids_grouped(directory)



Image ID: 000000369128, Files: TA_knife_000000369128_step0.jpg, TA_knife_000000369128_step1.jpg, TA_knife_000000369128_step3.jpg, TA_knife_000000369128_step2.jpg, TA_knife_000000369128_step5.jpg, TA_knife_000000369128_step4.jpg, TA_knife_000000369128_concat.jpg, TA_fork_000000369128_step4.jpg, TA_fork_000000369128_step5.jpg, TA_fork_000000369128_step1.jpg, TA_fork_000000369128_step0.jpg, TA_fork_000000369128_step2.jpg, TA_fork_000000369128_concat.jpg, TA_fork_000000369128_step3.jpg
Image ID: 000000313321, Files: TA_toilet_000000313321_step0.jpg, TA_toilet_000000313321_step1.jpg, TA_toilet_000000313321_step3.jpg, TA_toilet_000000313321_step2.jpg, TA_toilet_000000313321_step5.jpg, TA_toilet_000000313321_step4.jpg, TA_toilet_000000313321_concat.jpg
Image ID: 000000546649, Files: TA_toilet_000000546649_step5.jpg, TA_toilet_000000546649_step4.jpg, TA_toilet_000000546649_step3.jpg, TA_toilet_000000546649_step2.jpg, TA_toilet_000000546649_step0.jpg, TA_toilet_000000546649_step1.jpg, TA_toilet

In [17]:
import os
import glob

folder_path = 'TP_reward'

# 通过glob.glob获取所有图片文件
for file_path in glob.glob(folder_path + '/*'):
    # 检查文件是否为图片
    if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        # 获取文件的基本名称，例如从 'path/to/your/folder/image.png' 中提取 'image'
        base_name = os.path.basename(file_path)
        # 分割文件名和后缀
        name, ext = os.path.splitext(base_name)
        
        # 检查文件名是否以 'concat' 结尾
        if not name.endswith('concat'):
            # 不以 'concat' 结尾，删除文件
            os.remove(file_path)
            print(f'Deleted: {file_path}')
        else:
            # 如果文件名以 'concat' 结尾，则移除 'concat' 并重命名文件
            new_name = name[:-7]  # 去除末尾的 'concat'
            new_file_path = os.path.join(folder_path, new_name + ext)
            os.rename(file_path, new_file_path)
            print(f'Renamed: {file_path} to {new_file_path}')



Renamed: TP_reward/TP_potted plant_000000543672_concat.jpg to TP_reward/TP_potted plant_000000543672.jpg
Deleted: TP_reward/TP_tv_000000439472_step2.jpg
Deleted: TP_reward/TP_chair_000000000164_step2.jpg
Deleted: TP_reward/TP_chair_000000577590_step5.jpg
Deleted: TP_reward/TP_chair_000000570465_step5.jpg
Deleted: TP_reward/TP_bottle_000000086135_step1.jpg
Deleted: TP_reward/TP_bowl_000000512476_step1.jpg
Deleted: TP_reward/TP_cup_000000190292_step1.jpg
Deleted: TP_reward/TP_cup_000000572260_step3.jpg
Renamed: TP_reward/TP_bottle_000000394517_concat.jpg to TP_reward/TP_bottle_000000394517.jpg
Deleted: TP_reward/TP_tv_000000244571_step0.jpg
Deleted: TP_reward/TP_bowl_000000042526_step0.jpg
Deleted: TP_reward/TP_chair_000000336324_step2.jpg
Deleted: TP_reward/TP_tv_000000213375_step1.jpg
Renamed: TP_reward/TP_microwave_000000390184_concat.jpg to TP_reward/TP_microwave_000000390184.jpg
Deleted: TP_reward/TP_tv_000000573823_step0.jpg
Deleted: TP_reward/TP_bowl_000000163528_step3.jpg
Deleted

In [23]:
import os

def count_images_in_folder(folder_path):
    # 支持的图片文件扩展名
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'}
    image_count = 0

    # 遍历文件夹
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            # 检查文件扩展名是否在支持的扩展名列表中
            if os.path.splitext(file)[1].lower() in image_extensions:
                image_count += 1

    return image_count

# 用实际的文件夹路径替换这里的'path_to_folder'
folder_path_1 = 'TA_images'
print("Number of images in TA images:", count_images_in_folder(folder_path_1))

folder_path_2 = 'TP_images'
print("Number of images in TP images:", count_images_in_folder(folder_path_2))

folder_path_3 = 'TA_reward'
print("Number of images in TA reward:", count_images_in_folder(folder_path_3))

folder_path_4 = 'TP_reward'
print("Number of images in TP reward:", count_images_in_folder(folder_path_4))



Number of images in TA images: 33
Number of images in TP images: 33
Number of images in TA reward: 33
Number of images in TP reward: 33


In [6]:
import os

# 设置要遍历的文件夹路径
folder_path = 'TP_reward'

# 遍历文件夹中的所有文件
for filename in os.listdir(folder_path):
    # 检查文件名是否不以'TA_bottle_'开头
    if not filename.startswith('TP_bottle_'):
        # 构建完整的文件路径
        file_path = os.path.join(folder_path, filename)
        # 如果是文件，则删除
        if os.path.isfile(file_path):
            os.remove(file_path)
            print(f"Deleted '{file_path}'")
        else:
            print(f"'{file_path}' is not a file and was not deleted.")


Deleted 'TP_reward/TP_clock_000000148614.jpg'
Deleted 'TP_reward/TP_potted plant_000000513219.jpg'
Deleted 'TP_reward/TP_tv_000000558673.jpg'
Deleted 'TP_reward/TP_keyboard_000000175954.jpg'
Deleted 'TP_reward/TP_chair_000000006608.jpg'
Deleted 'TP_reward/TP_toilet_000000240501.jpg'
Deleted 'TP_reward/TP_mouse_000000555009.jpg'
Deleted 'TP_reward/TP_fork_000000068881.jpg'
Deleted 'TP_reward/TP_tv_000000244571.jpg'
Deleted 'TP_reward/TP_sink_000000505132.jpg'
Deleted 'TP_reward/TP_tv_000000325992.jpg'
Deleted 'TP_reward/TP_tv_000000546934.jpg'
Deleted 'TP_reward/TP_sink_000000343466.jpg'
Deleted 'TP_reward/TP_sink_000000270721.jpg'
Deleted 'TP_reward/TP_oven_000000541345.jpg'
Deleted 'TP_reward/TP_sink_000000058029.jpg'
Deleted 'TP_reward/TP_laptop_000000088244.jpg'
Deleted 'TP_reward/TP_clock_000000150675.jpg'
Deleted 'TP_reward/TP_car_000000436694.jpg'
Deleted 'TP_reward/TP_stop sign_000000410855.jpg'
Deleted 'TP_reward/TP_keyboard_000000024104.jpg'
Deleted 'TP_reward/TP_toilet_000000

In [27]:
import os
import re

def rename_files(directory):
    # 遍历指定目录下的所有文件
    for filename in os.listdir(directory):
        # 检查文件扩展名是否为.jpg
        if filename.endswith('.jpg'):
            # 使用正则表达式查找文件名中的数字，并去掉前导零
            new_filename = re.sub(r'0+(\d+)', r'\1', filename)
            # 构造旧文件和新文件的完整路径
            old_file = os.path.join(directory, filename)
            new_file = os.path.join(directory, new_filename)
            # 重命名文件
            os.rename(old_file, new_file)
            print(f'Renamed "{filename}" to "{new_filename}"')

# 指定要遍历的文件夹路径
folder_path_1 = 'TP_saliency'
rename_files(folder_path_1)

folder_path_2 = 'TA_saliency'
rename_files(folder_path_2)


Renamed "TP_bottle_000000450391.jpg" to "TP_bottle_450391.jpg"
Renamed "TP_bottle_000000319696.jpg" to "TP_bottle_319696.jpg"
Renamed "TP_bottle_000000298773.jpg" to "TP_bottle_298773.jpg"
Renamed "TP_bottle_000000520077.jpg" to "TP_bottle_520077.jpg"
Renamed "TP_bottle_000000271117.jpg" to "TP_bottle_271117.jpg"
Renamed "TP_bottle_000000478155.jpg" to "TP_bottle_478155.jpg"
Renamed "TP_bottle_000000086135.jpg" to "TP_bottle_86135.jpg"
Renamed "TP_bottle_000000520012.jpg" to "TP_bottle_520012.jpg"
Renamed "TP_bottle_000000138086.jpg" to "TP_bottle_138086.jpg"
Renamed "TP_bottle_000000253489.jpg" to "TP_bottle_253489.jpg"
Renamed "TP_bottle_000000302823.jpg" to "TP_bottle_302823.jpg"
Renamed "TP_bottle_000000297233.jpg" to "TP_bottle_297233.jpg"
Renamed "TP_bottle_000000394517.jpg" to "TP_bottle_394517.jpg"
Renamed "TP_bottle_000000575834.jpg" to "TP_bottle_575834.jpg"
Renamed "TP_bottle_000000037367.jpg" to "TP_bottle_37367.jpg"
Renamed "TP_bottle_000000294475.jpg" to "TP_bottle_294475

In [29]:
import os
import re

def rename_images(folder_path):
    # 遍历文件夹中的所有文件
    for filename in os.listdir(folder_path):
        # 检查文件名中的数字部分
        match = re.search(r'\d+', filename)
        if match:
            new_filename = match.group() + os.path.splitext(filename)[1]
            original_path = os.path.join(folder_path, filename)
            new_path = os.path.join(folder_path, new_filename)
            # 重命名文件
            os.rename(original_path, new_path)
            print(f"Renamed '{filename}' to '{new_filename}'")

# 调用函数，输入你的文件夹路径
folder_path = 'TA_saliency'
rename_images(folder_path)


Renamed 'TA_bottle_103161.jpg' to '103161.jpg'
Renamed 'TA_bottle_175593.jpg' to '175593.jpg'
Renamed 'TA_bottle_560427.jpg' to '560427.jpg'
Renamed 'TA_bottle_439224.jpg' to '439224.jpg'
Renamed 'TA_bottle_185698.jpg' to '185698.jpg'
Renamed 'TA_bottle_316505.jpg' to '316505.jpg'
Renamed 'TA_bottle_2759.jpg' to '2759.jpg'
Renamed 'TA_bottle_211186.jpg' to '211186.jpg'
Renamed 'TA_bottle_420666.jpg' to '420666.jpg'
Renamed 'TA_bottle_173208.jpg' to '173208.jpg'
Renamed 'TA_bottle_141108.jpg' to '141108.jpg'
Renamed 'TA_bottle_469671.jpg' to '469671.jpg'
Renamed 'TA_bottle_429679.jpg' to '429679.jpg'
Renamed 'TA_bottle_525903.jpg' to '525903.jpg'
Renamed 'TA_bottle_135256.jpg' to '135256.jpg'
Renamed 'TA_bottle_351609.jpg' to '351609.jpg'
Renamed 'TA_bottle_555387.jpg' to '555387.jpg'
Renamed 'TA_bottle_346940.jpg' to '346940.jpg'
Renamed 'TA_bottle_491098.jpg' to '491098.jpg'
Renamed 'TA_bottle_575915.jpg' to '575915.jpg'
Renamed 'TA_bottle_124759.jpg' to '124759.jpg'
Renamed 'TA_bottl

In [14]:
import os
import glob

def get_image_ids(folder_path):
    # 使用glob来找到所有的图片文件
    files = glob.glob(os.path.join(folder_path, '*.jpg')) + glob.glob(os.path.join(folder_path, '*.png'))
    # 获取文件名（不包含后缀）
    image_ids = [os.path.splitext(os.path.basename(file))[0] for file in files]
    return set(image_ids)

def delete_unmatched_images(source_folder, image_ids):
    # 遍历源文件夹中的所有图片文件
    files = glob.glob(os.path.join(source_folder, '*.jpg')) + glob.glob(os.path.join(source_folder, '*.png'))
    for file in files:
        # 获取当前文件的image id
        image_id = os.path.splitext(os.path.basename(file))[0]
        # 检查当前image id是否不在集合中
        if not any(id in image_id for id in image_ids):
            # 如果不在集合中，删除该文件
            os.remove(file)
            print(f"Deleted: {file}")

# 示例使用
folder_with_ids = 'TA_rewards'
source_folder = 'TA_images'

# 获取image id集合
ids = get_image_ids(folder_with_ids)
# 删除不匹配的图片
delete_unmatched_images(source_folder, ids)


Deleted: TA_images/420666.jpg
Deleted: TA_images/211186.jpg
Deleted: TA_images/173208.jpg
Deleted: TA_images/511241.jpg
Deleted: TA_images/141108.jpg
Deleted: TA_images/469671.jpg
Deleted: TA_images/429679.jpg
Deleted: TA_images/9866.jpg
Deleted: TA_images/525903.jpg
Deleted: TA_images/2759.jpg
Deleted: TA_images/103161.jpg
Deleted: TA_images/560427.jpg
Deleted: TA_images/175593.jpg
Deleted: TA_images/185698.jpg
Deleted: TA_images/535151.jpg
Deleted: TA_images/316505.jpg
Deleted: TA_images/439224.jpg
Deleted: TA_images/253452.jpg
Deleted: TA_images/115866.jpg
Deleted: TA_images/547493.jpg
Deleted: TA_images/443597.jpg
Deleted: TA_images/258661.jpg
Deleted: TA_images/160421.jpg
Deleted: TA_images/272738.jpg
Deleted: TA_images/346940.jpg
Deleted: TA_images/491098.jpg
Deleted: TA_images/575915.jpg
Deleted: TA_images/182417.jpg
Deleted: TA_images/135256.jpg
Deleted: TA_images/77951.jpg
Deleted: TA_images/351609.jpg
Deleted: TA_images/196.jpg
Deleted: TA_images/555387.jpg
Deleted: TA_images

In [18]:
import os

# Directory path for the TP_rewards folder
directory_path = "TP_reward"

# Initialize an empty set to store image IDs
image_ids = set()

# Traverse through the directory and get image file names without their extensions
for filename in os.listdir(directory_path):
    if filename.endswith((".png", ".jpg", ".jpeg")):  # Check for image files
        image_id = os.path.splitext(filename)[0]  # Get filename without extension
        image_ids.add(image_id)

# Print the set of image IDs
print(image_ids)


{'297233', '476851', '276488', '426253', '294475', '302823', '520012', '137967', '371864', '303670', '433505', '271117', '520077', '406426', '394517', '253489', '298773', '86135', '581205', '231822', '478155', '450391', '575834', '37367', '138086', '319696', '148977', '165639', '225129', '122602', '547875', '325992', '182213'}


In [22]:
import os
from pathlib import Path

def get_image_ids(directory):
    # 获取指定目录中所有文件的基本名称（不含后缀）
    image_ids = set()
    for file in os.listdir(directory):
        if file.endswith(('.png', '.jpg', '.jpeg')):
            image_ids.add(Path(file).stem)
    return image_ids

def delete_unmatched_images(source_dir, ids):
    # 删除不包含指定ids中任何一个的文件
    for file in os.listdir(source_dir):
        if not any(id in file for id in ids):
            os.remove(os.path.join(source_dir, file))
            print(f"Deleted {file}")

# 路径配置
rewards_dir = 'TA_reward'
images_dir = 'TA_images'

# 获取image IDs
image_ids = get_image_ids(rewards_dir)

# 删除不匹配的图片
delete_unmatched_images(images_dir, image_ids)


Deleted 511241.jpg
Deleted 9866.jpg
Deleted 535151.jpg
Deleted 258661.jpg
Deleted 182417.jpg
Deleted 196.jpg
Deleted 3992.jpg
Deleted 434179.jpg
