In [1]:
import os
import shutil

# Folder that holds the full, unsorted image set
source_dir = "coco-search18-test-images"  # Update this path

# Destination folders, one per filename prefix
ta_dir = "TA_images"  # Update this path
tp_dir = "TP_images"  # Update this path

# Make sure both destination folders exist before any copy is attempted
for folder in (ta_dir, tp_dir):
    if not os.path.exists(folder):
        os.makedirs(folder)

# Tallies: every directory entry visited, plus the copies made per prefix
count_source = 0
count_ta = 0
count_tp = 0

# Visit every entry of the source folder and route it by its filename prefix
for entry in os.listdir(source_dir):
    entry_path = os.path.join(source_dir, entry)

    if entry.startswith("TA"):
        # "TA..." files go to the TA_images folder
        shutil.copy(entry_path, ta_dir)
        count_ta += 1
    elif entry.startswith("TP"):
        # "TP..." files go to the TP_images folder
        shutil.copy(entry_path, tp_dir)
        count_tp += 1

    # Every entry is counted, whether or not it matched a prefix
    count_source += 1

# Report how many entries were seen and how many were copied per prefix
print(f"Total images in source folder: {count_source}")
print(f"Images starting with TA in TA_images folder: {count_ta}")
print(f"Images starting with TP in TP_images folder: {count_tp}")


Total images in source folder: 1224
Images starting with TA in TA_images folder: 612
Images starting with TP in TP_images folder: 612


In [1]:
import csv

# Path of the comma-separated input file
input_csv_file = 'fixdata-bottle-10-resize.csv'

# Path of the tab-separated output file
output_tsv_file = 'fixdata-bottle-10-resize.tsv'

# Open both files in one `with` statement so each is closed on exit,
# even if the conversion fails part-way through.
with open(input_csv_file, mode='r', newline='', encoding='utf-8') as src_handle, \
        open(output_tsv_file, mode='w', newline='', encoding='utf-8') as dst_handle:
    # The reader parses comma-delimited rows; the writer emits tab-delimited rows
    comma_rows = csv.reader(src_handle, delimiter=',')
    tab_writer = csv.writer(dst_handle, delimiter='\t')

    # Stream every parsed row straight into the TSV file
    tab_writer.writerows(comma_rows)

print("转换完成，文件已保存为:", output_tsv_file)


转换完成，文件已保存为: fixdata-bottle-10-resize.tsv


In [2]:
import pandas as pd

# Load the tab-separated fixation file into a DataFrame
df = pd.read_csv('fixdata-bottle-10-resize.tsv', sep='\t')

# Helper for the img_name column: keep the numeric part and drop leading zeros
def extract_image_id(img_name):
    """Return the numeric image id encoded in ``img_name`` as a string.

    Everything before the first '.' is taken as the number, e.g.
    '00012345.jpg' -> '12345'. The round trip through ``int`` is what
    strips the leading zeros.

    Raises:
        ValueError: if the text before the first '.' is not a valid integer.
    """
    stem, _, _ = img_name.partition('.')  # text ahead of the first dot
    return str(int(stem))

# Derive an image_id for every img_name value and store it as a new column
df['image_id'] = df['img_name'].map(extract_image_id)

# Write the frame back over the same TSV file, now including image_id
df.to_csv('fixdata-bottle-10-resize.tsv', index=False, sep='\t')


In [3]:
import pandas as pd

# Load the TSV file that carries the `condition` column
df = pd.read_csv('fixdata-bottle-10-resize.tsv', sep='\t')

# Boolean masks for the two values of `condition` that are kept
is_present = df['condition'].eq('present')
is_absent = df['condition'].eq('absent')

# Select the matching rows for each condition
present_df = df.loc[is_present]
absent_df = df.loc[is_absent]

# Persist each subset to its own TSV file (TP = present, TA = absent)
present_df.to_csv('TP-fixdata-bottle-10-resize.tsv', index=False, sep='\t')
absent_df.to_csv('TA-fixdata-bottle-10-resize.tsv', index=False, sep='\t')


In [7]:
import pandas as pd
import os
import shutil

def process_images(tsv_path='TA-fixdata-bottle-10-resize.tsv',
                   target_folder='TA_images',
                   source_folder='coco-search18-test-images'):
    """Copy one source image per distinct ``image_id`` to ``<image_id>.jpg``.

    The TSV at ``tsv_path`` must contain an ``image_id`` column. For every
    distinct id, the first file (in sorted name order) of ``source_folder``
    whose name contains that id as a complete run of digits, ignoring
    leading zeros, is copied into ``target_folder`` under the name
    ``<image_id>.jpg``. Ids with no matching file are skipped silently.

    Matching on whole digit runs, rather than on a plain substring, keeps
    id ``1234`` from picking up an unrelated file such as
    ``000000512345.jpg``.

    Args:
        tsv_path: Tab-separated file holding the ``image_id`` column.
        target_folder: Destination folder; created if it does not exist.
        source_folder: Folder that is searched for the images.
    """
    import re  # local import: the cell's import lines only bring in pandas/os/shutil

    # Read the TSV file and collect the distinct image ids
    df = pd.read_csv(tsv_path, delimiter='\t')
    image_ids = set(df['image_id'])

    # Folder that receives the renamed copies
    os.makedirs(target_folder, exist_ok=True)

    # Index the source folder once instead of re-listing it for every id.
    # Key: a digit run from the file stem with leading zeros removed.
    # Value: the first file (sorted order, hence deterministic) carrying it.
    digit_run = re.compile(r'[0-9]+')
    file_by_id = {}
    for filename in sorted(os.listdir(source_folder)):
        stem = os.path.splitext(filename)[0]  # ignore digits in the extension
        for run in digit_run.findall(stem):
            file_by_id.setdefault(str(int(run)), filename)

    for image_id in image_ids:
        # Compare ids in their string form
        image_id_str = str(image_id)
        filename = file_by_id.get(image_id_str)
        if filename is None:
            # No image for this id: skip it
            continue
        source_file = os.path.join(source_folder, filename)
        target_file = os.path.join(target_folder, f'{image_id_str}.jpg')
        # Copy the image under its new name
        shutil.copy(source_file, target_file)

# Run the copy only when this code executes as the main module, not on import
if __name__ == '__main__':
    process_images()


In [8]:
import pandas as pd
import os
import shutil

def process_images(tsv_path='TP-fixdata-bottle-10-resize.tsv',
                   target_folder='TP_images',
                   source_folder='coco-search18-test-images'):
    """Copy one source image per distinct ``image_id`` to ``<image_id>.jpg``.

    The TSV at ``tsv_path`` must contain an ``image_id`` column. For every
    distinct id, the first file (in sorted name order) of ``source_folder``
    whose name contains that id as a complete run of digits, ignoring
    leading zeros, is copied into ``target_folder`` under the name
    ``<image_id>.jpg``. Ids with no matching file are skipped silently.

    Matching on whole digit runs, rather than on a plain substring, keeps
    id ``1234`` from picking up an unrelated file such as
    ``000000512345.jpg``.

    Args:
        tsv_path: Tab-separated file holding the ``image_id`` column.
        target_folder: Destination folder; created if it does not exist.
        source_folder: Folder that is searched for the images.
    """
    import re  # local import: the cell's import lines only bring in pandas/os/shutil

    # Read the TSV file and collect the distinct image ids
    df = pd.read_csv(tsv_path, delimiter='\t')
    image_ids = set(df['image_id'])

    # Folder that receives the renamed copies
    os.makedirs(target_folder, exist_ok=True)

    # Index the source folder once instead of re-listing it for every id.
    # Key: a digit run from the file stem with leading zeros removed.
    # Value: the first file (sorted order, hence deterministic) carrying it.
    digit_run = re.compile(r'[0-9]+')
    file_by_id = {}
    for filename in sorted(os.listdir(source_folder)):
        stem = os.path.splitext(filename)[0]  # ignore digits in the extension
        for run in digit_run.findall(stem):
            file_by_id.setdefault(str(int(run)), filename)

    for image_id in image_ids:
        # Compare ids in their string form
        image_id_str = str(image_id)
        filename = file_by_id.get(image_id_str)
        if filename is None:
            # No image for this id: skip it
            continue
        source_file = os.path.join(source_folder, filename)
        target_file = os.path.join(target_folder, f'{image_id_str}.jpg')
        # Copy the image under its new name
        shutil.copy(source_file, target_file)

# Run the copy only when this code executes as the main module, not on import
if __name__ == '__main__':
    process_images()
