In [2]:
import json
import os

# Path to the base directory
base_dir = "/home/minhyekj/chestXray/VinCR"

# Paths for the input COCO file and train/test folders
vindxr_coco_path = os.path.join(base_dir, "vindxr_coco.json")
train_images_dir = os.path.join(base_dir, "jpg", "train")
test_images_dir = os.path.join(base_dir, "jpg", "test")

# Output files
train_json_path = os.path.join(base_dir, "train.json")
test_json_path = os.path.join(base_dir, "test.json")

# Load the COCO file
with open(vindxr_coco_path, "r") as f:
    coco_data = json.load(f)

# Get the list of image filenames in train and test folders
train_files = set(os.listdir(train_images_dir))
test_files = set(os.listdir(test_images_dir))
print(len(train_files))
print(len(test_files))

# Separate the images and annotations based on train/test split
train_images, test_images = [], []
train_annotations, test_annotations = [], []

for image in coco_data["images"]:
    if image["file_name"] in train_files:
        train_images.append(image)
    elif image["file_name"] in test_files:
        test_images.append(image)

# Map image IDs to annotations
image_id_to_annotations = {image["id"]: [] for image in coco_data["images"]}
for annotation in coco_data["annotations"]:
    image_id_to_annotations[annotation["image_id"]].append(annotation)

# Collect annotations for train and test images
for image in train_images:
    train_annotations.extend(image_id_to_annotations[image["id"]])

for image in test_images:
    test_annotations.extend(image_id_to_annotations[image["id"]])

# Create train and test COCO dictionaries
train_coco = {
    "images": train_images,
    "annotations": train_annotations,
    "categories": coco_data.get("categories", []),
}

test_coco = {
    "images": test_images,
    "annotations": test_annotations,
    "categories": coco_data.get("categories", []),
}

# Save the train and test COCO JSON files
with open(train_json_path, "w") as f:
    json.dump(train_coco, f, indent=4)

with open(test_json_path, "w") as f:
    json.dump(test_coco, f, indent=4)

print(f"Train JSON saved to: {train_json_path}")
print(f"Test JSON saved to: {test_json_path}")


15000
3000
Train JSON saved to: /home/minhyekj/chestXray/VinCR/train.json
Test JSON saved to: /home/minhyekj/chestXray/VinCR/test.json


In [3]:
import json
import random
import os

# 경로 설정
base_dir = "/home/minhyekj/chestXray/VinCR"
train_json_path = os.path.join(base_dir, "train.json")
train2_json_path = os.path.join(base_dir, "train2.json")
test2_json_path = os.path.join(base_dir, "test2.json")

# 랜덤 시드 설정 (재현 가능성을 위해)
random.seed(42)

# train.json 로드
with open(train_json_path, "r") as f:
    train_data = json.load(f)

# train.json에서 이미지와 관련된 정보를 가져오기
train_images = train_data["images"]
train_annotations = train_data["annotations"]
categories = train_data.get("categories", [])

# 3000개의 이미지를 랜덤 샘플링하여 test2로 사용
test2_images = random.sample(train_images, 3000)

# 나머지 이미지는 train2로 사용
test2_image_ids = {img["id"] for img in test2_images}
train2_images = [img for img in train_images if img["id"] not in test2_image_ids]

# 각 이미지 ID에 해당하는 annotations를 분리
test2_annotations = [ann for ann in train_annotations if ann["image_id"] in test2_image_ids]
train2_annotations = [ann for ann in train_annotations if ann["image_id"] not in test2_image_ids]

# COCO 포맷으로 저장
train2_coco = {
    "images": train2_images,
    "annotations": train2_annotations,
    "categories": categories,
}

test2_coco = {
    "images": test2_images,
    "annotations": test2_annotations,
    "categories": categories,
}

# train2.json과 test2.json 저장
with open(train2_json_path, "w") as f:
    json.dump(train2_coco, f, indent=4)

with open(test2_json_path, "w") as f:
    json.dump(test2_coco, f, indent=4)

print(f"Train2 JSON saved to: {train2_json_path}")
print(f"Test2 JSON saved to: {test2_json_path}")

Train2 JSON saved to: /home/minhyekj/chestXray/VinCR/train2.json
Test2 JSON saved to: /home/minhyekj/chestXray/VinCR/test2.json


In [4]:
import os
import json
import shutil

# 경로 설정
base_dir = "/home/minhyekj/chestXray/VinCR"
jpg_dir = os.path.join(base_dir, "jpg", "train")  # 기존 train 이미지는 여기 있다고 가정
train2_json_path = os.path.join(base_dir, "train2.json")
test2_json_path = os.path.join(base_dir, "test2.json")

# 새로 생성할 폴더 경로
train2_dir = os.path.join(base_dir, "jpg", "train2")
test2_dir = os.path.join(base_dir, "jpg", "test2")

# 폴더 생성 (이미 존재하면 무시)
os.makedirs(train2_dir, exist_ok=True)
os.makedirs(test2_dir, exist_ok=True)

# train2.json 로드
with open(train2_json_path, "r") as f:
    train2_data = json.load(f)

# test2.json 로드
with open(test2_json_path, "r") as f:
    test2_data = json.load(f)

# 이미지 복사 함수
def copy_images(image_data, source_dir, target_dir):
    for image in image_data:
        src_path = os.path.join(source_dir, image["file_name"])
        dest_path = os.path.join(target_dir, image["file_name"])
        if os.path.exists(src_path):  # 파일이 존재하는 경우에만 복사
            shutil.copy(src_path, dest_path)
        else:
            print(f"파일 누락: {src_path}")  # 누락된 파일 경고 메시지

# train2 이미지를 train2 폴더로 복사
copy_images(train2_data["images"], jpg_dir, train2_dir)

# test2 이미지를 test2 폴더로 복사
copy_images(test2_data["images"], jpg_dir, test2_dir)

print(f"이미지 복사가 완료되었습니다.")
print(f"Train2 이미지: {train2_dir}")
print(f"Test2 이미지: {test2_dir}")

이미지 복사가 완료되었습니다.
Train2 이미지: /home/minhyekj/chestXray/VinCR/jpg/train2
Test2 이미지: /home/minhyekj/chestXray/VinCR/jpg/test2


In [5]:
from PIL import Image

image_path = "/home/minhyekj/chestXray/VinCR/jpg/train2/f0b8c0d06a7cccc003f214b216673324.jpg"
image = Image.open(image_path)
print(image.size)  # This should return (1024, 1024)


(1024, 1024)


In [6]:
from PIL import Image
import os

# Directory containing the images
images_dir = "/home/minhyekj/chestXray/VinCR/jpg/train2"

# Get the first 10 image files from the directory
image_files = [f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png'))][:10]

# Print the size of the first 10 images
image_sizes = {}
for image_file in image_files:
    image_path = os.path.join(images_dir, image_file)
    try:
        with Image.open(image_path) as img:
            image_sizes[image_file] = img.size
    except Exception as e:
        image_sizes[image_file] = f"Error: {e}"

image_sizes


{'34e635f644f6d4853776e2d74357b239.jpg': (1024, 1024),
 'e0ba9af0459793b1db8b61d44c32fa37.jpg': (1024, 1024),
 'adc6f4b5339f7178055838776bf49dd7.jpg': (1024, 1024),
 '2767502d5b33cf62b97fd2f04ee88c62.jpg': (1024, 1024),
 '3aaf9082b2fc37d21d61ae532d3112d4.jpg': (1024, 1024),
 'c49a36c11ecc7e24d2b0912e68da95ac.jpg': (1024, 1024),
 '08e3d3783927a15d35137c756c1a7e0b.jpg': (1024, 1024),
 'b94aacb47e639df00707eae5332dc629.jpg': (1024, 1024),
 'b91b76bcb6e0b005ef98719bd719b488.jpg': (1024, 1024),
 'ec675fa2723a03bd4bc753ca149e9022.jpg': (1024, 1024)}

In [8]:
import json

# Paths to the annotation file and output file
input_annotation_path = "/home/minhyekj/chestXray/VinCR/jpg/train2.json"
output_annotation_path = "/home/minhyekj/chestXray/VinCR/jpg/train3.json"

# Target width and height
target_width = 1024
target_height = 1024

# Load the COCO annotation file
with open(input_annotation_path, "r") as f:
    coco_data = json.load(f)

# Update the width and height for all images
for image in coco_data.get("images", []):
    image["width"] = target_width
    image["height"] = target_height

# Save the updated annotations to a new file
with open(output_annotation_path, "w") as f:
    json.dump(coco_data, f, indent=4)

print(f"Updated annotations saved to {output_annotation_path}")


Updated annotations saved to /home/minhyekj/chestXray/VinCR/jpg/train3.json
