In [None]:
import os
import shutil
from PIL import Image

def pad_frames_to_multiple_of_12(frames, target_dir):
    """12배수보다 4프레임 이하로 부족하면 마지막 프레임을 복사해서 12배수로 맞춤"""
    num_frames = len(frames)
    remainder = num_frames % 12

    if remainder > 0 and (12 - remainder) <= 4:  # (12 - remainder)가 4 이하일 때만 패딩
        last_frame = frames[-1]
        for _ in range(12 - remainder):
            new_frame_path = os.path.join(target_dir, f"frame_{num_frames}.jpg")
            shutil.copy(last_frame, new_frame_path)
            num_frames += 1

def process_video_folders(base_dir, output_dir):
    """영상 폴더 내 프레임을 12배수 단위로 분할하고, 부족한 경우 보정"""

    drop_count = 0

    # 모든 영상 폴더를 순회
    for video_folder in os.listdir(base_dir):
        video_path = os.path.join(base_dir, video_folder)

        if not os.path.isdir(video_path):
            continue  # 폴더가 아닐 경우 스킵

        frames = sorted([os.path.join(video_path, f) for f in os.listdir(video_path) if f.endswith('.jpg')])

        if len(frames) < 8:  # 0~7개 프레임만 있는 폴더는 제외
            print(f"Skipping {video_folder} (only {len(frames)} frames)")
            continue

        num_frames = len(frames)
        num_subfolders = num_frames // 12  # 12배수로 자를 수 있는 그룹 개수
        remainder = num_frames % 12  # 나머지 프레임 수

        # 새로운 폴더 생성 및 프레임 복사
        for i in range(num_subfolders):
            subfolder_path = os.path.join(output_dir, f"{video_folder}_part{i+1}")
            os.makedirs(subfolder_path, exist_ok=True)

            start_idx = i * 12
            end_idx = start_idx + 12
            for frame_path in frames[start_idx:end_idx]:
                shutil.copy(frame_path, subfolder_path)

        # 나머지 프레임 처리
        if remainder > 0:
            last_part_path = os.path.join(output_dir, f"{video_folder}_part{num_subfolders+1}")
            os.makedirs(last_part_path, exist_ok=True)

            last_frames = frames[-remainder:]  # 남은 프레임
            if (12 - remainder) <= 4:  # 4개 이하 부족하면 마지막 프레임 복사해서 12배수로 맞추기
                pad_frames_to_multiple_of_12(last_frames, last_part_path)  # 마지막 프레임 복사해서 채우기
            elif (12 - remainder) > 4:
                print(f"Dropping {remainder} frames from {video_folder}")  # 4프레임 초과하면 삭제
                drop_count += 1

        print(f"Processed {video_folder}: {num_subfolders} folders created.")

    print("----Complete----")
    print(f"total drop count = {drop_count}")

In [None]:
# 실행 경로 설정
base_directory = "/content/drive/MyDrive/DMS/data/DMD/dmd_frames"  # 원본 프레임 폴더들이 있는 경로
output_directory = "/content/drive/MyDrive/DMS/data/DMD/dmd_frames_12x"  # 정리된 프레임이 저장될 경로

# 정리 실행
process_video_folders(os.path.join(base_directory, 'Normal'), os.path.join(output_directory, 'Normal'))

Processed gB-9(File9)_s2_face_2019-03-07-16;21;20_9_6.avi: 5 folders created.
Processed gB-9(File9)_s4_face_2019-03-22-09;43;35_9_2.avi: 8 folders created.
Dropping 6 frames from gB-9(File9)_s2_face_2019-03-07-16;21;20_9_1.avi
Processed gB-9(File9)_s2_face_2019-03-07-16;21;20_9_1.avi: 5 folders created.
Dropping 4 frames from gB-9(File9)_s3_face_2019-03-07-16;29;41_9_0.avi
Processed gB-9(File9)_s3_face_2019-03-07-16;29;41_9_0.avi: 8 folders created.
Dropping 3 frames from gB-9(File9)_s2_face_2019-03-07-16;21;20_9_11.avi
Processed gB-9(File9)_s2_face_2019-03-07-16;21;20_9_11.avi: 2 folders created.
Processed gB-9(File9)_s4_face_2019-03-22-09;43;35_9_8.avi: 1 folders created.
Processed gB-9(File9)_s4_face_2019-03-22-09;43;35_9_1.avi: 2 folders created.
Dropping 6 frames from gB-9(File9)_s4_face_2019-03-22-09;43;35_9_7.avi
Processed gB-9(File9)_s4_face_2019-03-22-09;43;35_9_7.avi: 4 folders created.
Dropping 4 frames from gB-9(File9)_s3_face_2019-03-07-16;29;41_9_3.avi
Processed gB-9(File

---
# 검증

In [None]:
dmd_frames_12x_path = '/content/drive/MyDrive/DMS/data/DMD/dmd_frames_12x'

In [None]:
import os
import shutil

def check_and_remove_invalid_folders(base_dir):
    """각 폴더의 이미지 개수를 확인하고, 12개가 아닌 폴더를 삭제"""

    incorrect_folders = []  # 12개가 아닌 폴더를 저장할 리스트

    # 모든 폴더를 순회
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)

        if not os.path.isdir(folder_path):  # 폴더가 아닌 경우 스킵
            continue

        # 폴더 내 이미지 개수 확인
        image_count = len([f for f in os.listdir(folder_path) if f.endswith('.jpg')])

        if image_count != 12:  # 이미지 개수가 12개가 아닌 경우 저장
            shutil.rmtree(folder_path)  # 폴더 삭제
            incorrect_folders.append((folder, image_count))
            print(f"🗑 Deleted {folder}: {image_count} images")

    # 결과 출력
    if incorrect_folders:
        print("📌 삭제한 폴더 목록:")
        for folder, count in incorrect_folders:
            print(f"❌ {folder}: {count} images")
    else:
        print("✅ 모든 폴더가 12개의 이미지를 포함하고 있습니다.")

In [None]:
# 폴더 확인 실행
check_and_remove_invalid_folders(os.path.join(dmd_frames_12x_path, 'Normal'))

🗑 Deleted gE-28(File18)_s2_face_2019-03-15-10;12;30_28_2.avi_part2: 0 images
🗑 Deleted gE-27(File17)_s2_face_2019-03-07-13;05;11_27_12.avi_part4: 0 images
🗑 Deleted gA-5(File5)_s4_face_2019-03-22-11;28;00_5_6.avi_part4: 0 images
🗑 Deleted gA-3(File3)_s1_face_2019-03-08-10;27;38_3_15.avi_part2: 0 images
🗑 Deleted gE-29(File19)_s2_face_2019-03-15-13;42;24_29_4.avi_part9: 0 images
🗑 Deleted gE-26(File16)_s1_face_2019-03-15-09;25;24_26_22.avi_part1: 4 images
🗑 Deleted gE-28(File18)_s2_face_2019-03-15-10;12;30_28_12.avi_part5: 0 images
🗑 Deleted gE-26(File16)_s3_face_2019-03-15-09;38;23_26_3.avi_part7: 0 images
🗑 Deleted gE-28(File18)_s2_face_2019-03-15-10;12;30_28_13.avi_part5: 0 images
🗑 Deleted gE-28(File18)_s2_face_2019-03-15-10;12;30_28_5.avi_part2: 0 images
🗑 Deleted gE-30(File20)_s3_face_2019-03-15-10;56;02_30_0.avi_part10: 0 images
🗑 Deleted gB-9(File9)_s1_face_2019-03-07-16;36;24_9_22.avi_part6: 2 images
🗑 Deleted gE-28(File18)_s2_face_2019-03-15-10;12;30_28_4.avi_part6: 0 images
🗑

In [None]:
# 폴더 확인 실행
check_and_remove_invalid_folders(os.path.join(dmd_frames_12x_path, 'PhoneCall'))

🗑 Deleted gB-6(File6)_s2_face_2019-03-11-13;46;14_6_1.avi_part24: 0 images
🗑 Deleted gB-7(File7)_s4_face_2019-03-22-10;42;09_7_2.avi_part1: 4 images
🗑 Deleted gB-7(File7)_s4_face_2019-03-22-10;42;09_7_1.avi_part19: 0 images
🗑 Deleted gB-8(File8)_s2_face_2019-03-11-14;38;44_8_0.avi_part18: 3 images
🗑 Deleted gB-8(File8)_s2_face_2019-03-11-14;38;44_8_1.avi_part16: 2 images
🗑 Deleted gB-9(File9)_s4_face_2019-03-22-09;43;35_9_1.avi_part6: 0 images
🗑 Deleted gB-10(File10)_s2_face_2019-03-11-15;15;21_10_0.avi_part46: 0 images
🗑 Deleted gB-10(File10)_s2_face_2019-03-11-15;15;21_10_1.avi_part33: 0 images
🗑 Deleted gE-29(File19)_s2_face_2019-03-15-13;42;24_29_0.avi_part22: 0 images
🗑 Deleted gE-29(File19)_s2_face_2019-03-15-13;42;24_29_1.avi_part27: 0 images
🗑 Deleted gZ-37(File31)_s4_face_2019-04-03-15;40;42_37_0.avi_part16: 1 images
🗑 Deleted gZ-36(File30)_s4_face_2019-04-03-15;01;29_36_0.avi_part16: 4 images
🗑 Deleted gE-26(File16)_s2_face_2019-03-15-09;15;51_26_1.avi_part19: 0 images
🗑 Dele

In [None]:
# 폴더 확인 실행
check_and_remove_invalid_folders(os.path.join(dmd_frames_12x_path, 'Text'))

🗑 Deleted gA-4(File4)_s4_face_2019-03-25-11;44;29_4_0.avi_part12: 0 images
🗑 Deleted gA-5(File5)_s2_face_2019-03-08-10;46;46_5_2.avi_part10: 0 images
🗑 Deleted gA-5(File5)_s2_face_2019-03-08-10;46;46_5_1.avi_part1: 4 images
🗑 Deleted gA-5(File5)_s2_face_2019-03-08-10;46;46_5_4.avi_part10: 0 images
🗑 Deleted gB-6(File6)_s2_face_2019-03-11-13;46;14_6_2.avi_part9: 0 images
🗑 Deleted gB-6(File6)_s4_face_2019-03-20-13;42;00_6_6.avi_part3: 0 images
🗑 Deleted gB-6(File6)_s4_face_2019-03-20-13;42;00_6_2.avi_part6: 0 images
🗑 Deleted gB-6(File6)_s4_face_2019-03-20-13;42;00_6_1.avi_part2: 0 images
🗑 Deleted gB-7(File7)_s2_face_2019-03-11-14;12;25_7_2.avi_part7: 0 images
🗑 Deleted gB-7(File7)_s4_face_2019-03-22-10;42;09_7_2.avi_part7: 0 images
🗑 Deleted gB-7(File7)_s4_face_2019-03-22-10;42;09_7_1.avi_part4: 0 images
🗑 Deleted gB-7(File7)_s4_face_2019-03-22-10;42;09_7_3.avi_part4: 0 images
🗑 Deleted gB-7(File7)_s4_face_2019-03-22-10;42;09_7_0.avi_part4: 0 images
🗑 Deleted gB-8(File8)_s4_face_2019-

In [None]:
check_and_remove_invalid_folders(os.path.join(dmd_frames_12x_path, 'Normal'))
check_and_remove_invalid_folders(os.path.join(dmd_frames_12x_path, 'PhoneCall'))
check_and_remove_invalid_folders(os.path.join(dmd_frames_12x_path, 'Text'))

✅ 모든 폴더가 12개의 이미지를 포함하고 있습니다.
✅ 모든 폴더가 12개의 이미지를 포함하고 있습니다.
✅ 모든 폴더가 12개의 이미지를 포함하고 있습니다.


---
# 사람수 count

In [3]:
import os

def extract_folder_info_to_df(base_dir):
    folder_info = []

    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)

        if not os.path.isdir(folder_path):  # 폴더가 아닐 경우 스킵
            continue

        splits = folder.split("_")

        person_id = splits[0]

        folder_info.append([person_id])

    return folder_info


# 실행할 폴더 경로
base_directory = "/content/drive/MyDrive/DMS/data/DMD/dmd_frames_12x/Normal"

# 폴더 정보 분석 실행 및 DataFrame 저장
norm_info = extract_folder_info_to_df(base_directory)

In [6]:
import numpy as np

print(np.unique(norm_info))
print(len(np.unique(norm_info)))

['gA-1(File1)' 'gA-2(File2)' 'gA-3(File3)' 'gA-4(File4)' 'gA-5(File5)'
 'gB-10(File10)' 'gB-6(File6)' 'gB-7(File7)' 'gB-8(File8)' 'gB-9(File9)'
 'gE-26(File16)' 'gE-27(File17)' 'gE-28(File18)' 'gE-29(File19)'
 'gE-30(File20)' 'gF-22(File21)' 'gF-23(File22)' 'gF-24(File23)'
 'gF-25(File24)' 'gZ-31(File25)' 'gZ-32(File26)' 'gZ-33(File27)'
 'gZ-34(File28)' 'gZ-35(File29)' 'gZ-36(File30)' 'gZ-37(File31)']
26


In [4]:
# 실행할 폴더 경로
base_directory = "/content/drive/MyDrive/DMS/data/DMD/dmd_frames_12x/PhoneCall"

# 폴더 정보 분석 실행 및 DataFrame 저장
call_info = extract_folder_info_to_df(base_directory)

In [7]:
print(np.unique(call_info))
print(len(np.unique(call_info)))

['gA-1(File1)' 'gA-2(File2)' 'gA-3(File3)' 'gA-4(File4)' 'gA-5(File5)'
 'gB-10(File10)' 'gB-6(File6)' 'gB-7(File7)' 'gB-8(File8)' 'gB-9(File9)'
 'gE-26(File16)' 'gE-27(File17)' 'gE-28(File18)' 'gE-29(File19)'
 'gE-30(File20)' 'gF-22(File21)' 'gF-23(File22)' 'gF-24(File23)'
 'gF-25(File24)' 'gZ-31(File25)' 'gZ-32(File26)' 'gZ-33(File27)'
 'gZ-34(File28)' 'gZ-35(File29)' 'gZ-36(File30)' 'gZ-37(File31)']
26


In [8]:
# 실행할 폴더 경로
base_directory = "/content/drive/MyDrive/DMS/data/DMD/dmd_frames_12x/Text"

# 폴더 정보 분석 실행 및 DataFrame 저장
text_info = extract_folder_info_to_df(base_directory)

In [None]:
print(np.unique(text_info))
print(len(np.unique(text_info)))

['gA-1(File1)' 'gA-2(File2)' 'gA-3(File3)' 'gA-4(File4)' 'gA-5(File5)'
 'gB-10(File10)' 'gB-6(File6)' 'gB-7(File7)' 'gB-8(File8)' 'gB-9(File9)'
 'gE-26(File16)' 'gE-27(File17)' 'gE-28(File18)' 'gE-29(File19)'
 'gE-30(File20)' 'gF-22(File21)' 'gF-23(File22)' 'gF-24(File23)'
 'gF-25(File24)' 'gZ-31(File25)' 'gZ-32(File26)' 'gZ-33(File27)'
 'gZ-34(File28)' 'gZ-35(File29)' 'gZ-36(File30)' 'gZ-37(File31)']
26
