In [27]:
import os
import pandas as pd

### Load image files

In [37]:
# Root directory that is searched recursively for source images.
root_folder = "D:/hyundai/dataset/data/original/2024"

# Labels used to bucket the images. A label applies to an image when the label
# text occurs anywhere in that image's full path.
names = ["DF", "CE", "GN7 일반", "GN7 파노라마", "ME 일반"]
file_path = dict((label, []) for label in names)

for current_dir, _subdirs, entries in os.walk(root_folder):
    # Keep only JPEG files (extension compared case-insensitively).
    jpg_paths = [
        os.path.join(current_dir, entry)
        for entry in entries
        if entry.lower().endswith('.jpg')
    ]
    for jpg_path in jpg_paths:
        # A path is stored under every label it contains, not only the first.
        for label, bucket in file_path.items():
            if label in jpg_path:
                bucket.append(jpg_path)

In [42]:
# Report how many image paths were collected for each label, then the grand total.
paths_per_label = {label: len(found) for label, found in file_path.items()}

for label, n_paths in paths_per_label.items():
    print(f"{label}: {n_paths} paths")

total_data = sum(paths_per_label.values())
print("total number of data : ", total_data)

DF: 563135 paths
CE: 132788 paths
GN7 일반: 303141 paths
GN7 파노라마: 79884 paths
ME 일반: 132 paths
total number of data :  1079080


### Load ROI csv files

In [41]:
# Root directory that is searched recursively for the ROI profile CSV files.
csv_root_folder = "D:/hyundai/dataset/data/roi"

# Label -> file name of the ROI profile CSV that belongs to that label.
csv_file_dict = {
    "DF": "02_ROIprofile.csv",
    "CE": "03_ROIprofile.csv",
    "GN7 일반": "05_ROIprofile.csv",
    "GN7 파노라마": "06_ROIprofile.csv",
    "ME 일반": "07_ROIprofile.csv",
}
# Label -> DataFrame read from the matching CSV (no header row, integer column labels).
roi_dfs = {}

for folder_name, _, filenames in os.walk(csv_root_folder):
    for filename in filenames:
        if not filename.lower().endswith('.csv'):
            continue
        full_path = os.path.join(folder_name, filename)
        # Iterate the mapping itself instead of a label list defined in another
        # cell: this cell then runs on its own and cannot hit a KeyError when
        # the two definitions drift apart.
        for name, csv_name in csv_file_dict.items():
            # Substring match on the full path; if several files match the
            # same label, the one visited last wins.
            if csv_name in full_path:
                roi_dfs[name] = pd.read_csv(full_path, header=None)

# Surface labels whose CSV was never found instead of silently leaving them out.
missing_labels = [label for label in csv_file_dict if label not in roi_dfs]
if missing_labels:
    print(f"ROI csv not found under {csv_root_folder} for: {missing_labels}")

In [43]:
# Show the loaded ROI tables (label -> DataFrame) as the cell output for a quick visual check.
roi_dfs

{'DF':    0    1    2    3    4    5    6    7    8    9    ...  286  287  288  289  \
 0  368  226  394  226  394  260  368  260  608  389  ...    0    0    0    0   
 1  314   93  416  111  402  191  300  173  302  118  ...  280  199  280  117   
 2  366  116  435  193  374  248  305  171  392  101  ...    0    0    0    0   
 3  357  114  426  191  365  246  296  169  392  101  ...    0    0    0    0   
 
    290  291  292  293  294  295  
 0    0    0    0    0    0    0  
 1  384  117  384  199  280  199  
 2    0    0    0    0    0    0  
 3    0    0    0    0    0    0  
 
 [4 rows x 296 columns],
 'CE':    0    1    2    3    4    5    6    7    8    9    ...  406  407  408  409  \
 0  323  270  351  202  409  227  381  295  617  419  ...    0    0    0    0   
 1  485  190  488  297  400  300  397  193  485  190  ...    0    0    0    0   
 2  518  202  518  308  430  308  430  202  518  202  ...    0    0    0    0   
 3  542  326  543  326  543  328  542  328    0    0  .

In [123]:
# Number of consecutive columns that form one ROI record in a profile table.
# NOTE(review): presumably four (x, y) corner points per ROI -- confirm against the CSV spec.
VALUES_PER_ROI = 8


def count_rois_per_row(df, values_per_roi=VALUES_PER_ROI):
    """Count the ROI slots in use on every row of an ROI table.

    Each row is read as consecutive groups of ``values_per_roi`` columns,
    aligned from the first column. Groups at the end of a row that contain
    only zeros are treated as padding. The count for a row is the number of
    groups up to and including the last group that holds a non-zero value, so
    an all-zero group located before that point is still counted.

    Trailing columns that do not fill a complete group are ignored, which
    keeps the result within ``0 .. number of complete groups``.

    Args:
        df: DataFrame whose columns are laid out in groups of ``values_per_roi``.
        values_per_roi: Number of columns per group; must be positive.

    Returns:
        A list with one int per row of ``df``, in row order. Empty when ``df``
        has no rows.

    Raises:
        ValueError: If ``values_per_roi`` is not positive.
    """
    if values_per_roi <= 0:
        raise ValueError("values_per_roi must be a positive integer")

    values = df.to_numpy()
    n_groups = values.shape[1] // values_per_roi

    counts = []
    for row in values:
        used = n_groups
        # Walk backwards from the last group and drop all-zero groups until
        # one with data is found.
        while used > 0:
            group = row[(used - 1) * values_per_roi:used * values_per_roi]
            if (group != 0).any():
                break
            used -= 1
        counts.append(used)
    return counts


# Label -> list with the number of ROI slots in use on each row of that label's table.
image_indexes = {}

for name, df in roi_dfs.items():
    image_indexes[name] = count_rois_per_row(df)

In [124]:
# 결과 출력
for name, indexes in image_indexes.items():
    print(f"{name}: {indexes}")

DF: [30, 37, 6, 7]
CE: [7, 8, 30, 6, 52]
GN7 일반: [40, 49, 21]
GN7 파노라마: [40, 49, 20, 7]
ME 일반: [22, 39, 23, 36, 11]


### Crop Image