In [1]:
import os, glob, json, math, csv
from sklearn.model_selection import train_test_split
import pandas as pd

# Directory holding the clip_* folders. Set the CLIP_ROOT environment variable
# to point the notebook at another machine's data; when it is unset or empty
# the original location is used.
ROOT = os.environ.get("CLIP_ROOT") or "/home/bocchi/workspace/capstone/simulation/workspace/_output_clips"
T, S = 16, 4      # window length / stride (in frames)
EPS = 1e-3        # added to denominators to avoid div-by-zero
H   = 4.0         # horizon for t* normalization (seconds)

In [2]:
def is_finite(x):
    """Return True when ``x`` can be converted to a finite float.

    Args:
        x: Any value; typically a number, a numeric string, or None.

    Returns:
        bool: False for None, for values ``float()`` cannot convert, and for
        NaN / +-infinity; True otherwise.
    """
    if x is None:
        return False
    try:
        return math.isfinite(float(x))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a usable number"; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        return False

In [3]:
rows = []  # one row = one window

In [6]:
# Collect every clip_* entry directly under ROOT, in sorted (lexicographic) order.
clip_pattern = os.path.join(ROOT, "clip_*")
clip_dirs = glob.glob(clip_pattern)
clip_dirs.sort()
print("clips:", len(clip_dirs))

clips: 101


In [8]:
# Start from an empty index every time this cell runs; appending to a list
# created in another cell would duplicate every window on a re-run.
rows = []  # one row = one window


def _inverse_target(value, valid):
    """Return 1 / (value + EPS) for a valid target, else the 0.0 placeholder."""
    return 1.0 / (float(value) + EPS) if valid else 0.0


for clip_dir in clip_dirs:
    # NOTE(review): plain string sort - assumes frame numbers are zero-padded
    # so that lexicographic order equals temporal order; confirm.
    frames = sorted(glob.glob(os.path.join(clip_dir, "frame_*.jpg")))
    if len(frames) < T:
        continue  # not enough frames for even one window

    # Slide a T-frame window over the clip with stride S.
    for i in range(0, len(frames) - T + 1, S):
        window = frames[i:i + T]
        # The metadata JSON next to the window's last frame is the target.
        # Swap only the file extension; str.replace would also rewrite a
        # ".jpg" that appears anywhere earlier in the path.
        jf = os.path.splitext(window[-1])[0] + ".json"
        if not os.path.exists(jf):
            continue

        with open(jf, "r", encoding="utf-8") as f:
            meta = json.load(f)

        # Raw targets
        min_ttc      = meta.get("min_ttc")       # seconds or None
        min_distance = meta.get("min_distance")  # meters
        best_dmin    = meta.get("best_dmin")     # meters
        best_tstar   = meta.get("best_tstar")    # seconds

        # Validity masks (1 = usable target, 0 = excluded from the loss)
        m_ttc   = int(is_finite(min_ttc)      and float(min_ttc) >= 0)
        m_dist  = int(is_finite(min_distance) and float(min_distance) > 0)
        m_dmin  = int(is_finite(best_dmin)    and float(best_dmin) > 0)
        m_tstar = int(is_finite(best_tstar)   and float(best_tstar) >= 0)

        # Transformed regression targets. The value stored for an invalid
        # target is irrelevant because its mask is 0.
        y_ttc_inv  = _inverse_target(min_ttc, m_ttc)
        y_dmin_inv = _inverse_target(best_dmin, m_dmin)
        y_dist_inv = _inverse_target(min_distance, m_dist)

        if m_tstar:
            # Clip t* into [0, H] and scale it to [0, 1].
            tstar_clip = max(0.0, min(float(best_tstar), H))
            y_tstar_norm = tstar_clip / H
        else:
            y_tstar_norm = 0.0

        rows.append(
            [os.path.basename(clip_dir), os.path.basename(window[0])]
            + window
            # transformed regression targets
            + [y_ttc_inv, y_dmin_inv, y_tstar_norm, y_dist_inv]
            # raw values kept as well (for debugging / monitoring)
            + [min_ttc, best_dmin, best_tstar, min_distance]
            # masks (used to drop invalid targets from the training loss)
            + [m_ttc, m_dmin, m_tstar, m_dist]
        )


In [9]:
# Column names for the saved index: identifiers, one path column per frame in
# the window, transformed targets, raw targets, then validity masks.
frame_columns = [f"f{k}" for k in range(T)]
header = [
    "clip_id", "start_frame",
    *frame_columns,
    "y_ttc_inv", "y_dmin_inv", "y_tstar_norm", "y_dist_inv",
    "raw_min_ttc", "raw_best_dmin", "raw_best_tstar", "raw_min_distance",
    "mask_ttc", "mask_dmin", "mask_tstar", "mask_dist",
]

In [10]:
df = pd.DataFrame(data=rows, columns=header)

# Split by clip rather than by window: windows overlap (stride S < length T),
# so all windows of one clip are kept in a single subset.
clips = df["clip_id"].unique()
split_seed = 42
# 70 % of the clips go to train; the held-out 30 % is halved into val and test.
train_clips, temp = train_test_split(clips, test_size=0.30, random_state=split_seed)
val_clips, test_clips = train_test_split(temp, random_state=split_seed, test_size=0.50)

In [11]:
def _rows_for(clip_ids):
    """Return the windows of `df` whose clip_id is in `clip_ids`, re-indexed from 0."""
    in_split = df["clip_id"].isin(clip_ids)
    return df[in_split].reset_index(drop=True)


train_df = _rows_for(train_clips)
val_df   = _rows_for(val_clips)
test_df  = _rows_for(test_clips)

In [12]:
# Write one index CSV per split into the current working directory.
for split_df, out_path in (
    (train_df, "index_train.csv"),
    (val_df, "index_val.csv"),
    (test_df, "index_test.csv"),
):
    split_df.to_csv(out_path, index=False)

n_train, n_val, n_test = len(train_df), len(val_df), len(test_df)
print("train:", n_train, "val:", n_val, "test:", n_test)

train: 2378 val: 506 test: 543


In [13]:
# Total number of windows across the three splits.
print('total:', sum(len(part) for part in (train_df, val_df, test_df)))

total: 3427
