### pax_icon 생성

In [None]:
# make_viz_jsons_pax_only.py
# - outputs/{시나리오}/events.json을 읽어
#   pax_icon_{N}.json (요청/배차/픽업/드롭 시간 + 배차시도 히스토리 포함)만 생성

import os, json
from pathlib import Path
from collections import defaultdict
import pandas as pd

# ===== 사용자 설정 =====
SCENARIO_DIR   = "../outputs/50대"  # scenario output folder; events.json is read from here
PARQUET_PATH   = "../data/processed/420_540_trip_both.parquet" # the original file is used because the KEY1 value is needed
REACT_DATA_DIR = "../react-project/public/data"  # root folder under which pax_icon/ output is written
NUM_LABEL      = 50   # vehicle-count label (file name suffix)

# ---------- 유틸 ----------
def jload(p):
    """Load a JSON document from ``p``, falling back to JSON Lines.

    Args:
        p: Path of the file to read (UTF-8 text).

    Returns:
        ``None`` when ``p`` does not exist. Otherwise the parsed JSON value,
        or, when the file is not one valid JSON document, a list holding
        one parsed value per non-blank line.

    Raises:
        json.JSONDecodeError: If the content is neither valid JSON nor valid
            JSON Lines.
    """
    if not os.path.exists(p):
        return None
    with open(p, "r", encoding="utf-8") as f:
        try:
            return json.load(f)
        except json.JSONDecodeError:
            # Only a parse failure triggers the line-by-line fallback; any
            # other error (e.g. I/O) propagates instead of being masked.
            f.seek(0)
            return [json.loads(line) for line in f if line.strip()]

def jdump(obj, p):
    """Write ``obj`` to ``p`` as indented UTF-8 JSON, creating parent folders.

    Args:
        obj: JSON-serialisable value.
        p: Destination file path. May be a bare file name with no directory
            component.
    """
    parent = os.path.dirname(p)
    if parent:
        # os.makedirs("") raises FileNotFoundError, so only create the
        # directory when the path actually has one.
        os.makedirs(parent, exist_ok=True)
    with open(p, "w", encoding="utf-8") as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)

def get_list(obj):
    """Return the list payload carried by ``obj``, or ``[]`` if there is none.

    A list is returned unchanged. For a dict, the keys ``events``, ``moves``,
    ``points``, ``data``, ``rows`` and ``list`` are tried in that order and
    the first one whose value is a list is returned. Any other input
    (including ``None``) yields an empty list.
    """
    if isinstance(obj, list):
        return obj
    if not isinstance(obj, dict):
        return []
    wrapper_keys = ("events", "moves", "points", "data", "rows", "list")
    return next(
        (obj[key] for key in wrapper_keys if isinstance(obj.get(key), list)),
        [],
    )

def to_seconds_series(s):
    """Coerce ``s`` to numeric and rescale it to seconds using its median.

    Non-numeric entries become NaN. The median of the remaining values picks
    the scale:

    * median below 1 000   -> values are multiplied by 60 (minutes to seconds)
    * median above 1e11    -> values are divided by 1000 (milliseconds to seconds)
    * otherwise, or when there is no valid value -> returned as coerced
    """
    numeric = pd.to_numeric(s, errors="coerce")
    typical = numeric.dropna().median()
    if not pd.isna(typical):
        if typical < 1_000:
            return numeric * 60.0
        if typical > 1e11:
            return numeric / 1000.0
    return numeric

# ---------- 입력 ----------
# Load the scenario's event log (missing or unparseable shapes end up as []).
ev_path = str(Path(SCENARIO_DIR, "events.json"))
events_raw = jload(ev_path)
events = get_list(events_raw)

# Report where the data came from and how much of it there is.
print(f"[PATH] {SCENARIO_DIR}")
print(" - events.json :", os.path.exists(ev_path), f"({len(events)} rows)")

if len(events) == 0:
    print("[WARNING] events.json이 비어있거나 없습니다. pax_icon은 원본 parquet 데이터만으로 생성됩니다.")

# ---------- pax_icon_{N}.json ----------
# ① 원본 파케(요청/OD 좌표)
# Source parquet: one row per request with origin/destination coordinates.
pq = pd.read_parquet(PARQUET_PATH).copy()
pq["KEY1"] = pq["KEY1"].astype(str)

# Logical field -> parquet column name.
col_map = {
    "id": "KEY1",
    "o_x": "출발_x",
    "o_y": "출발_y",
    "d_x": "도착_x",
    "d_y": "도착_y",
    "t_req": "승차_timestamp",
}

base = pd.DataFrame({"pax_ID": pq[col_map["id"]].astype(str)})

# Copy each coordinate column that exists, coercing bad values to NaN.
for _field, _target in (
    ("o_x", "o_lon"),
    ("o_y", "o_lat"),
    ("d_x", "d_lon"),
    ("d_y", "d_lat"),
):
    _source = col_map[_field]
    if _source in pq.columns:
        base[_target] = pd.to_numeric(pq[_source], errors="coerce")

# Request time, normalised to seconds.
if col_map["t_req"] in pq.columns:
    base["t_req"] = to_seconds_series(pq[col_map["t_req"]])

# Keep the first row per passenger and index by passenger id.
base = base.drop_duplicates("pax_ID").set_index("pax_ID")

# ② events에서 ASSIGN/픽업/드롭 추출
# Per-request lookups filled from the event log:
#   pick_by_id / drop_by_id : earliest PICKUP / DROPOFF as {t, lon, lat}
#   veh_by_id               : vehicle taken from ASSIGN or the kept PICKUP
#   assign_hist             : every ASSIGN as {t, attempt, veh_id}
pick_by_id, drop_by_id, veh_by_id = {}, {}, {}
assign_hist = defaultdict(list)   # rid -> list of {t, attempt, veh_id}

for e in events:
    et = str(e.get("type", "")).upper()
    rid = e.get("req_id")
    if rid is None:
        continue
    rid = str(rid)

    # Use the first time key that carries a value. An `or` chain would treat
    # a legitimate timestamp of 0 as missing and silently drop the event.
    t = next(
        (e[k] for k in ("t", "ts", "timestamp", "time") if e.get(k) not in (None, "")),
        None,
    )
    if t is None:
        continue
    t = float(t)

    lon, lat = e.get("lon"), e.get("lat")
    vid = e.get("veh_id")
    att = e.get("attempt")
    # Explicit test so a vehicle id of 0 is kept; None / "" mean "no vehicle".
    has_vid = vid is not None and vid != ""

    if et == "ASSIGN":
        assign_hist[rid].append({"t": t, "attempt": int(att or 1), "veh_id": vid})
        if has_vid:
            veh_by_id[rid] = vid
    elif et == "PICKUP":
        # Keep only the earliest pickup per request.
        if (rid not in pick_by_id) or (t < pick_by_id[rid]["t"]):
            pick_by_id[rid] = {"t": t, "lon": lon, "lat": lat}
            if has_vid:
                veh_by_id[rid] = vid
    elif et == "DROPOFF":
        # Keep only the earliest drop-off per request.
        if (rid not in drop_by_id) or (t < drop_by_id[rid]["t"]):
            drop_by_id[rid] = {"t": t, "lon": lon, "lat": lat}

# ③ rows 생성
def _base_value(rid, col, default=None):
    """Return ``base.at[rid, col]`` as float, or ``default`` if the column is absent or the cell is NaN."""
    if col not in base.columns:
        return default
    val = base.at[rid, col]
    return float(val) if pd.notna(val) else default


def _event_coord(ev, key, rid, fallback_col):
    """Return ``ev[key]`` as float; fall back to the parquet column, then to 0.0."""
    val = ev.get(key)
    return float(val) if val is not None else _base_value(rid, fallback_col, 0.0)


# Build one output row per passenger found in the parquet file.
rows = []
for rid in base.index:
    assigned = (rid in pick_by_id)  # a pickup event means the dispatch succeeded
    p = pick_by_id.get(rid, {})
    d = drop_by_id.get(rid, {})

    # Prefer coordinates recorded on the events; otherwise use the request's
    # origin/destination from the parquet file.
    plon = _event_coord(p, "lon", rid, "o_lon")
    plat = _event_coord(p, "lat", rid, "o_lat")
    dlon = _event_coord(d, "lon", rid, "d_lon")
    dlat = _event_coord(d, "lat", rid, "d_lat")

    t_req  = _base_value(rid, "t_req", None)
    t_pick = float(p.get("t")) if p.get("t") is not None else None
    t_drop = float(d.get("t")) if d.get("t") is not None else None

    # Assignment history in time order, plus derived values.
    hist = sorted(assign_hist.get(rid, []), key=lambda x: x["t"])
    t_asg_first = hist[0]["t"] if hist else None
    # True maximum rather than "attempt of the latest entry", so ties or
    # out-of-order attempt numbers cannot under-report it.
    max_attempt = max((h["attempt"] for h in hist), default=0)

    rows.append({
        "pax_ID": rid,
        "assigned": bool(assigned),
        "vehicle_id": veh_by_id.get(rid),
        "pickup_location":  [plon, plat],
        "dropoff_location": [dlon, dlat],
        # timing information (used by the simulation to decide the state)
        "t_req":  t_req,
        "t_asg":  t_asg_first,
        "t_pick": t_pick,
        "t_drop": t_drop,
        # dispatch attempt history
        "assign_history": hist,     # [{t, attempt, veh_id}, ...]
        "max_attempt": max_attempt
    })

# Write the passenger rows and print a short assigned / not-assigned summary.
pax_path = str(Path(REACT_DATA_DIR, "pax_icon", f"pax_icon_{NUM_LABEL}.json"))
jdump(rows, pax_path)

_n_assigned = sum(1 for r in rows if r["assigned"])
print("[SAVE]", pax_path, f"({len(rows)} pax)")
print(f"  - assigned: {_n_assigned}")
print(f"  - not assigned: {len(rows) - _n_assigned}")

[PATH] ../outputs/50대
 - events.json : True (4084 rows)
[SAVE] ../react-project/public/data/pax_icon/pax_icon_50_test.json (1366 pax)
  - assigned: 1359
  - not assigned: 7


### trip.json 파일 생성

In [7]:
# make_trip_from_moves_osrm.py
# moves.json을 OSRM로 도로 polyline으로 샘플링해 trip_{N}.json 생성
# ⭐️ 도커 켜져있어야함 필수

import os, json, math, time
from pathlib import Path
import requests
from collections import defaultdict

# ==== 사용자 설정 ====
SCENARIO_DIR   = "../outputs/50대"                                              # scenario folder (moves.json / tracks.json are read from here)
REACT_DATA_DIR = "../react-project/public/data"                                # React public/data folder (output root)
NUM_LABEL      = 50                                                            # label used in the output file name
OSRM_URL = "http://127.0.0.1:8000"  # base URL of the OSRM server queried by osrm_route()
SAMPLE_EVERY_M = 15.0  # approximate spacing (metres) used when down-sampling a polyline; smaller values increase output size

# ==== 유틸 ====
def jload(p):
    """Load a JSON document from ``p``, falling back to JSON Lines.

    Args:
        p: Path of the file to read (UTF-8 text).

    Returns:
        ``None`` when ``p`` does not exist, so callers can write
        ``jload(path) or []`` for optional inputs. Otherwise the parsed JSON
        value, or, when the file is not one valid JSON document, a list
        holding one parsed value per non-blank line.

    Raises:
        json.JSONDecodeError: If the content is neither valid JSON nor valid
            JSON Lines.
    """
    if not os.path.exists(p):
        return None
    with open(p, "r", encoding="utf-8") as f:
        try:
            return json.load(f)
        except json.JSONDecodeError:
            # Only a parse failure triggers the line-by-line fallback; a bare
            # `except:` would also swallow KeyboardInterrupt and I/O errors.
            f.seek(0)
            return [json.loads(line) for line in f if line.strip()]

def jdump(obj, p):
    """Write ``obj`` to ``p`` as indented UTF-8 JSON, creating parent folders.

    Args:
        obj: JSON-serialisable value.
        p: Destination file path. May be a bare file name with no directory
            component.
    """
    parent = os.path.dirname(p)
    if parent:
        # os.makedirs("") raises FileNotFoundError, so only create the
        # directory when the path actually has one.
        os.makedirs(parent, exist_ok=True)
    with open(p, "w", encoding="utf-8") as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)

def haversine_m(lon1, lat1, lon2, lat2):
    """Return the haversine distance in metres between two lon/lat points.

    Inputs are in degrees (they are converted with ``math.radians``); the
    sphere radius used is 6 371 000 m.
    """
    earth_radius_m = 6371000.0
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2
    hav = math.sin(half_dlat)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(half_dlon)**2
    return 2*earth_radius_m*math.asin(math.sqrt(hav))

def sample_polyline(coords, every_m=SAMPLE_EVERY_M):
    """Down-sample ``coords`` ([[lon, lat], ...]) to roughly ``every_m`` metre spacing.

    The first point is always kept. Distance is accumulated along consecutive
    points; whenever it reaches ``every_m`` the current point is emitted and
    the accumulator is reset. The final input point is appended if it was not
    already the last emitted one. An empty input yields an empty list.
    """
    if not coords:
        return []
    kept = [coords[0]]
    travelled = 0.0
    for (prev_lon, prev_lat), (cur_lon, cur_lat) in zip(coords, coords[1:]):
        travelled += haversine_m(prev_lon, prev_lat, cur_lon, cur_lat)
        if travelled >= every_m:
            kept.append([cur_lon, cur_lat])
            travelled = 0.0
    tail = coords[-1]
    if kept[-1] != tail:
        kept.append(tail)
    return kept

def osrm_route(lon1, lat1, lon2, lat2, base=OSRM_URL):
    """Query the OSRM driving route between two points and return its coordinates.

    Sends ``GET {base}/route/v1/driving/{lon1},{lat1};{lon2},{lat2}`` with
    ``overview=full`` and ``geometries=geojson`` (10 s timeout).

    Returns:
        The ``geometry.coordinates`` array of the first route, or ``[]`` when
        the response carries no routes.

    Raises:
        requests.HTTPError: On a non-success HTTP status (via
            ``raise_for_status``).
    """
    endpoint = f"{base}/route/v1/driving/{lon1},{lat1};{lon2},{lat2}"
    resp = requests.get(
        endpoint,
        params={"overview": "full", "geometries": "geojson"},
        timeout=10,
    )
    resp.raise_for_status()
    found = resp.json().get("routes")
    if not found:
        return []
    return found[0]["geometry"]["coordinates"]

# ==== 입력 ====
moves_path = str(Path(SCENARIO_DIR) / "moves.json")
tracks_path= str(Path(SCENARIO_DIR) / "tracks.json")

# moves.json is required. Check for the file first so that a missing file
# ends in the same explicit exit below instead of a FileNotFoundError.
moves = jload(moves_path) if os.path.exists(moves_path) else None
if not moves:
    raise SystemExit(f"[ERR] moves.json 비어있음: {moves_path}")

# tracks.json is optional (only used to clip each trip's time range), so a
# missing file simply means "no tracks".
tracks = (jload(tracks_path) if os.path.exists(tracks_path) else None) or []

# 차량별 시작/끝 시각(타임바운드) 얻기 (tracks가 있으면)
# veh_id -> (earliest t, latest t) over that vehicle's track points.
t_minmax = {}
for tr in tracks:
    vid = tr.get("veh_id")
    if vid is None or vid == "":
        vid = tr.get("vehicle_id")
    # Explicit None/"" test so that a vehicle id of 0 is not discarded.
    if vid is None or vid == "":
        continue
    pts = tr.get("points") or []
    # Skip points whose "t" is missing or null; a None would break min()/max().
    times = [p["t"] for p in pts if p.get("t") is not None]
    if not times:
        continue
    t_minmax[vid] = (min(times), max(times))

# ==== 차량별 trip 구성 ====
# veh_id -> list of (t_start, t_end, lon1, lat1, lon2, lat2) move segments.
by_vehicle = defaultdict(list)
for mv in moves:
    vid = mv.get("veh_id")
    # Explicit None/"" test so that a vehicle id of 0 is not discarded.
    if vid is None or vid == "":
        continue
    t0, t1 = mv.get("t_start"), mv.get("t_end")
    if not (isinstance(t0, (int, float)) and isinstance(t1, (int, float))):
        continue
    lon1, lat1 = mv.get("lon1"), mv.get("lat1")
    lon2, lat2 = mv.get("lon2"), mv.get("lat2")
    if None in (lon1, lat1, lon2, lat2):
        continue
    by_vehicle[vid].append((t0, t1, lon1, lat1, lon2, lat2))

# Order each vehicle's segments by start time.
for segs in by_vehicle.values():
    segs.sort(key=lambda seg: seg[0])

trip_out = []
print(f"[INFO] vehicles in moves: {len(by_vehicle)}")
# NOTE(review): this session is not handed to osrm_route(), which calls
# requests.get() directly; the name is kept so it stays defined.
sess = requests.Session()
n_fallback = 0  # segments drawn as a straight line instead of an OSRM route

for vid, segs in by_vehicle.items():
    all_coords = []
    all_times  = []

    for (t0, t1, lon1, lat1, lon2, lat2) in segs:
        try:
            coords = osrm_route(lon1, lat1, lon2, lat2, base=OSRM_URL)
        except Exception:
            # Deliberate best-effort: any routing failure falls through to
            # the straight-line fallback below.
            coords = []
        if not coords:
            # Covers both a failed request and an empty "routes" answer.
            # Without this, an empty geometry would still contribute one
            # timestamp and leave trip / timestamp lists misaligned.
            coords = [[lon1, lat1], [lon2, lat2]]
            n_fallback += 1

        coords = sample_polyline(coords, every_m=SAMPLE_EVERY_M)

        # Assign a time to every point by linear interpolation over the
        # cumulative distance along the polyline.
        dists = [0.0]
        for a, b in zip(coords, coords[1:]):
            dists.append(dists[-1] + haversine_m(a[0], a[1], b[0], b[1]))
        total = dists[-1]
        times = [t0 + (t1 - t0) * (0.0 if total <= 0 else d / total) for d in dists]

        # After the first segment, drop a leading point that repeats the
        # previous segment's last point.
        if all_coords and coords:
            if all_coords[-1] == coords[0]:
                coords = coords[1:]
                times = times[1:]

        all_coords.extend(coords)
        all_times.extend([int(round(x)) for x in times])

    # Optionally clip to the time range seen in tracks for this vehicle;
    # if clipping would remove everything, the unclipped trip is kept.
    tmin, tmax = t_minmax.get(vid, (None, None))
    if tmin is not None and tmax is not None and all_coords:
        keep = [(xy, tt) for xy, tt in zip(all_coords, all_times) if tmin <= tt <= tmax]
        if keep:
            all_coords = [xy for xy, _ in keep]
            all_times  = [tt for _, tt in keep]

    if all_coords and all_times:
        trip_out.append({
            "vehicle_id": vid,
            "trip": all_coords,
            "timestamp": all_times
        })

if n_fallback:
    print(f"[WARN] {n_fallback} segment(s) used a straight-line fallback instead of an OSRM route")

# Write the per-vehicle trips under <REACT_DATA_DIR>/trip/ and report.
trip_path = str(Path(REACT_DATA_DIR, "trip", f"trip_{NUM_LABEL}_test.json"))
jdump(trip_out, trip_path)
print(f"[SAVE] {trip_path} ({len(trip_out)} vehicles)", "[DONE]", sep="\n")

[INFO] vehicles in moves: 50
[SAVE] ../react-project/public/data/trip/trip_50_test.json (50 vehicles)
[DONE]
