In [2]:
import pandas as pd

In [5]:
# Input CSV locations, relative to the notebook's working directory.
# Declared in the same order the tables are read below.
move_path = "../data/VL_csv/tn_move_his_이동내역_Cleaned_E.csv"
photo_path = "../data/VL_csv/tn_tour_photo_관광사진_E.csv"
visit_area_path = "../data/VL_csv/tn_visit_area_info_방문지정보_Cleaned_E.csv"

# Read each table into its own DataFrame using pandas' default parsing options.
move_df = pd.read_csv(filepath_or_buffer=move_path)
photo_df = pd.read_csv(filepath_or_buffer=photo_path)
visit_area_df = pd.read_csv(filepath_or_buffer=visit_area_path)

In [7]:
# Give every distinct VISIT_AREA_NM in visit_area_df a new integer ID.
# Missing names are dropped first: Series.unique() keeps NaN, so without the
# dropna() a missing name would get its own dictionary entry and rows with no
# (or no matching) name could be mapped to a real-looking ID instead of
# staying missing.
unique_visit_area_nms = visit_area_df["VISIT_AREA_NM"].dropna().unique()
visit_area_nm_to_new_id = {nm: idx for idx, nm in enumerate(unique_visit_area_nms)}
visit_area_df["NEW_VISIT_AREA_ID"] = visit_area_df["VISIT_AREA_NM"].map(visit_area_nm_to_new_id)

# One name per (VISIT_AREA_ID, TRAVEL_ID) pair. De-duplicating the key pair
# guarantees the left joins below can never multiply photo/move rows.
visit_key_cols = ["VISIT_AREA_ID", "TRAVEL_ID"]
visit_area_names = visit_area_df[visit_key_cols + ["VISIT_AREA_NM"]].drop_duplicates(
    subset=visit_key_cols
)

# Fill missing VISIT_AREA_NM values in photo_df from visit_area_df.
photo_df = photo_df.merge(
    visit_area_names,
    how="left",
    on=visit_key_cols,
    suffixes=("", "_filled"),
    validate="many_to_one",
)
photo_df["VISIT_AREA_NM"] = photo_df["VISIT_AREA_NM"].fillna(photo_df["VISIT_AREA_NM_filled"])
photo_df = photo_df.drop(columns=["VISIT_AREA_NM_filled"])

# VISIT_AREA_ID -> VISIT_AREA_NM mapping (kept for any later cell that uses it).
# NOTE(review): this is keyed on VISIT_AREA_ID alone; if the same ID occurs in
# more than one TRAVEL_ID, only one of the names survives in this dict.
visit_area_id_to_nm = visit_area_df.set_index("VISIT_AREA_ID")["VISIT_AREA_NM"].to_dict()

# Resolve the start/end visit-area IDs in move_df to names.
if "TRAVEL_ID" in move_df.columns:
    # Use the same composite key as the photo merge so an ID that repeats
    # across trips resolves to the name recorded for this row's own trip.
    for endpoint in ("START", "END"):
        endpoint_id_col = f"{endpoint}_VISIT_AREA_ID"
        endpoint_keys = move_df[[endpoint_id_col, "TRAVEL_ID"]].rename(
            columns={endpoint_id_col: "VISIT_AREA_ID"}
        )
        # A left join keeps move_df's row order and (thanks to the
        # de-duplicated right side) its row count, so the result can be
        # assigned positionally regardless of move_df's index.
        move_df[f"{endpoint}_VISIT_AREA_NM"] = endpoint_keys.merge(
            visit_area_names,
            how="left",
            on=visit_key_cols,
            validate="many_to_one",
        )["VISIT_AREA_NM"].to_numpy()
else:
    # No trip identifier available: fall back to the ID-only lookup.
    move_df["START_VISIT_AREA_NM"] = move_df["START_VISIT_AREA_ID"].map(visit_area_id_to_nm)
    move_df["END_VISIT_AREA_NM"] = move_df["END_VISIT_AREA_ID"].map(visit_area_id_to_nm)

# Translate the resolved names into the new IDs.
move_df["START_NEW_ID"] = move_df["START_VISIT_AREA_NM"].map(visit_area_nm_to_new_id)
move_df["END_NEW_ID"] = move_df["END_VISIT_AREA_NM"].map(visit_area_nm_to_new_id)

# Add the new ID column to photo_df as well.
photo_df["NEW_VISIT_AREA_ID"] = photo_df["VISIT_AREA_NM"].map(visit_area_nm_to_new_id)

# Persist the three augmented tables.
visit_area_df.to_csv("../data/VL_csv/visit_area_with_new_id_final.csv", index=False)
move_df.to_csv("../data/VL_csv/move_with_new_id_final.csv", index=False)
photo_df.to_csv("../data/VL_csv/photo_with_new_id_final.csv", index=False)

# Summary shown as the cell's output: shapes plus the first two rows of each table.
{
    "visit_area_df_shape": visit_area_df.shape,
    "move_df_shape": move_df.shape,
    "photo_df_shape": photo_df.shape,
    "sample_visit_area_df": visit_area_df.head(2).to_dict(),
    "sample_move_df": move_df.head(2).to_dict(),
    "sample_photo_df": photo_df.head(2).to_dict()
}


{'visit_area_df_shape': (21384, 24),
 'move_df_shape': (21384, 12),
 'photo_df_shape': (14627, 13),
 'sample_visit_area_df': {'VISIT_AREA_ID': {0: 2304300001, 1: 2304300002},
  'TRAVEL_ID': {0: 'e_e000004', 1: 'e_e000004'},
  'VISIT_ORDER': {0: 1, 1: 2},
  'VISIT_AREA_NM': {0: '집', 1: '화성 관광열차 안내소 연무대 매표소'},
  'VISIT_START_YMD': {0: '2023-04-30', 1: '2023-04-30'},
  'VISIT_END_YMD': {0: '2023-04-30', 1: '2023-04-30'},
  'ROAD_NM_ADDR': {0: nan, 1: '경기 수원시 팔달구 창룡대로103번길 20'},
  'LOTNO_ADDR': {0: nan, 1: '경기 수원시 팔달구 매향동 3-32'},
  'X_COORD': {0: nan, 1: 127.0233392},
  'Y_COORD': {0: nan, 1: 37.2878779},
  'ROAD_NM_CD': {0: nan, 1: nan},
  'LOTNO_CD': {0: nan, 1: nan},
  'POI_ID': {0: nan, 1: 'POI01000000ALZU7R'},
  'POI_NM': {0: nan, 1: '동대문종합시장 악세서리부자재시장'},
  'RESIDENCE_TIME_MIN': {0: nan, 1: 60.0},
  'VISIT_AREA_TYPE_CD': {0: 21, 1: 2},
  'REVISIT_YN': {0: nan, 1: 'N'},
  'VISIT_CHC_REASON_CD': {0: nan, 1: 10.0},
  'LODGING_TYPE_CD': {0: nan, 1: nan},
  'DGSTFN': {0: nan, 1: 4.0},
  'R