# 0. 데이터 로드

In [2]:
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Directory that holds the exported CSV tables
data_path = '../data/VL_csv/'


def _read_table(file_name):
    """Read one CSV file located under ``data_path`` into a DataFrame."""
    return pd.read_csv(data_path + file_name)


# Traveller master, visited-area info and movement history tables
traveller_df = _read_table('tn_traveller_master_여행객 Master_E.csv')
visit_df = _read_table('tn_visit_area_info_방문지정보_E.csv')
move_df = _read_table('tn_move_his_이동내역_E.csv')
# NOTE: the POI master file ('tn_poi_master_POIMaster_all.csv') is not loaded in this cell.

# 1. 노드 ID 및 속성 구성

In [None]:
# 1. USER nodes
# Take an explicit copy (a column is added below, which must not land on a
# slice of traveller_df) and keep a single row per traveller.
user_nodes = (
    traveller_df[['TRAVELER_ID', 'AGE_GRP', 'RESIDENCE_SGG_CD', 'GENDER']]
    .drop_duplicates(subset='TRAVELER_ID')
    .copy()
)
user_nodes['USER_ID'] = LabelEncoder().fit_transform(user_nodes['TRAVELER_ID'])
# LabelEncoder numbers the IDs in sorted order, not in file order; sort so that
# row i of the feature matrix describes the node whose USER_ID is i.
user_nodes = user_nodes.sort_values('USER_ID').reset_index(drop=True)

# dtype=float keeps the dummy columns numeric (newer pandas emits bool columns,
# which combined with numeric columns would yield an object array).
# NOTE(review): get_dummies only expands object/string/category columns; if
# AGE_GRP or RESIDENCE_SGG_CD are read as numbers they stay single numeric
# columns - confirm that this is intended.
user_features = pd.get_dummies(
    user_nodes[['AGE_GRP', 'RESIDENCE_SGG_CD', 'GENDER']], dtype=float
)
user_x = torch.tensor(user_features.to_numpy(dtype='float32'), dtype=torch.float)

In [None]:
# 2. VISIT_AREA nodes
# Explicit copy: columns are rewritten/added below and must not land on a
# slice of visit_df.
visit_nodes = visit_df[['VISIT_AREA_ID', 'X_COORD', 'Y_COORD', 'VISIT_AREA_TYPE_CD']].copy()
visit_nodes['VISIT_AREA_ID'] = visit_nodes['VISIT_AREA_ID'].astype(str)
# One node per distinct place ID (the first row seen for an ID is kept)
visit_nodes = visit_nodes.drop_duplicates(subset='VISIT_AREA_ID')
visit_nodes['VISIT_ID'] = LabelEncoder().fit_transform(visit_nodes['VISIT_AREA_ID'])
# LabelEncoder numbers the IDs in sorted order; sort so that row i of the
# feature matrix describes the node whose VISIT_ID is i.
visit_nodes = visit_nodes.sort_values('VISIT_ID').reset_index(drop=True)

# Naming the column forces one-hot encoding even when the type code was read
# as a number (get_dummies would otherwise pass a numeric column through).
visit_features = pd.get_dummies(
    visit_nodes[['VISIT_AREA_TYPE_CD']], columns=['VISIT_AREA_TYPE_CD'], dtype=float
)
visit_x = torch.tensor(visit_features.to_numpy(dtype='float32'), dtype=torch.float)

In [None]:
# 3. POI nodes
# poi_df is not loaded in the data-loading cell, so read the POI master table
# here; without this the cell fails with NameError on poi_df.
poi_df = pd.read_csv(data_path + 'tn_poi_master_POIMaster_all.csv')

# Explicit copy: columns are rewritten/added below and must not land on a
# slice of poi_df. Keep a single row per POI.
poi_nodes = poi_df[['POI_ID', 'X_COORD', 'Y_COORD']].copy()
poi_nodes['POI_ID'] = poi_nodes['POI_ID'].astype(str)
poi_nodes = poi_nodes.drop_duplicates(subset='POI_ID')
poi_nodes['POI_NODE_ID'] = LabelEncoder().fit_transform(poi_nodes['POI_ID'])
# LabelEncoder numbers the IDs in sorted order; sort so that row i of the
# feature matrix describes the node whose POI_NODE_ID is i.
poi_nodes = poi_nodes.sort_values('POI_NODE_ID').reset_index(drop=True)

# Node features are the raw coordinates
poi_x = torch.tensor(
    poi_nodes[['X_COORD', 'Y_COORD']].to_numpy(dtype='float32'), dtype=torch.float
)

# 2. 엣지 생성 (장소간 이동)

In [None]:
# Extract edges from the movement records
# Only rows that name both endpoints can become an edge; copy so the columns
# added below do not land on a slice.
move_df = move_df.dropna(subset=['START_VISIT_AREA_ID', 'END_VISIT_AREA_ID']).copy()


def _visit_id_key(ids):
    """Return place IDs as strings in one canonical spelling.

    A numeric ID column that contained missing values is read as float and
    would stringify as '12.0'; the trailing '.0' is removed so that it compares
    equal to '12'.
    """
    return ids.astype(str).str.replace(r'\.0$', '', regex=True)


# Both edge endpoints must live in the SAME index space as the visit_area
# nodes, so look the indices up in visit_nodes instead of fitting a separate
# encoder per column (which gives one place different numbers at each end).
visit_index = pd.Series(
    visit_nodes['VISIT_ID'].to_numpy(),
    index=_visit_id_key(visit_nodes['VISIT_AREA_ID']),
)
visit_index = visit_index[~visit_index.index.duplicated()]

move_df['START'] = _visit_id_key(move_df['START_VISIT_AREA_ID']).map(visit_index)
move_df['END'] = _visit_id_key(move_df['END_VISIT_AREA_ID']).map(visit_index)
# A move that touches a place without a visit_area node cannot be an edge
move_df = move_df.dropna(subset=['START', 'END'])
move_df = move_df.astype({'START': 'int64', 'END': 'int64'})

# Split the edges by transport-mode code: one relation per code.
# groupby skips rows whose code is missing, so no empty 'move_nan' relation is
# created; sort=False keeps the codes in order of first appearance.
edge_index_dict = {}
for move_type, moves in move_df.groupby('MVMN_CD_1', sort=False):
    # (num_edges, 2) -> (2, num_edges); built from a single array rather than
    # from a Python list of arrays
    endpoints = moves[['START', 'END']].to_numpy(dtype='int64').T
    edge_index = torch.tensor(endpoints, dtype=torch.long)

    edge_index_dict[('visit_area', f'move_{move_type}', 'visit_area')] = edge_index


# 3. HeteroData 객체 조립

In [None]:
from torch_geometric.data import HeteroData

# Container for the heterogeneous graph
data = HeteroData()

# Attach the feature matrix of every node type
node_features = {'user': user_x, 'visit_area': visit_x, 'poi': poi_x}
for node_type, features in node_features.items():
    data[node_type].x = features

# Attach one edge_index per (source, relation, target) triple
for edge_type in edge_index_dict:
    data[edge_type].edge_index = edge_index_dict[edge_type]