In [86]:

import pickle
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import HeteroConv, SAGEConv, Linear

In [87]:
class RouteGNN(nn.Module):
    def __init__(self, metadata, hidden_channels=128):
        super().__init__()
        self.metadata = metadata

        self.embeddings = nn.ModuleDict({
            'user': Linear(17, hidden_channels),
            'travel': Linear(21, hidden_channels),
            'visit_area': Linear(34, hidden_channels),
        })

        self.gnn1 = HeteroConv({
            edge_type: SAGEConv((-1, -1), hidden_channels)
            for edge_type in metadata[1]
        }, aggr='sum')

        self.gnn2 = HeteroConv({
            edge_type: SAGEConv((hidden_channels, hidden_channels), hidden_channels)
            for edge_type in metadata[1]
        }, aggr='sum')

        self.link_predictor = nn.Sequential(
            nn.Linear(2 * hidden_channels, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, 1)
        )

    def forward(self, x_dict, edge_index_dict):
        x_dict = {
            node_type: self.embeddings[node_type](x) if x is not None else None
            for node_type, x in x_dict.items()
        }

        x_dict = self.gnn1(x_dict, edge_index_dict)
        x_dict = {k: F.relu(v) for k, v in x_dict.items() if v is not None}
        x_dict = self.gnn2(x_dict, edge_index_dict)

        return x_dict

    def predict_link(self, node_embed, edge_index):
        src, dst = edge_index
        z_src = node_embed[src]
        z_dst = node_embed[dst]
        z = torch.cat([z_src, z_dst], dim=-1)
        return self.link_predictor(z).squeeze(-1)


In [88]:
# 기타 pkl 불러오기
with open('./pickle/user_id_to_index.pkl', 'rb') as f:
    user_id_to_index = pickle.load(f)

with open('./pickle/travel_id_to_index.pkl', 'rb') as f:
    travel_id_to_index = pickle.load(f)

with open('./pickle/visit_area_id_to_index.pkl', 'rb') as f:
    visit_area_id_to_index = pickle.load(f)

with open('./pickle/dataset.pkl', 'rb') as f:
    data = pickle.load(f)

visit_area_df = pd.read_pickle('./pickle/visit_area_df.pkl')

In [89]:
# 모델 로드
model = RouteGNN(data.metadata())
model.load_state_dict(torch.load('./pickle/routegnn_model.pt'))
model.eval()

RouteGNN(
  (embeddings): ModuleDict(
    (user): Linear(17, 128, bias=True)
    (travel): Linear(21, 128, bias=True)
    (visit_area): Linear(34, 128, bias=True)
  )
  (gnn1): HeteroConv(num_relations=5)
  (gnn2): HeteroConv(num_relations=5)
  (link_predictor): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=1, bias=True)
  )
)

In [90]:
print(data.edge_index_dict.keys())

dict_keys([('user', 'traveled', 'travel'), ('travel', 'contains', 'visit_area'), ('visit_area', 'moved_to', 'visit_area'), ('travel', 'traveled_by', 'user'), ('visit_area', 'contained_in', 'travel')])


# 추론을 위한 함수

In [91]:
def recommend_route(node_embed, edge_index, edge_scores, start_node=None, max_steps=5):
    """
    visit_area 노드 임베딩, 엣지 index, score가 주어졌을 때
    가장 높은 score 기준으로 동선을 구성하는 greedy 경로 추천 함수
    """
    from collections import defaultdict

    # 엣지를 점수 기준으로 정렬
    scored_edges = list(zip(edge_index[0].tolist(), edge_index[1].tolist(), edge_scores.tolist()))
    scored_edges.sort(key=lambda x: -x[2])  # 높은 점수 순

    # 경로 생성
    visited = set()
    route = []

    current = start_node if start_node is not None else scored_edges[0][0]
    visited.add(current)
    route.append(current)

    for _ in range(max_steps - 1):
        # current에서 시작하는 후보 중 아직 방문하지 않은 곳
        candidates = [dst for src, dst, score in scored_edges if src == current and dst not in visited]
        if not candidates:
            break
        next_node = candidates[0]  # greedy하게 최고 점수 선택
        visited.add(next_node)
        route.append(next_node)
        current = next_node

    return route  # index 형태


In [92]:
def infer_route(model, data, user_input, travel_input, k=5, device='cpu', batch_size=1000000):
    model.eval()
    data = data.to(device)
    user_input = user_input.to(device)
    travel_input = travel_input.to(device)

    with torch.no_grad():
        # 유저/여행 feature + 기존 raw feature 합치기
        x_dict_raw = {
            'user': torch.cat([data['user'].x, user_input], dim=0),       # [N+1, 17]
            'travel': torch.cat([data['travel'].x, travel_input], dim=0), # [M+1, 21]
            'visit_area': data['visit_area'].x                             # [V, feature_dim]
        }

        # 모델 forward
        x_dict = model(x_dict_raw, data.edge_index_dict)
        visit_area_embed = x_dict['visit_area']

        # 모든 visit_area 노드 쌍 조합 (너무 많으면 메모리 폭발!)
        n = visit_area_embed.size(0)
        all_edges = torch.combinations(torch.arange(n, device=device), r=2).t()

        # batch-wise로 score 계산 (메모리 폭발 방지)
        def predict_link_batch(node_embed, all_edges, batch_size=1000000):
            from tqdm import tqdm
            scores = []
            for i in tqdm(range(0, all_edges.size(1), batch_size)):
                batch_edges = all_edges[:, i:i+batch_size]
                batch_scores = model.predict_link(node_embed, batch_edges)
                scores.append(batch_scores)
            return torch.cat(scores, dim=0)

        edge_scores = predict_link_batch(visit_area_embed, all_edges, batch_size)

        # 경로 구성 (Greedy 방식)
        route = recommend_route(visit_area_embed, all_edges, edge_scores, max_steps=k)

    return route


In [93]:
# 유저 정보

def get_age_group(birthdate_str):
    """
    'YYYY-MM-DD' 형식의 생년월일 문자열을 받아
    20, 30, 40 등의 나이대로 변환하는 함수
    """
    from datetime import datetime
    
    birth_year = int(birthdate_str[:4])
    current_year = datetime.now().year
    age = current_year - birth_year + 1  # 한국식 나이
    age_group = (age // 10) * 10
    return age_group

def map_sido(sido:str):
    sido_code_map = {
        '서울특별시': '11',
        '부산광역시': '26',
        '대구광역시': '27',
        '인천광역시': '28',
        '광주광역시': '29',
        '대전광역시': '30',
        '울산광역시': '31',
        '세종특별자치시': '36',
        '경기도': '41',
        '강원도': '42',
        '충청북도': '43',
        '충청남도': '44',
        '전라북도': '45',
        '전라남도': '46',
        '경상북도': '47',
        '경상남도': '48',
        '제주특별자치도': '50'
    }

    return int(sido_code_map[sido])

def process_user_input(user_info:dict):
    user_feature_cols = [
    'GENDER', 'TRAVEL_TERM', 'TRAVEL_NUM',
    'TRAVEL_LIKE_SIDO_1', 'TRAVEL_LIKE_SIDO_2', 'TRAVEL_LIKE_SIDO_3',
    'TRAVEL_STYL_1', 'TRAVEL_STYL_2', 'TRAVEL_STYL_3', 'TRAVEL_STYL_4',
    'TRAVEL_STYL_5', 'TRAVEL_STYL_6', 'TRAVEL_STYL_7', 'TRAVEL_STYL_8',
    'TRAVEL_MOTIVE_1', 'TRAVEL_MOTIVE_2',
    'AGE_GRP'
    ]
    
    # 1. 나잇대 계산
    user_info['AGE_GRP'] = get_age_group(user_info['BIRTHDATE'])
    
    # 2. 시도 변환
    for i in range(1, 4):
        user_info[f"TRAVEL_LIKE_SIDO_{i}"] = map_sido(user_info[f"TRAVEL_LIKE_SIDO_{i}"])
    
    # 3. 컬럼 필터링 (순서에 맞게)
    user_info = {k: int(user_info[k]) for k in user_feature_cols}
    
    return pd.DataFrame([user_info]).fillna(0).astype(np.float32).to_numpy()

In [94]:
# 여행 정보
def process_travel_input(travel_info:dict):
    from datetime import datetime
    travel_feature_cols = [
        'TOTAL_COST_BINNED_ENCODED',
        'WITH_PET',
        'MONTH',
        'DURATION',
        'MVMN_기타',
        'MVMN_대중교통',
        'MVMN_자가용',
        'TRAVEL_PURPOSE_1',
        'TRAVEL_PURPOSE_2',
        'TRAVEL_PURPOSE_3',
        'TRAVEL_PURPOSE_4',
        'TRAVEL_PURPOSE_5',
        'TRAVEL_PURPOSE_6',
        'TRAVEL_PURPOSE_7',
        'TRAVEL_PURPOSE_8',
        'TRAVEL_PURPOSE_9',
        'WHOWITH_2인여행',
        'WHOWITH_가족여행',
        'WHOWITH_기타',
        'WHOWITH_단독여행',
        'WHOWITH_친구/지인 여행']
    
    
    # mission_ENC에 0 = 반려동물 동반 (WITH_PET)
    travel_info['mission_ENC'] = travel_info['mission_ENC'].strip().split(',')
    if '0' in travel_info['mission_ENC']:
        travel_info['WITH_PET'] = 1
    else:
        travel_info['WITH_PET'] = 0
        
    # TRAVEL_PURPOSE_1 ~~ TRAVEL_PURPOSE_9 (0으로 들어온 입력은 제거해줘야됨) 
    for i in range(1,10):
        if str(i) in travel_info['mission_ENC']:
            travel_info[f'TRAVEL_PURPOSE_{i}'] = 1
        else:
            travel_info[f'TRAVEL_PURPOSE_{i}'] = 0
        
    # MONTH
    dates = travel_info['date_range'].split(' - ')
    travel_info['start_date'] = datetime.strptime(dates[0].strip(), "%Y-%m-%d")
    travel_info['end_date'] = datetime.strptime(dates[1].strip(), "%Y-%m-%d")
    
    travel_info['MONTH'] = travel_info['end_date'].month
    
    # DURATION
    travel_info['DURATION'] = (travel_info['end_date'] - travel_info['start_date']).days
    
    # MNVM_기타, MVMN_대중교통, MVMN_자가용
    for m in ['자가용', '대중교통', '기타']:
        travel_info[f"MVMN_{m}"] = False
    
    if travel_info['MVMN_NM_ENC'] == '1':
        travel_info['MVMN_자가용'] = True
    elif travel_info['MVMN_NM_ENC'] == '2':
        travel_info['MVMN_대중교통'] = True
    else:
        travel_info['MVMN_기타'] = True
    
    # WHOWITH는 1부터 5까지 숫자로 들어옴 -> 원핫 인코딩으로 수정할 것
    # dict에 들어오는 숫자 의미: WHOWITH_단독여행, WHOWITH_2인여행, WHOWITH_가족여행, WHOWITH_친구/지인여행, WHOWITH_기타
    whowith_onehot = [0] * 5
    idx = int(travel_info['whowith_ENC']) - 1
    if 0 <= idx < 5:
        whowith_onehot[idx] = 1
    
    travel_info.update({
    'WHOWITH_단독여행': whowith_onehot[0],
    'WHOWITH_2인여행': whowith_onehot[1],
    'WHOWITH_가족여행': whowith_onehot[2],
    'WHOWITH_친구/지인 여행': whowith_onehot[3],
    'WHOWITH_기타': whowith_onehot[4],
    })
    
    # TOTAL_COST_BINNED_ENCODED
    travel_info['TOTAL_COST_BINNED_ENCODED'] = travel_info['TOTAL_COST'][-1]
    
    # 컬럼 필터링 (순서에 맞게)
    travel_info = {k: int(travel_info[k]) for k in travel_feature_cols}
    
    return pd.DataFrame([travel_info]).fillna(0).astype(np.float32).to_numpy()

In [95]:
def select_best_location_by_distance(route_ids, visit_area_df):
    selected_names = []

    for idx, vid in enumerate(route_ids):
        candidates = visit_area_df[visit_area_df['VISIT_AREA_ID'] == vid]

        # 후보가 하나일 경우 바로 선택
        if len(candidates) == 1:
            selected_names.append(candidates.iloc[0]['VISIT_AREA_NM'])
            continue

        # 이전/다음 위치 좌표 확보
        prev_coord = None
        next_coord = None

        if idx > 0:
            prev_id = route_ids[idx - 1]
            prev_row = visit_area_df[visit_area_df['VISIT_AREA_ID'] == prev_id]
            if not prev_row.empty:
                prev_coord = (prev_row.iloc[0]['X_COORD'], prev_row.iloc[0]['Y_COORD'])

        if idx < len(route_ids) - 1:
            next_id = route_ids[idx + 1]
            next_row = visit_area_df[visit_area_df['VISIT_AREA_ID'] == next_id]
            if not next_row.empty:
                next_coord = (next_row.iloc[0]['X_COORD'], next_row.iloc[0]['Y_COORD'])

        # 거리 계산 함수
        def total_distance(row):
            x, y = row['X_COORD'], row['Y_COORD']
            dist = 0
            if prev_coord:
                dist += np.linalg.norm(np.array([x, y]) - np.array(prev_coord))
            if next_coord:
                dist += np.linalg.norm(np.array([x, y]) - np.array(next_coord))
            return dist

        # 최단 거리 후보 선택
        best_row = candidates.loc[candidates.apply(total_distance, axis=1).idxmin()]
        selected_names.append(best_row['VISIT_AREA_NM'])

    return selected_names

# 입력 및 테스트

In [96]:
temp_info = {'USER_ID': 'admin', 'PASSWORD': 'admin', 'CONFIRM_PASSWORD': 'admin', 'NAME': '유상범', 'BIRTHDATE': '1954-08-10', 'GENDER': '1', 'EDU_NM': '6', 'EDU_FNSH_SE': '2', 'MARR_STTS': '1', 'JOB_NM': '1', 'INCOME': '100', 'HOUSE_INCOME': '10000', 'TRAVEL_TERM': '1', 'TRAVEL_LIKE_SIDO_1': '경상북도', 'TRAVEL_LIKE_SIDO_2': '전라남도', 'TRAVEL_LIKE_SIDO_3': '충청남도', 'TRAVEL_STYL_1': 4, 'TRAVEL_STYL_2': 4, 'TRAVEL_STYL_3': 7, 'TRAVEL_STYL_4': 5, 'TRAVEL_STYL_5': 2, 'TRAVEL_STYL_6': 7, 'TRAVEL_STYL_7': 7, 'TRAVEL_STYL_8': 7, 'TRAVEL_MOTIVE_1': '7', 'TRAVEL_MOTIVE_2': '7', 'FAMILY_MEMB': '1', 'TRAVEL_NUM': '1', 'TRAVEL_COMPANIONS_NUM': '1'}


test_user_tensor = process_user_input(temp_info)

print(test_user_tensor.shape)
test_user_tensor

(1, 17)


array([[ 1.,  1.,  1., 47., 46., 44.,  4.,  4.,  7.,  5.,  2.,  7.,  7.,
         7.,  7.,  7., 70.]], dtype=float32)

In [97]:
test_travel = {'mission_ENC': '0,1', 'date_range': '2025-09-28 - 2025-10-31', 'start_date': '', 'end_date': '', 'TOTAL_COST': '1', 'MVMN_NM_ENC': '2', 'whowith_ENC': '1', 'mission_type': 'normal'}


test_travel_tensor = process_travel_input(test_travel)

print(test_travel_tensor.shape)
test_travel_tensor

(1, 21)


array([[ 1.,  1., 10., 33.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)

In [98]:
user_input = torch.tensor(test_user_tensor, dtype=torch.float)  # 17차원
travel_input = torch.tensor(test_travel_tensor, dtype=torch.float)  # 21차원

user_input.shape, travel_input.shape

(torch.Size([1, 17]), torch.Size([1, 21]))

In [99]:
route_indices = infer_route(model, data, user_input, travel_input, k=20)

100%|██████████| 2/2 [00:02<00:00,  1.38s/it]


In [100]:
# visit_area_id로 역매핑
index_to_id = {v: k for k, v in visit_area_id_to_index.items()}
route_ids = [index_to_id[idx] for idx in route_indices]
print("추천 동선:", route_ids)

추천 동선: [2305190005, 2305260001, 2305270002, 2305270003, 2305270004, 2305280001, 2305290001, 2305290002, 2305290003, 2305290004, 2305290005, 2305290006, 2305300001, 2306150005, 2306150006, 2306150007, 2306250003, 2306250004, 2306250005, 2306250006]


In [101]:
names = select_best_location_by_distance(route_ids, visit_area_df)

for vid, name in zip(route_ids, names):
    print(vid, ":", name)

2305190005 : 감성 교복 잠실 본점
2305260001 : 사무실
2305270002 : 제주 국제공항
2305270003 : 김포국제공항 국내선
2305270004 : 버거킹 시청역점
2305280001 : 덕수궁 중명전
2305290001 : 토요코인 호텔 영등포점
2305290002 : 타임스퀘어
2305290003 : 오월의 종 타임스퀘어점
2305290004 : 호우섬
2305290005 : 롯데백화점 영등포점
2305290006 : 영등포역
2305300001 : 김포국제공항 국내선
2306150005 : 남이섬
2306150006 : 송원
2306150007 : 카페 75
2306250003 : 동탄 호수 공원
2306250004 : 강남역 사거리 강남역 지하상가
2306250005 : 삐삣버거 판교파미어스점
2306250006 : 나인 트리 프리미어 호텔 서울 판교
