# 데이터 로드

In [526]:
import torch
import numpy as np
import pandas as pd

In [527]:
# 주요 파일 경로
move_path = "../data/VL_csv/tn_move_his_이동내역_Cleaned_E.csv"
user_path = "../data/VL_csv/tn_traveller_master_여행객 Master_E_preprocessed.csv"
travel_path = "tn_travel_processed.csv"
visit_area_path = "../data/VL_csv/tn_visit_area_info_방문지정보_Cleaned_E.csv"

# 데이터 로딩
move_df = pd.read_csv(move_path)
user_df = pd.read_csv(user_path)
travel_df = pd.read_csv(travel_path, index_col=0)
visit_area_df = pd.read_csv(visit_area_path)

In [528]:
visit_area_df = visit_area_df.drop_duplicates(subset=['VISIT_AREA_ID'])

visit_area_id_to_index = {id_: idx for idx, id_ in enumerate(visit_area_df['VISIT_AREA_ID'])}

visit_area_index_to_id = {idx: id_ for id_, idx in visit_area_id_to_index.items()}

In [529]:
user_feature_cols = [
    'GENDER', 'TRAVEL_TERM', 'TRAVEL_NUM',
    'TRAVEL_LIKE_SIDO_1', 'TRAVEL_LIKE_SIDO_2', 'TRAVEL_LIKE_SIDO_3',
    'TRAVEL_STYL_1', 'TRAVEL_STYL_2', 'TRAVEL_STYL_3', 'TRAVEL_STYL_4',
    'TRAVEL_STYL_5', 'TRAVEL_STYL_6', 'TRAVEL_STYL_7', 'TRAVEL_STYL_8',
    'TRAVEL_MOTIVE_1', 'TRAVEL_MOTIVE_2', 'AGE_GRP'
]
user_df = user_df[user_feature_cols].fillna(0)
user_tensor = torch.tensor(user_df.to_numpy(), dtype=torch.float)

In [530]:
travel_df

Unnamed: 0,TRAVEL_ID,TRAVELER_ID,TOTAL_COST_BINNED_ENCODED,WITH_PET,MONTH,DURATION,MVMN_기타,MVMN_대중교통,MVMN_자가용,TRAVEL_PURPOSE_1,...,TRAVEL_PURPOSE_5,TRAVEL_PURPOSE_6,TRAVEL_PURPOSE_7,TRAVEL_PURPOSE_8,TRAVEL_PURPOSE_9,WHOWITH_2인여행,WHOWITH_가족여행,WHOWITH_기타,WHOWITH_단독여행,WHOWITH_친구/지인 여행
0,e_e000004,e000004,1,0,5,1,True,False,False,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,e_e000006,e000006,1,0,5,2,True,False,False,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,e_e000009,e000009,4,0,5,2,True,False,False,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,e_e000010,e000010,5,0,5,2,True,False,False,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,e_e000011,e000011,0,0,5,3,True,False,False,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2555,g_g003294,g003294,2,0,7,2,False,False,True,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2556,g_g005600,g005600,1,0,8,1,True,False,False,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2557,g_g007343,g007343,1,0,8,1,True,False,False,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2558,g_g010556,g010556,2,0,9,1,False,False,True,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [531]:
travel_df.drop(['TRAVEL_ID', 'TRAVELER_ID'], axis=1).fillna(0).to_numpy()

array([[1, 0, 5, ..., 0.0, 0.0, 0.0],
       [1, 0, 5, ..., 1.0, 0.0, 0.0],
       [4, 0, 5, ..., 0.0, 0.0, 0.0],
       ...,
       [1, 0, 8, ..., 0.0, 1.0, 0.0],
       [2, 0, 9, ..., 1.0, 0.0, 0.0],
       [1, 0, 8, ..., 0.0, 0.0, 0.0]], dtype=object)

In [532]:
# bool 컬럼을 int로 변환 (True -> 1, False -> 0)
for col in travel_df.columns:
    if travel_df[col].dtype == 'bool':
        travel_df[col] = travel_df[col].astype(int)

# 그 외 object 타입도 전부 숫자형으로 강제 변환
travel_df = travel_df.apply(pd.to_numeric, errors='coerce').fillna(0)

# tensor 변환
travel_tensor = torch.tensor(travel_df.drop(['TRAVEL_ID', 'TRAVELER_ID'], axis=1).to_numpy(), dtype=torch.float)


In [533]:
visit_area_df['X_COORD'] = visit_area_df['X_COORD'].fillna(visit_area_df['X_COORD'].mean())
visit_area_df['Y_COORD'] = visit_area_df['Y_COORD'].fillna(visit_area_df['Y_COORD'].mean())
visit_area_df['VISIT_CHC_REASON_CD'] = visit_area_df['VISIT_CHC_REASON_CD'].fillna(0)

In [534]:
for col in ['DGSTFN', 'REVISIT_INTENTION', 'RCMDTN_INTENTION']:
    visit_area_df[col] = visit_area_df[col].fillna(3)

In [535]:
features = visit_area_df[['X_COORD', 'Y_COORD']].copy()

type_onehot = pd.get_dummies(visit_area_df['VISIT_AREA_TYPE_CD'], prefix='type')
reason_onehot = pd.get_dummies(visit_area_df['VISIT_CHC_REASON_CD'], prefix='reason')

visit_area_df['DGSTFN_norm'] = (visit_area_df['DGSTFN'] - 1) / 4.0
visit_area_df['REVISIT_norm'] = (visit_area_df['REVISIT_INTENTION'] - 1) / 4.0
visit_area_df['RCMDTN_norm'] = (visit_area_df['RCMDTN_INTENTION'] - 1) / 4.0

features = pd.concat([features, type_onehot, reason_onehot, 
                      visit_area_df[['DGSTFN_norm', 'REVISIT_norm', 'RCMDTN_norm']]], axis=1)

for col in features.columns:
    if features[col].dtype == 'bool':
        features[col] = features[col].astype(int)

visit_area_tensor = torch.tensor(features.to_numpy(), dtype=torch.float)



In [536]:
# user-travel edge
travel_id_to_index = {id_: idx for idx, id_ in enumerate(travel_df['TRAVEL_ID'])}
user_travel_edges = travel_df[['TRAVELER_ID', 'TRAVEL_ID']].dropna()
user_travel_edges['user_idx'] = user_travel_edges['TRAVELER_ID'].astype(int)
user_travel_edges['travel_idx'] = user_travel_edges['TRAVEL_ID'].map(travel_id_to_index)
user_travel_edge_index = torch.tensor(user_travel_edges[['user_idx', 'travel_idx']].to_numpy().T, dtype=torch.long)

# travel-visit_area edge
travel_visit_edges = visit_area_df[['TRAVEL_ID', 'VISIT_AREA_ID']].dropna()
travel_visit_edges['travel_idx'] = travel_visit_edges['TRAVEL_ID'].map(travel_id_to_index)
travel_visit_edges['visit_area_idx'] = travel_visit_edges['VISIT_AREA_ID'].map(visit_area_id_to_index)
travel_visit_edge_index = torch.tensor(travel_visit_edges[['travel_idx', 'visit_area_idx']].to_numpy().T, dtype=torch.long)


In [537]:
def create_visit_area_edges(move_df, visit_area_id_to_index):
    move_df["START_DT_MIN"] = pd.to_datetime(move_df["START_DT_MIN"], errors="coerce")
    move_df["END_DT_MIN"] = pd.to_datetime(move_df["END_DT_MIN"], errors="coerce")

    cols = ["TRAVEL_ID", "TRIP_ID", "START_VISIT_AREA_ID", "END_VISIT_AREA_ID",
            "START_DT_MIN", "END_DT_MIN", "MVMN_CD_1"]
    move_df = move_df[cols].copy()

    edges = []

    for travel_id, group in move_df.sort_values(["TRAVEL_ID", "TRIP_ID"]).groupby("TRAVEL_ID"):
        group = group.reset_index(drop=True)
        n = len(group)
        if n < 2:
            continue

        # 첫 이동: START → END
        start_time = group.loc[0, "START_DT_MIN"]
        end_time = group.loc[1, "END_DT_MIN"]
        if pd.notna(start_time) and pd.notna(end_time):
            duration = (end_time - start_time).total_seconds() / 60
            from_id = group.loc[0, "START_VISIT_AREA_ID"]
            to_id = group.loc[1, "END_VISIT_AREA_ID"]
            transport = group.loc[1, "MVMN_CD_1"]
            if pd.notna(from_id) and pd.notna(to_id) and pd.notna(transport):
                edges.append([int(from_id), int(to_id), duration, int(transport)])

        # 이후 이동: END[i] → END[i+1]
        for i in range(1, n - 1):
            t1 = group.loc[i, "END_DT_MIN"]
            t2 = group.loc[i + 1, "END_DT_MIN"]
            if pd.notna(t1) and pd.notna(t2):
                duration = (t2 - t1).total_seconds() / 60
                from_id = group.loc[i, "END_VISIT_AREA_ID"]
                to_id = group.loc[i + 1, "END_VISIT_AREA_ID"]
                transport = group.loc[i + 1, "MVMN_CD_1"]
                if pd.notna(from_id) and pd.notna(to_id) and pd.notna(transport):
                    edges.append([int(from_id), int(to_id), duration, int(transport)])

    # DataFrame으로 정리
    edges_df = pd.DataFrame(edges, columns=[
        "FROM_VISIT_AREA_ID", "TO_VISIT_AREA_ID", "DURATION_MINUTES", "MVMN_CD_1"
    ])

    # 이동수단 통합
    def classify_transport(code):
        if code in [1, 2, 3]:
            return "drive"
        elif code in [4,5,6,7,8,9,10,11,12,13,50]:
            return "public"
        else:
            return "other"

    edges_df["MVMN_TYPE"] = edges_df["MVMN_CD_1"].apply(classify_transport)
    edges_df["is_drive"] = (edges_df["MVMN_TYPE"] == "drive").astype(int)
    edges_df["is_public"] = (edges_df["MVMN_TYPE"] == "public").astype(int)
    edges_df["is_other"] = (edges_df["MVMN_TYPE"] == "other").astype(int)

    # visit_area_id 매핑
    edges_df_filtered = edges_df[
        edges_df["FROM_VISIT_AREA_ID"].isin(visit_area_id_to_index) &
        edges_df["TO_VISIT_AREA_ID"].isin(visit_area_id_to_index)
    ].copy()

    visit_area_edge_index = torch.tensor([
        [visit_area_id_to_index[f] for f in edges_df_filtered["FROM_VISIT_AREA_ID"]],
        [visit_area_id_to_index[t] for t in edges_df_filtered["TO_VISIT_AREA_ID"]]
    ], dtype=torch.long)

    visit_area_edge_attr = torch.tensor(
        edges_df_filtered[["DURATION_MINUTES", "is_drive", "is_public", "is_other"]].to_numpy(),
        dtype=torch.float
    )

    return visit_area_edge_index, visit_area_edge_attr

In [538]:
visit_area_edge_index, visit_area_edge_attr = create_visit_area_edges(move_df, visit_area_id_to_index)

In [539]:
data = HeteroData()

data['user'].x = user_tensor
data['travel'].x = travel_tensor
data['visit_area'].x = visit_area_tensor

# user-travel edge
travel_id_to_index = {id_: idx for idx, id_ in enumerate(travel_df['TRAVEL_ID'])}
user_travel_edges = travel_df[['TRAVELER_ID', 'TRAVEL_ID']].dropna()
user_travel_edges['user_idx'] = user_travel_edges['TRAVELER_ID'].astype(int)
user_travel_edges['travel_idx'] = user_travel_edges['TRAVEL_ID'].map(travel_id_to_index)
user_travel_edge_index = torch.tensor(user_travel_edges[['user_idx', 'travel_idx']].to_numpy().T, dtype=torch.long)

data['user', 'traveled', 'travel'].edge_index = user_travel_edge_index
data['travel', 'traveled_by', 'user'].edge_index = user_travel_edge_index.flip(0)

# travel-visit_area edge
travel_visit_edges = visit_area_df[['TRAVEL_ID', 'VISIT_AREA_ID']].dropna()
travel_visit_edges['travel_idx'] = travel_visit_edges['TRAVEL_ID'].map(travel_id_to_index)
travel_visit_edges['visit_area_idx'] = travel_visit_edges['VISIT_AREA_ID'].map(visit_area_id_to_index)
travel_visit_edge_index = torch.tensor(travel_visit_edges[['travel_idx', 'visit_area_idx']].to_numpy().T, dtype=torch.long)

data['travel', 'contains', 'visit_area'].edge_index = travel_visit_edge_index
data['visit_area', 'contained_in', 'travel'].edge_index = travel_visit_edge_index.flip(0)

# visit_area 이동 엣지
data['visit_area', 'moved_to', 'visit_area'].edge_index = visit_area_edge_index
data['visit_area', 'moved_to', 'visit_area'].edge_attr = visit_area_edge_attr


In [540]:
data

HeteroData(
  user={ x=[1919, 17] },
  travel={ x=[2560, 21] },
  visit_area={ x=[1432, 34] },
  (user, traveled, travel)={ edge_index=[2, 2560] },
  (travel, traveled_by, user)={ edge_index=[2, 2560] },
  (travel, contains, visit_area)={ edge_index=[2, 1432] },
  (visit_area, contained_in, travel)={ edge_index=[2, 1432] },
  (visit_area, moved_to, visit_area)={
    edge_index=[2, 18461],
    edge_attr=[18461, 4],
  }
)

# GNN 모델 재설계

In [541]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import HeteroConv, SAGEConv, Linear

class TravelLinkPredictor(nn.Module):
    def __init__(self, metadata, hidden_dim=64):
        super(TravelLinkPredictor, self).__init__()

        self.metadata = metadata

        # HeteroConv for heterogeneous graph
        self.conv1 = HeteroConv({
            edge_type: SAGEConv((-1, -1), hidden_dim)
            for edge_type in metadata[1]
        }, aggr='sum')

        self.conv2 = HeteroConv({
            edge_type: SAGEConv((hidden_dim, hidden_dim), hidden_dim)
            for edge_type in metadata[1]
        }, aggr='sum')

        # Output projection for node embeddings
        self.lin = nn.Linear(hidden_dim, hidden_dim)

        # Link prediction decoder (e.g., dot product + sigmoid)
        self.edge_predictor = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x_dict, edge_index_dict):
        # 1️⃣ Node embeddings
        x_dict = self.conv1(x_dict, edge_index_dict)
        x_dict = {k: F.relu(v) for k, v in x_dict.items()}
        x_dict = self.conv2(x_dict, edge_index_dict)
        x_dict = {k: self.lin(v) for k, v in x_dict.items()}

        return x_dict

    def decode(self, travel_emb, visit_area_emb, edge_index):
        # travel_emb, visit_area_emb: (N, hidden_dim)
        src, dst = edge_index
        edge_feature = torch.cat([
            travel_emb[src],
            visit_area_emb[dst]
        ], dim=1)
        return torch.sigmoid(self.edge_predictor(edge_feature)).squeeze(-1)


# 학습 파이프라인

In [542]:
from torch_geometric.loader import LinkNeighborLoader

# ➡️ DataLoader for edge-level task
loader = LinkNeighborLoader(
    data,
    num_neighbors=[10, 10],
    edge_label_index=(('travel', 'contains', 'visit_area'), data['travel', 'contains', 'visit_area'].edge_index),
    edge_label=torch.ones(data['travel', 'contains', 'visit_area'].edge_index.size(1)),
    neg_sampling='binary',   # negative sampling 방식
    neg_sampling_ratio=1.0,  # negative sample 비율
    batch_size=1024,
    shuffle=True
)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TravelLinkPredictor(data.metadata(), hidden_dim=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

# ➡️ 학습 루프
for epoch in range(1, 21):
    model.train()
    total_loss = 0
    for batch in loader:
        batch = batch.to(device)

        # Forward
        x_dict = model(batch.x_dict, batch.edge_index_dict)

        # Positive/negative edges
        travel_emb = x_dict['travel']
        visit_area_emb = x_dict['visit_area']
        edge_index = batch['travel', 'contains', 'visit_area'].edge_label_index
        edge_label = batch['travel', 'contains', 'visit_area'].edge_label.float()

        pred = model.decode(travel_emb, visit_area_emb, edge_index)
        loss = F.binary_cross_entropy(pred, edge_label)

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch} | Loss: {total_loss:.4f}")


Epoch 1 | Loss: 2.2196
Epoch 2 | Loss: 0.8844
Epoch 3 | Loss: 0.0602
Epoch 4 | Loss: 0.1499
Epoch 5 | Loss: 0.1119
Epoch 6 | Loss: 0.0003
Epoch 7 | Loss: 0.0000
Epoch 8 | Loss: 0.0000
Epoch 9 | Loss: 0.0000
Epoch 10 | Loss: 0.1225
Epoch 11 | Loss: 0.0000
Epoch 12 | Loss: 0.0000
Epoch 13 | Loss: 0.0000
Epoch 14 | Loss: 0.0000
Epoch 15 | Loss: 0.0488
Epoch 16 | Loss: 0.0000
Epoch 17 | Loss: 0.0000
Epoch 18 | Loss: 0.0000
Epoch 19 | Loss: 0.0000
Epoch 20 | Loss: 0.1225


In [543]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.eval()

TravelLinkPredictor(
  (conv1): HeteroConv(num_relations=5)
  (conv2): HeteroConv(num_relations=5)
  (lin): Linear(in_features=64, out_features=64, bias=True)
  (edge_predictor): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [544]:
def process_travel_input(travel_info:dict):
    from datetime import datetime
    travel_feature_cols = [
        'TOTAL_COST_BINNED_ENCODED',
        'WITH_PET',
        'MONTH',
        'DURATION',
        'MVMN_기타',
        'MVMN_대중교통',
        'MVMN_자가용',
        'TRAVEL_PURPOSE_1',
        'TRAVEL_PURPOSE_2',
        'TRAVEL_PURPOSE_3',
        'TRAVEL_PURPOSE_4',
        'TRAVEL_PURPOSE_5',
        'TRAVEL_PURPOSE_6',
        'TRAVEL_PURPOSE_7',
        'TRAVEL_PURPOSE_8',
        'TRAVEL_PURPOSE_9',
        'WHOWITH_2인여행',
        'WHOWITH_가족여행',
        'WHOWITH_기타',
        'WHOWITH_단독여행',
        'WHOWITH_친구/지인 여행']
    
    
    # mission_ENC에 0 = 반려동물 동반 (WITH_PET)
    travel_info['mission_ENC'] = travel_info['mission_ENC'].strip().split(',')
    if '0' in travel_info['mission_ENC']:
        travel_info['WITH_PET'] = 1
    else:
        travel_info['WITH_PET'] = 0
        
    # TRAVEL_PURPOSE_1 ~~ TRAVEL_PURPOSE_9 (0으로 들어온 입력은 제거해줘야됨) 
    for i in range(1,10):
        if str(i) in travel_info['mission_ENC']:
            travel_info[f'TRAVEL_PURPOSE_{i}'] = 1
        else:
            travel_info[f'TRAVEL_PURPOSE_{i}'] = 0
        
    # MONTH
    dates = travel_info['date_range'].split(' - ')
    travel_info['start_date'] = datetime.strptime(dates[0].strip(), "%Y-%m-%d")
    travel_info['end_date'] = datetime.strptime(dates[1].strip(), "%Y-%m-%d")
    
    travel_info['MONTH'] = travel_info['end_date'].month
    
    # DURATION
    travel_info['DURATION'] = (travel_info['end_date'] - travel_info['start_date']).days
    
    # MNVM_기타, MVMN_대중교통, MVMN_자가용
    for m in ['자가용', '대중교통', '기타']:
        travel_info[f"MVMN_{m}"] = False
    
    if travel_info['MVMN_NM_ENC'] == '1':
        travel_info['MVMN_자가용'] = True
    elif travel_info['MVMN_NM_ENC'] == '2':
        travel_info['MVMN_대중교통'] = True
    else:
        travel_info['MVMN_기타'] = True
    
    # WHOWITH는 1부터 5까지 숫자로 들어옴 -> 원핫 인코딩으로 수정할 것
    # dict에 들어오는 숫자 의미: WHOWITH_단독여행, WHOWITH_2인여행, WHOWITH_가족여행, WHOWITH_친구/지인여행, WHOWITH_기타
    whowith_onehot = [0] * 5
    idx = int(travel_info['whowith_ENC']) - 1
    if 0 <= idx < 5:
        whowith_onehot[idx] = 1
    
    travel_info.update({
    'WHOWITH_단독여행': whowith_onehot[0],
    'WHOWITH_2인여행': whowith_onehot[1],
    'WHOWITH_가족여행': whowith_onehot[2],
    'WHOWITH_친구/지인 여행': whowith_onehot[3],
    'WHOWITH_기타': whowith_onehot[4],
    })
    
    # TOTAL_COST_BINNED_ENCODED
    travel_info['TOTAL_COST_BINNED_ENCODED'] = travel_info['TOTAL_COST'][-1]
    
    # 컬럼 필터링 (순서에 맞게)
    travel_info = {k: int(travel_info[k]) for k in travel_feature_cols}
    
    return pd.DataFrame([travel_info]).fillna(0).astype(np.float32).to_numpy()

In [545]:
def get_age_group(birthdate_str):
    """
    'YYYY-MM-DD' 형식의 생년월일 문자열을 받아
    20, 30, 40 등의 나이대로 변환하는 함수
    """
    from datetime import datetime
    
    birth_year = int(birthdate_str[:4])
    current_year = datetime.now().year
    age = current_year - birth_year + 1  # 한국식 나이
    age_group = (age // 10) * 10
    return age_group

def map_sido(sido:str):
    # sido_code_map = {
    #     '서울특별시': '11',
    #     '부산광역시': '26',
    #     '대구광역시': '27',
    #     '인천광역시': '28',
    #     '광주광역시': '29',
    #     '대전광역시': '30',
    #     '울산광역시': '31',
    #     '세종특별자치시': '36',
    #     '경기도': '41',
    #     '강원도': '42',
    #     '충청북도': '43',
    #     '충청남도': '44',
    #     '전라북도': '45',
    #     '전라남도': '46',
    #     '경상북도': '47',
    #     '경상남도': '48',
    #     '제주특별자치도': '50'
    # }
    sido_code_map = {
        '서울특별시': '1',
        '부산광역시': '2',
        '대구광역시': '3',
        '인천광역시': '4',
        '광주광역시': '5',
        '대전광역시': '6',
        '울산광역시': '7',
        '세종특별자치시': '8',
        '경기도': '9',
        '강원도': '10',
        '충청북도': '11',
        '충청남도': '12',
        '전라북도': '13',
        '전라남도': '14',
        '경상북도': '15',
        '경상남도': '16',
        '제주특별자치도': '17'
    }

    return int(sido_code_map[sido])

In [546]:
def process_user_input(user_info:dict):
    user_feature_cols = [
    'GENDER', 'TRAVEL_TERM', 'TRAVEL_NUM',
    'TRAVEL_LIKE_SIDO_1', 'TRAVEL_LIKE_SIDO_2', 'TRAVEL_LIKE_SIDO_3',
    'TRAVEL_STYL_1', 'TRAVEL_STYL_2', 'TRAVEL_STYL_3', 'TRAVEL_STYL_4',
    'TRAVEL_STYL_5', 'TRAVEL_STYL_6', 'TRAVEL_STYL_7', 'TRAVEL_STYL_8',
    'TRAVEL_MOTIVE_1', 'TRAVEL_MOTIVE_2',
    'AGE_GRP'
    ]
    
    # 1. 나잇대 계산
    user_info['AGE_GRP'] = get_age_group(user_info['BIRTHDATE']) // 10
    
    # 2. 시도 변환
    for i in range(1, 4):
        user_info[f"TRAVEL_LIKE_SIDO_{i}"] = map_sido(user_info[f"TRAVEL_LIKE_SIDO_{i}"]) / 10
    
    # 3. 컬럼 필터링 (순서에 맞게)
    user_info = {k: int(user_info[k]) for k in user_feature_cols}
    
    return pd.DataFrame([user_info]).fillna(0).astype(np.float32).to_numpy()

In [555]:
temp_info = {'USER_ID': 'admin', 'PASSWORD': 'admin', 'CONFIRM_PASSWORD': 'admin', 'NAME': '유상범', 'BIRTHDATE': '1999-08-10', 'GENDER': '2', 'EDU_NM': '6', 'EDU_FNSH_SE': '2', 'MARR_STTS': '1', 'JOB_NM': '1', 'INCOME': '100', 'HOUSE_INCOME': '10000', 'TRAVEL_TERM': '1', 'TRAVEL_LIKE_SIDO_1': '부산광역시', 'TRAVEL_LIKE_SIDO_2': '전라남도', 'TRAVEL_LIKE_SIDO_3': '충청남도', 'TRAVEL_STYL_1': 4, 'TRAVEL_STYL_2': 4, 'TRAVEL_STYL_3': 1, 'TRAVEL_STYL_4': 5, 'TRAVEL_STYL_5': 2, 'TRAVEL_STYL_6': 4, 'TRAVEL_STYL_7': 3, 'TRAVEL_STYL_8': 2, 'TRAVEL_MOTIVE_1': '7', 'TRAVEL_MOTIVE_2': '7', 'FAMILY_MEMB': '1', 'TRAVEL_NUM': '1', 'TRAVEL_COMPANIONS_NUM': '1'}
test_travel = {'mission_ENC': '0,5,7,9', 'date_range': '2025-05-28 - 2025-05-30', 'start_date': '', 'end_date': '', 'TOTAL_COST': '10', 'MVMN_NM_ENC': '2', 'whowith_ENC': '3', 'mission_type': 'normal'}
# test_travel = {'mission_ENC': '1,2,3,7,9', 'date_range': '2025-09-28 - 2025-09-29', 'start_date': '', 'end_date': '', 'TOTAL_COST': '4', 'MVMN_NM_ENC': '1', 'whowith_ENC': '1', 'mission_type': 'normal'}

test_travel_tensor = process_travel_input(test_travel)
test_user_tensor = process_user_input(temp_info)


In [561]:
import torch

def recommend_visit_areas(
    model, 
    data, 
    new_travel_feature, 
    top_k=10, 
    device='cuda' if torch.cuda.is_available() else 'cpu'
):
    """
    새로운 travel feature를 입력받아 visit_area 추천 리스트와 확률을 반환하는 함수

    Args:
        model: 학습된 GNN 모델
        data: HeteroData (기존 그래프)
        new_travel_feature: [1, feature_dim] 형태의 travel feature
        top_k: 추천 방문지 개수
        device: 사용 디바이스 (default: cuda)

    Returns:
        topk_indices: 추천 방문지 인덱스 (numpy array)
        topk_scores: 추천 확률 (numpy array)
    """
    model.eval()

    with torch.no_grad():
        # 1️⃣ travel feature에 새로운 travel 추가
        new_travel_feature = new_travel_feature.to(device)
        data['travel'].x = torch.cat([data['travel'].x.to(device), new_travel_feature], dim=0)
        new_travel_idx = data['travel'].x.size(0) - 1

        # 2️⃣ 전체 노드 임베딩 계산
        x_dict = model(data.x_dict, data.edge_index_dict)
        travel_emb = x_dict['travel']
        visit_area_emb = x_dict['visit_area']

        # 3️⃣ 새로운 travel과 모든 visit_area의 edge_index 생성
        num_visit_areas = visit_area_emb.size(0)
        edge_index = torch.stack([
            torch.full((num_visit_areas,), new_travel_idx, dtype=torch.long),
            torch.arange(num_visit_areas, dtype=torch.long)
        ], dim=0).to(device)

        # 4️⃣ edge 확률 예측
        edge_probs = model.decode(travel_emb, visit_area_emb, edge_index)

        # 5️⃣ top-k 방문지 index 및 확률 추출
        topk_indices = edge_probs.topk(top_k).indices.cpu().numpy()
        topk_scores = edge_probs[topk_indices].cpu().numpy()

        return topk_indices, topk_scores


In [566]:
test_travel_tensor = torch.tensor(test_travel_tensor, dtype=torch.float)

topk_indices, topk_scores = recommend_visit_areas(model, data, test_travel_tensor, top_k=10)

print("추천 방문지 index:", topk_indices)
print("추천 확률:", topk_scores)


추천 방문지 index: [ 635 1423  982  983  790  876  224 1300 1270  142]
추천 확률: [4.3654304e-15 4.3404230e-15 4.2827104e-15 4.2574459e-15 4.2455256e-15
 4.0965643e-15 4.0914415e-15 4.0853437e-15 4.0783836e-15 4.0556704e-15]


  test_travel_tensor = torch.tensor(test_travel_tensor, dtype=torch.float)


2309070002
2308280002
2308300002
2307130005
2308160002
2309100002
2305150002
2307300014
2307130002
2309170001


In [567]:
total = []
for i in topk_indices:
    total.append(visit_area_index_to_id[i])


In [568]:
import numpy as np

def select_best_location_by_distance(route_ids, visit_area_df):
    selected_names = []

    for idx, vid in enumerate(route_ids):
        candidates = visit_area_df[visit_area_df['VISIT_AREA_ID'] == vid]

        # 후보가 하나일 경우 바로 선택
        if len(candidates) == 1:
            selected_names.append(candidates.iloc[0]['VISIT_AREA_NM'])
            continue

        # 이전/다음 위치 좌표 확보
        prev_coord = None
        next_coord = None

        if idx > 0:
            prev_id = route_ids[idx - 1]
            prev_row = visit_area_df[visit_area_df['VISIT_AREA_ID'] == prev_id]
            if not prev_row.empty:
                prev_coord = (prev_row.iloc[0]['X_COORD'], prev_row.iloc[0]['Y_COORD'])

        if idx < len(route_ids) - 1:
            next_id = route_ids[idx + 1]
            next_row = visit_area_df[visit_area_df['VISIT_AREA_ID'] == next_id]
            if not next_row.empty:
                next_coord = (next_row.iloc[0]['X_COORD'], next_row.iloc[0]['Y_COORD'])

        # 거리 계산 함수
        def total_distance(row):
            x, y = row['X_COORD'], row['Y_COORD']
            dist = 0
            if prev_coord:
                dist += np.linalg.norm(np.array([x, y]) - np.array(prev_coord))
            if next_coord:
                dist += np.linalg.norm(np.array([x, y]) - np.array(next_coord))
            return dist

        # 최단 거리 후보 선택
        best_row = candidates.loc[candidates.apply(total_distance, axis=1).idxmin()]
        selected_names.append(best_row['VISIT_AREA_NM'])

    return selected_names

In [569]:
select_best_location_by_distance(total, visit_area_df)

['영등포역 1호선',
 '수원 화성 박물관',
 '공릉역 7호선',
 '유림 고속버스정류장',
 '상동호수 공원',
 '대소 버스공동 정류장',
 '서울역',
 '신호등 장작구이 서오릉 본점',
 '엔드밀 성수점',
 '평택 지제역']