# 전처리

In [156]:
import pandas as pd
import torch
import numpy as np
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer, MinMaxScaler

# 데이터 경로
data_path = "../data/VL_csv/"

# CSV 파일 로드
user_df = pd.read_csv(data_path + "tn_traveller_master_여행객 Master_E_preprocessed.csv")
travel_df = pd.read_csv(data_path + "tn_travel_여행_E_COST_cleaned_gnn.csv")
visit_df = pd.read_csv(data_path + "tn_visit_area_info_방문지정보_Cleaned_E.csv")
move_df = pd.read_csv(data_path + "tn_move_his_이동내역_Cleaned_E.csv")

# 고정된 feature 컬럼
user_feature_cols = [
    'GENDER', 'EDU_NM', 'EDU_FNSH_SE', 'MARR_STTS', 'JOB_NM', 'HOUSE_INCOME',
    'TRAVEL_TERM', 'TRAVEL_LIKE_SIDO_1', 'TRAVEL_LIKE_SIDO_2', 'TRAVEL_LIKE_SIDO_3',
    'AGE_GRP', 'FAMILY_MEMB', 'TRAVEL_NUM', 'TRAVEL_COMPANIONS_NUM',
    'TRAVEL_STYL_1', 'TRAVEL_STYL_2', 'TRAVEL_STYL_3', 'TRAVEL_STYL_4',
    'TRAVEL_STYL_5', 'TRAVEL_STYL_6', 'TRAVEL_STYL_7', 'TRAVEL_STYL_8',
    'TRAVEL_MOTIVE_1', 'TRAVEL_MOTIVE_2', 'INCOME'
]

travel_feature_cols = [
    'LODGOUT_COST', 'ACTIVITY_COST',
    'TOTAL_COST', 'DURATION', 'PURPOSE_1', 'PURPOSE_10', 'PURPOSE_11',
    'PURPOSE_12', 'PURPOSE_13', 'PURPOSE_2', 'PURPOSE_21', 'PURPOSE_22',
    'PURPOSE_23', 'PURPOSE_24', 'PURPOSE_25', 'PURPOSE_26', 'PURPOSE_27',
    'PURPOSE_28', 'PURPOSE_3', 'PURPOSE_4', 'PURPOSE_5', 'PURPOSE_6',
    'PURPOSE_7', 'PURPOSE_8', 'PURPOSE_9', 'MVMN_NM_ENC', 'age_ENC',
    'whowith_ENC', 'mission_ENC'
]

# ID 매핑
user_ids = sorted(user_df["TRAVELER_ID"].unique())
travel_ids = sorted(travel_df["TRAVEL_ID"].unique())

user_id_map = {uid: i for i, uid in enumerate(user_ids)}
travel_id_map = {tid: i for i, tid in enumerate(travel_ids)}

# feature 추출
user_tensor = torch.tensor(
    user_df[user_feature_cols].fillna(0).astype(float).values,
    dtype=torch.float
)
travel_tensor = torch.tensor(
    travel_df[travel_feature_cols].fillna(0).astype(float).values,
    dtype=torch.float
)

# 결과 요약
summary = {
    "user_tensor_shape": user_tensor.shape,
    "travel_tensor_shape": travel_tensor.shape,
    "user_id_map_size": len(user_id_map),
    "travel_id_map_size": len(travel_id_map),
    "user_example": user_tensor[0],
    "travel_example": travel_tensor[0]
}

summary


{'user_tensor_shape': torch.Size([1919, 25]),
 'travel_tensor_shape': torch.Size([2560, 29]),
 'user_id_map_size': 1919,
 'travel_id_map_size': 2560,
 'user_example': tensor([ 2.,  4.,  1.,  3., 11.,  9.,  2., 11., 47., 41., 60.,  3.,  2.,  1.,
          2.,  4.,  4.,  4.,  4.,  4.,  5.,  5.,  2.,  6.,  4.]),
 'travel_example': tensor([1.4394e-02, 0.0000e+00, 1.2607e-03, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 1.0000e+00, 5.0000e+00, 0.0000e+00])}

In [None]:
import torch
from torch_geometric.data import HeteroData

def build_travel_subgraph(travel_id: str,
                           travel_df,
                           visit_df,
                           move_df,
                           user_id_map,
                           travel_id_map,
                           visit_id_map,
                           user_features,
                           travel_features,
                           visit_area_dim,
                           travel_label_vectors):
    if travel_id not in travel_id_map:
        return None
    t_idx = travel_id_map[travel_id]

    row = travel_df[travel_df["TRAVEL_ID"] == travel_id].iloc[0]
    traveler_id = row["TRAVELER_ID"]
    if traveler_id not in user_id_map:
        return None
    u_idx = user_id_map[traveler_id]

    # visit_area 노드 추출
    visits = visit_df[visit_df["TRAVEL_ID"] == travel_id]["VISIT_AREA_ID"].tolist()
    visits = [vid for vid in visits if vid in visit_id_map]
    v_indices = [visit_id_map[vid] for vid in visits]

    # 이동 edge (이 travel_id만)
    move_group = move_df[move_df["TRAVEL_ID"] == travel_id]
    move_edges = [[], []]
    path = []
    for _, r in move_group.iterrows():
        sid = r["START_VISIT_AREA_ID"]
        eid = r["END_VISIT_AREA_ID"]
        if pd.notna(sid):
            path = [int(float(sid))]
        if pd.notna(eid):
            path.append(int(float(eid)))
    for a, b in zip(path[:-1], path[1:]):
        if a in visit_id_map and b in visit_id_map:
            if visit_id_map[a] in v_indices and visit_id_map[b] in v_indices:
                move_edges[0].append(v_indices.index(visit_id_map[a]))
                move_edges[1].append(v_indices.index(visit_id_map[b]))
    move_edge_index = torch.tensor(move_edges, dtype=torch.long) if move_edges[0] else torch.empty((2, 0), dtype=torch.long)

    # GNN 입력용 HeteroData 구성
    data = HeteroData()
    data['user'].x = user_features[u_idx].unsqueeze(0)  # [1, user_dim]
    data['travel'].x = travel_features[t_idx].unsqueeze(0)  # [1, travel_dim]
    data['visit_area'].x = torch.zeros((len(v_indices), visit_area_dim))  # dummy

    # edge 설정
    # 정방향: user → travel
    data[('user', 'traveled', 'travel')].edge_index = torch.tensor([[0], [0]], dtype=torch.long)

    # 역방향: travel → user
    data[('travel', 'traveled_by', 'user')].edge_index = torch.tensor([[0], [0]], dtype=torch.long)

    # travel → visit_area
    data[('travel', 'contains', 'visit_area')].edge_index = torch.stack([
        torch.zeros(len(v_indices), dtype=torch.long),
        torch.arange(len(v_indices))
    ])

    # visit_area 이동 edge
    data[('visit_area', 'move_1', 'visit_area')].edge_index = move_edge_index

    # label
    full_label = travel_label_vectors[travel_id]
    label_mask = torch.tensor(v_indices, dtype=torch.long)
    visit_label = full_label[label_mask]
    data['visit_area'].y = visit_label

    return data


In [158]:
from torch_geometric.loader import DataLoader

def build_travel_dataloader(travel_ids,
                             travel_df,
                             visit_df,
                             move_df,
                             user_id_map,
                             travel_id_map,
                             visit_id_map,
                             user_tensor,
                             travel_tensor,
                             visit_area_dim,
                             travel_label_vectors,
                             batch_size=16,
                             shuffle=True):

    dataset = []
    skipped = []

    for tid in travel_ids:
        try:
            data = build_travel_subgraph(
                travel_id=tid,
                travel_df=travel_df,
                visit_df=visit_df,
                move_df=move_df,
                user_id_map=user_id_map,
                travel_id_map=travel_id_map,
                visit_id_map=visit_id_map,
                user_features=user_tensor,
                travel_features=travel_tensor,
                visit_area_dim=visit_area_dim,
                travel_label_vectors=travel_label_vectors
            )
            if data is not None:
                dataset.append(data)
            else:
                skipped.append(tid)
        except:
            skipped.append(tid)

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return loader, skipped


In [159]:
# 고유 ID 추출
visit_area_ids = sorted(visit_df["VISIT_AREA_ID"].unique())
travel_ids = sorted(visit_df["TRAVEL_ID"].unique())

# 매핑 테이블
visit_area_id_to_index = {vid: i for i, vid in enumerate(visit_area_ids)}
travel_id_to_index = {tid: i for i, tid in enumerate(travel_ids)}

# 라벨 벡터 생성
travel_to_visits = visit_df.groupby("TRAVEL_ID")["VISIT_AREA_ID"].apply(list).to_dict()
travel_label_vectors = {}

for travel_id, visit_list in travel_to_visits.items():
    label = torch.zeros(len(visit_area_id_to_index))
    for vid in visit_list:
        if vid in visit_area_id_to_index:
            label[visit_area_id_to_index[vid]] = 1.0
    travel_label_vectors[travel_id] = label

In [160]:
travel_ids = list(travel_label_vectors.keys())

valid_user_ids = set(user_df["TRAVELER_ID"])
valid_travel_ids = travel_df[travel_df["TRAVELER_ID"].isin(valid_user_ids)]["TRAVEL_ID"].tolist()

loader, skipped_ids = build_travel_dataloader(
    travel_ids=valid_travel_ids,
    travel_df=travel_df,
    visit_df=visit_df,
    move_df=move_df,
    user_id_map=user_id_map,
    travel_id_map=travel_id_map,
    visit_id_map=visit_area_id_to_index,
    user_tensor=user_tensor,
    travel_tensor=travel_tensor,
    visit_area_dim=64,
    travel_label_vectors=travel_label_vectors,
    batch_size=16
)

# 데이터 정합성 검증

In [232]:
import torch

# ID 매핑
user_ids = sorted(user_df["TRAVELER_ID"].unique())
travel_ids = sorted(travel_df["TRAVEL_ID"].unique())
visit_ids = sorted(visit_df["VISIT_AREA_ID"].unique())

user_id_map = {uid: i for i, uid in enumerate(user_ids)}
travel_id_map = {tid: i for i, tid in enumerate(travel_ids)}
visit_area_id_map = {vid: i for i, vid in enumerate(visit_ids)}

# user & travel tensor
user_feature_cols = [
    'GENDER', 'EDU_NM', 'EDU_FNSH_SE', 'MARR_STTS', 'JOB_NM', 'HOUSE_INCOME',
    'TRAVEL_TERM', 'TRAVEL_LIKE_SIDO_1', 'TRAVEL_LIKE_SIDO_2', 'TRAVEL_LIKE_SIDO_3',
    'AGE_GRP', 'FAMILY_MEMB', 'TRAVEL_NUM', 'TRAVEL_COMPANIONS_NUM',
    'TRAVEL_STYL_1', 'TRAVEL_STYL_2', 'TRAVEL_STYL_3', 'TRAVEL_STYL_4',
    'TRAVEL_STYL_5', 'TRAVEL_STYL_6', 'TRAVEL_STYL_7', 'TRAVEL_STYL_8',
    'TRAVEL_MOTIVE_1', 'TRAVEL_MOTIVE_2', 'INCOME'
]

travel_feature_cols = [
    'LODGOUT_COST', 'ACTIVITY_COST', 'TOTAL_COST', 'DURATION', 'PURPOSE_1',
    'PURPOSE_10', 'PURPOSE_11', 'PURPOSE_12', 'PURPOSE_13', 'PURPOSE_2',
    'PURPOSE_21', 'PURPOSE_22', 'PURPOSE_23', 'PURPOSE_24', 'PURPOSE_25',
    'PURPOSE_26', 'PURPOSE_27', 'PURPOSE_28', 'PURPOSE_3', 'PURPOSE_4',
    'PURPOSE_5', 'PURPOSE_6', 'PURPOSE_7', 'PURPOSE_8', 'PURPOSE_9',
    'MVMN_NM_ENC', 'age_ENC', 'whowith_ENC', 'mission_ENC'
]

user_tensor = torch.tensor(user_df[user_feature_cols].fillna(0).astype(float).values, dtype=torch.float)
travel_tensor = torch.tensor(travel_df[travel_feature_cols].fillna(0).astype(float).values, dtype=torch.float)

# travel_label_vectors 생성
travel_to_visits = visit_df.groupby("TRAVEL_ID")["VISIT_AREA_ID"].apply(list).to_dict()
travel_label_vectors = {}
for travel_id, visit_list in travel_to_visits.items():
    label = torch.zeros(len(visit_area_id_map))
    for vid in visit_list:
        if vid in visit_area_id_map:
            label[visit_area_id_map[vid]] = 1.0
    travel_label_vectors[travel_id] = label

# 불러온 build_travel_subgraph 함수 사용
from torch_geometric.data import HeteroData

invalid_travel_ids = []

for travel_id in travel_ids:
    try:
        data = build_travel_subgraph(
            travel_id=travel_id,
            travel_df=travel_df,
            visit_df=visit_df,
            move_df=move_df,
            user_id_map=user_id_map,
            travel_id_map=travel_id_map,
            visit_id_map=visit_area_id_map,
            user_features=user_tensor,
            travel_features=travel_tensor,
            visit_area_dim=128,
            travel_label_vectors=travel_label_vectors
        )

        if (
            data is None or
            data['user'].x is None or data['travel'].x is None or
            data['visit_area'].x is None or
            data['visit_area'].x.size(0) == 0 or
            data['visit_area'].y.size(0) != data['visit_area'].x.size(0)
        ):
            invalid_travel_ids.append(travel_id)

    except Exception:
        invalid_travel_ids.append(travel_id)

invalid_travel_ids[:20], len(invalid_travel_ids)


(['e_e000006',
  'e_e000011',
  'e_e000016',
  'e_e000020',
  'e_e000037',
  'e_e000040',
  'e_e000041',
  'e_e000293',
  'e_e000299',
  'e_e000302',
  'e_e000317',
  'e_e000329',
  'e_e000331',
  'e_e000332',
  'e_e000347',
  'e_e000352',
  'e_e000353',
  'e_e000357',
  'e_e000364',
  'e_e000371'],
 641)

In [233]:
from torch_geometric.data import HeteroData

invalid_travel_ids = []

for travel_id in travel_ids:
    try:
        data = build_travel_subgraph(
            travel_id=travel_id,
            travel_df=travel_df,
            visit_df=visit_df,
            move_df=move_df,
            user_id_map=user_id_map,
            travel_id_map=travel_id_map,
            visit_id_map=visit_area_id_map,
            user_features=user_tensor,
            travel_features=travel_tensor,
            visit_area_dim=64,
            travel_label_vectors=travel_label_vectors
        )

        if (
            data is None or
            data['user'].x is None or data['travel'].x is None or
            data['visit_area'].x is None or
            data['visit_area'].x.size(0) == 0 or
            data['visit_area'].y.size(0) != data['visit_area'].x.size(0)
        ):
            invalid_travel_ids.append(travel_id)

    except Exception as e:
        invalid_travel_ids.append(travel_id)

print("❌ 오류 발생 travel ID 개수:", len(invalid_travel_ids))
print("예시:", invalid_travel_ids[:10])


❌ 오류 발생 travel ID 개수: 641
예시: ['e_e000006', 'e_e000011', 'e_e000016', 'e_e000020', 'e_e000037', 'e_e000040', 'e_e000041', 'e_e000293', 'e_e000299', 'e_e000302']


In [234]:
# travel_df[travel_df['TRAVEL_ID'] == 'e_e000006']
# visit_df[visit_df['TRAVEL_ID'] == 'e_e000006']
# move_df[move_df['TRAVEL_ID'] == 'e_e000006']
user_df[user_df['TRAVELER_ID'] == 'e000011'] # 여기에 없음

Unnamed: 0,GENDER,EDU_NM,EDU_FNSH_SE,MARR_STTS,JOB_NM,HOUSE_INCOME,TRAVEL_TERM,TRAVEL_LIKE_SIDO_1,TRAVEL_LIKE_SIDO_2,TRAVEL_LIKE_SIDO_3,...,TRAVEL_STYL_3,TRAVEL_STYL_4,TRAVEL_STYL_5,TRAVEL_STYL_6,TRAVEL_STYL_7,TRAVEL_STYL_8,TRAVEL_MOTIVE_1,TRAVEL_MOTIVE_2,INCOME,TRAVELER_ID


## 오류 데이터 제거

In [235]:
import pandas as pd
import torch

invalid_travel_ids = travel_df[
    ~travel_df["TRAVELER_ID"].isin(user_df["TRAVELER_ID"])
]["TRAVEL_ID"].tolist()
valid_travel_df = travel_df[~travel_df["TRAVEL_ID"].isin(invalid_travel_ids)].reset_index(drop=True)
valid_user_df = user_df[user_df["TRAVELER_ID"].isin(valid_travel_df["TRAVELER_ID"])].reset_index(drop=True)
valid_visit_df = visit_df[~visit_df["TRAVEL_ID"].isin(invalid_travel_ids)].reset_index(drop=True)
valid_move_df = move_df[~move_df["TRAVEL_ID"].isin(invalid_travel_ids)].reset_index(drop=True)

# 3. ID 맵
user_id_map = {uid: i for i, uid in enumerate(sorted(valid_user_df["TRAVELER_ID"].unique()))}
travel_id_map = {tid: i for i, tid in enumerate(sorted(valid_travel_df["TRAVEL_ID"].unique()))}
visit_area_id_map = {vid: i for i, vid in enumerate(sorted(valid_visit_df["VISIT_AREA_ID"].unique()))}

# 4. Feature 추출
user_tensor = torch.tensor(valid_user_df[user_feature_cols].fillna(0).astype(float).values, dtype=torch.float)
travel_tensor = torch.tensor(valid_travel_df[travel_feature_cols].fillna(0).astype(float).values, dtype=torch.float)

# 5. Label vector 생성
travel_to_visits = valid_visit_df.groupby("TRAVEL_ID")["VISIT_AREA_ID"].apply(list).to_dict()
travel_label_vectors = {}

for travel_id, visit_list in travel_to_visits.items():
    label = torch.zeros(len(visit_area_id_map))
    for vid in visit_list:
        if vid in visit_area_id_map:
            label[visit_area_id_map[vid]] = 1.0
    travel_label_vectors[travel_id] = label


In [237]:
loader, skipped_ids = build_travel_dataloader(
    travel_ids=list(travel_label_vectors.keys()),  # 필터링된 travel만 사용
    travel_df=valid_travel_df,
    visit_df=valid_visit_df,
    move_df=valid_move_df,
    user_id_map=user_id_map,
    travel_id_map=travel_id_map,
    visit_id_map=visit_area_id_map,
    user_tensor=user_tensor,
    travel_tensor=travel_tensor,
    visit_area_dim=128,
    travel_label_vectors=travel_label_vectors,
    batch_size=16
)

# 모델링

In [None]:
import torch
import torch.nn.functional as F
from torch import nn
from torch_geometric.nn import HeteroConv, SAGEConv

class SafeProjectedGNN(nn.Module):
    def __init__(self, metadata, user_input_dim, travel_input_dim, hidden_dim=64):
        super().__init__()
        self.hidden_dim = hidden_dim

        self.input_proj = nn.ModuleDict({
            'user': nn.Linear(user_input_dim, hidden_dim),
            'travel': nn.Linear(travel_input_dim, hidden_dim),
            'visit_area': nn.Identity()  # feature 없는 dummy 입력
        })

        self.conv1 = HeteroConv({
            edge_type: SAGEConv((-1, -1), hidden_dim)
            for edge_type in metadata[1]
        }, aggr='sum')

        self.conv2 = HeteroConv({
            edge_type: SAGEConv((hidden_dim, hidden_dim), hidden_dim)
            for edge_type in metadata[1]
        }, aggr='sum')

        self.conv3 = HeteroConv({
            edge_type: SAGEConv((hidden_dim, hidden_dim), hidden_dim)
            for edge_type in metadata[1]
        }, aggr='sum')

        self.dropout = nn.Dropout(0.3)

        self.expert_location = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

        self.expert_preference = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

        self.gate = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.Linear(hidden_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 2)
        )

    def forward(self, x_dict, edge_index_dict, feedback_mask=None):
        x_dict = {
            k: self.input_proj[k](v) if k in self.input_proj and v is not None else v
            for k, v in x_dict.items()
        }

        h_dict = self.conv1(x_dict, edge_index_dict)
        h_dict = {k: self.dropout(F.relu(v)) for k, v in h_dict.items()}

        h_dict = self.conv2(h_dict, edge_index_dict)
        h_dict = {k: self.dropout(F.relu(v)) for k, v in h_dict.items()}

        h_dict = self.conv3(h_dict, edge_index_dict)

        h_visit = h_dict['visit_area']
        loc_score = self.expert_location(h_visit).squeeze(-1)
        pref_score = self.expert_preference(h_visit).squeeze(-1)
        gate_weight = torch.softmax(self.gate(h_visit), dim=-1)

        score = gate_weight[:, 0] * loc_score + gate_weight[:, 1] * pref_score

        if feedback_mask is not None:
            score = score + feedback_mask

        return score  # shape: [num_visit_area]

### todo list
- 방문지 정보 정리 (집, 아파트, 호텔)
- 이동정보 데이터도 정리해야됨
- 방문지 및 이동정보 정리한 것 기반으로 리펙토링 할 것

# 학습

In [238]:
import torch
import torch.nn.functional as F

def train_visit_gnn(model, loader, optimizer, device='cuda' if torch.cuda.is_available() else 'cpu', num_epochs=10):
    model = model.to(device)
    model.train()

    for epoch in range(num_epochs):
        total_loss = 0.0
        total_samples = 0

        for batch in loader:
            batch = batch.to(device)

            optimizer.zero_grad()
            pred = model(batch.x_dict, batch.edge_index_dict)  # [num_visit_area]
            label = batch['visit_area'].y.to(pred.device)      # [num_visit_area]

            loss = F.binary_cross_entropy_with_logits(pred, label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            total_samples += 1

        avg_loss = total_loss / total_samples
        print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {avg_loss:.4f}")

In [None]:
metadata = loader.dataset[0].metadata()

model = SafeProjectedGNN(
    metadata=metadata,
    user_input_dim=user_tensor.shape[1],     # 25
    travel_input_dim=travel_tensor.shape[1]  # 29
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

train_visit_gnn(model, loader, optimizer, num_epochs=10)

[Epoch 1/10] Loss: 0.0568
[Epoch 2/10] Loss: 0.0001
[Epoch 3/10] Loss: 0.0000
[Epoch 4/10] Loss: 0.0000
[Epoch 5/10] Loss: 0.0000
[Epoch 6/10] Loss: 0.0000
[Epoch 7/10] Loss: 0.0000
[Epoch 8/10] Loss: 0.0000
[Epoch 9/10] Loss: 0.0000
[Epoch 10/10] Loss: 0.0000


# 추론 테스트

In [242]:
def predict_travel_visit_topk(model, travel_id, travel_df, visit_df, move_df,
                               user_id_map, travel_id_map, visit_id_map,
                               user_tensor, travel_tensor, travel_label_vectors,
                               visit_area_id_map, k=5):
    model.eval()

    # travel 단위 GNN 입력 데이터 생성
    data = build_travel_subgraph(
        travel_id=travel_id,
        travel_df=travel_df,
        visit_df=visit_df,
        move_df=move_df,
        user_id_map=user_id_map,
        travel_id_map=travel_id_map,
        visit_id_map=visit_area_id_map,
        user_features=user_tensor,
        travel_features=travel_tensor,
        visit_area_dim=128,
        travel_label_vectors=travel_label_vectors
    )

    if data is None or data['visit_area'].x.size(0) == 0:
        return []


    with torch.no_grad():
        pred = model(data.x_dict, data.edge_index_dict)  # [num_visit_area]
        k_safe = min(k, pred.size(0))  # 가능한 최대 추천 수로 보정
        if k < pred.size(0):
            k_safe = pred.size(0)
        topk = torch.topk(pred, k=k_safe)
        indices = topk.indices.tolist()
        scores = topk.values.tolist()


    # 실제 visit_area ID 복원
    visit_ids = visit_df[visit_df["TRAVEL_ID"] == travel_id]["VISIT_AREA_ID"].tolist()
    v_map = [visit_area_id_map[vid] for vid in visit_ids if vid in visit_area_id_map]
    id_to_local = {i: vid for i, vid in enumerate(v_map)}

    result = [(id_to_local[i], scores[j]) for j, i in enumerate(indices)]
    return result

In [226]:
travel_ids = valid_travel_df["TRAVEL_ID"].to_list()

for tid in travel_ids[:10]:
    results = predict_travel_visit_topk(
        model=model,
        travel_id=tid,
        travel_df=valid_travel_df,
        visit_df=valid_visit_df,
        move_df=valid_move_df,
        user_id_map=user_id_map,
        travel_id_map=travel_id_map,
        visit_id_map=visit_area_id_map,
        user_tensor=user_tensor,
        travel_tensor=travel_tensor,
        travel_label_vectors=travel_label_vectors,
        visit_area_id_map=visit_area_id_map,
        k=5
    )

    print(f"🔍 Top-K 추천 결과 for {tid}:")
    print(results)

🔍 Top-K 추천 결과 for e_e000004:
[(18, 14.906047821044922), (17, 14.905040740966797), (16, 14.869921684265137)]
🔍 Top-K 추천 결과 for e_e000009:
[(20, 14.91132926940918), (19, 14.91132926940918), (18, 14.91132926940918), (15, 14.911328315734863), (12, 14.911328315734863), (16, 14.911328315734863), (17, 14.911328315734863), (10, 14.911325454711914), (9, 14.911255836486816), (8, 14.9105224609375), (7, 14.886672973632812)]
🔍 Top-K 추천 결과 for e_e000010:
[(14, 14.831337928771973), (13, 14.83113956451416), (17, 14.83113956451416), (16, 14.830097198486328), (12, 14.830097198486328), (11, 14.785408020019531), (15, 14.785408020019531)]
🔍 Top-K 추천 결과 for e_e000013:
[(6, 14.903336524963379), (7, 14.903336524963379), (4, 14.903331756591797), (3, 14.903213500976562), (2, 14.90153694152832), (1, 14.859915733337402)]
🔍 Top-K 추천 결과 for e_e000014:
[(23, 14.873223304748535)]
🔍 Top-K 추천 결과 for e_e000015:
[(18, 14.90087890625), (22, 14.90087890625), (21, 14.90087890625), (20, 14.90087890625), (19, 14.90087890625),

In [244]:
def get_enriched_topk_visits(travel_id, model,
                              travel_df, visit_df, move_df,
                              user_id_map, travel_id_map, visit_id_map,
                              user_tensor, travel_tensor,
                              travel_label_vectors,
                              k=5):
    # 1. index → ID 변환 매핑
    index_to_visit_id = {v: k for k, v in visit_id_map.items()}

    # 2. 추천 수행
    topk_result = predict_travel_visit_topk(
        model=model,
        travel_id=travel_id,
        travel_df=travel_df,
        visit_df=visit_df,
        move_df=move_df,
        user_id_map=user_id_map,
        travel_id_map=travel_id_map,
        visit_id_map=visit_id_map,
        user_tensor=user_tensor,
        travel_tensor=travel_tensor,
        travel_label_vectors=travel_label_vectors,
        visit_area_id_map=visit_id_map,
        k=k
    )
    
    if not isinstance(topk_result, list) or len(topk_result) == 0:
        print(f"추천 결과 없음: travel_id = {travel_id}")
        return pd.DataFrame(columns=["VISIT_AREA_ID", "VISIT_AREA_NM", "pred_score"])

    
    # visit_df ID 컬럼 문자열로 통일
    visit_df = visit_df.copy()  # 💡 이 줄을 꼭 추가!
    visit_df["VISIT_AREA_ID"] = visit_df["VISIT_AREA_ID"].astype(str).str.strip()

    # top_visit_ids도 문자열로
    top_visit_ids = [str(index_to_visit_id[i]).strip() for i, _ in topk_result]


    # 4. 정보 연결
    result_df = visit_df[visit_df["VISIT_AREA_ID"].isin(top_visit_ids)].drop_duplicates(subset="VISIT_AREA_ID")
    result_df["pred_score"] = [score for _, score in topk_result]

    return result_df.sort_values("pred_score", ascending=False).reset_index(drop=True)


In [245]:
total = None

In [246]:
for tid in travel_ids[0:20]:
    
    results = get_enriched_topk_visits(
        model=model,
        travel_id=tid,
        travel_df=valid_travel_df,
        visit_df=valid_visit_df,
        move_df=valid_move_df,
        user_id_map=user_id_map,
        travel_id_map=travel_id_map,
        visit_id_map=visit_area_id_map,
        user_tensor=user_tensor,
        travel_tensor=travel_tensor,
        travel_label_vectors=travel_label_vectors,
        k=5
    )
    if total is None:
        total = results.copy()
    else:
        total = pd.concat([total, results], ignore_index=True)

    print(results[["VISIT_AREA_ID", "VISIT_AREA_NM", "ROAD_NM_ADDR", "X_COORD", "Y_COORD", "pred_score"]])
    print("=" * 50)


  VISIT_AREA_ID        VISIT_AREA_NM             ROAD_NM_ADDR     X_COORD  \
0    2304300002  화성 관광열차 안내소 연무대 매표소  경기 수원시 팔달구 창룡대로103번길 20  127.023339   
1    2304300003                  창룡문                      NaN  127.025143   
2    2304300004            수원 화성 화홍문                      NaN  127.017626   

     Y_COORD  pred_score  
0  37.287878   14.293184  
1  37.287791   14.292967  
2  37.287546   14.290283  
   VISIT_AREA_ID        VISIT_AREA_NM             ROAD_NM_ADDR     X_COORD  \
0     2304300002  화성 관광열차 안내소 연무대 매표소  경기 수원시 팔달구 창룡대로103번길 20  127.023339   
1     2304300003                  창룡문                      NaN  127.025143   
2     2304300004            수원 화성 화홍문                      NaN  127.017626   
3     2304290002              농협안성팜랜드       경기 안성시 공도읍 대신두길 28  127.193517   
4     2304290003              스타필드 안성  경기 안성시 공도읍 서동대로 3930-39  127.147095   
5     2304290004            대동국수 평택역점            경기 평택시 자유로 18  127.091349   
6     2304290005                   편지

In [218]:
model_path = "gnn_visit_recommender.pt"
torch.save(model.state_dict(), model_path)

## 실제 입력 대비

In [198]:
def recommend_top_k_places(model, base_data, new_user_tensor, new_travel_tensor, k=5):
    model.eval()
    
    # 1. base_data 복사
    data = base_data.clone()

    # 2. 유저 추가
    data['user'].x = torch.cat([data['user'].x, new_user_tensor.unsqueeze(0)], dim=0)
    new_user_idx = data['user'].x.shape[0] - 1

    # 3. 여행 추가
    data['travel'].x = torch.cat([data['travel'].x, new_travel_tensor.unsqueeze(0)], dim=0)
    new_travel_idx = data['travel'].x.shape[0] - 1

    # 4. user → travel 엣지 추가
    edge_ut = data[('user', 'traveled', 'travel')].edge_index
    edge_ut = torch.cat([edge_ut, torch.tensor([[new_user_idx], [new_travel_idx]], dtype=torch.long)], dim=1)
    data[('user', 'traveled', 'travel')].edge_index = edge_ut

    # 5. 역방향 travel → user 엣지도 추가
    edge_tu = data[('travel', 'traveled_by', 'user')].edge_index
    edge_tu = torch.cat([edge_tu, torch.tensor([[new_travel_idx], [new_user_idx]], dtype=torch.long)], dim=1)
    data[('travel', 'traveled_by', 'user')].edge_index = edge_tu

    # 6. 모델 추론
    with torch.no_grad():
        scores = model(data.x_dict, data.edge_index_dict)  # shape: [num_visit_area]
        topk = torch.topk(scores, k=k)
        indices = topk.indices.tolist()
        values = topk.values.tolist()

    return list(zip(indices, values))


In [None]:
# new_user_tensor = torch.tensor([...], dtype=torch.float)  # shape: [25]
# new_travel_tensor = torch.tensor([...], dtype=torch.float)  # shape: [29]

# top_k_results = recommend_top_k_places(model, base_data=data, new_user_tensor=new_user_tensor, new_travel_tensor=new_travel_tensor, k=5)

# for i, (idx, score) in enumerate(top_k_results):
#     print(f"TOP {i+1}: visit_area index = {idx}, score = {score:.4f}")


TypeError: must be real number, not ellipsis

# 지도 시각화

In [215]:
import folium

def visualize_recommendations_on_map(result_df):
    # 중심 좌표는 첫 번째 추천 결과 기준
    center_lat = result_df.iloc[0]['Y_COORD']
    center_lon = result_df.iloc[0]['X_COORD']

    m = folium.Map(location=[center_lat, center_lon], zoom_start=11)

    for _, row in result_df.iterrows():
        try:
            lat, lon = row['Y_COORD'], row['X_COORD']
            name = row['VISIT_AREA_NM']
            score = row.get('SCORE', None)
            id = row['TRAVEL_ID']
            popup_text = f"{id} {name}<br>Score: {score:.2f}" if score else name

            folium.Marker(
                location=[lat, lon],
                popup=id,
                tooltip=name,
                icon=folium.Icon(color="blue", icon="info-sign")
            ).add_to(m)
        except:
            ...

    return m


In [216]:
map_obj = visualize_recommendations_on_map(total)
map_obj.save("recommend_map.html")
map_obj  # Jupyter에서는 이 줄만 있으면 바로 표시됨

### todo list
- 방문지 정보 정리 (집, 아파트, 호텔)
- 이동정보 데이터도 정리해야됨
- 방문지 및 이동정보 정리한 것 기반으로 리펙토링 할 것