In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv, GATConv, global_mean_pool, global_max_pool
from torch_geometric.data import HeteroData
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import random
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

class ImprovedTravelGNN(nn.Module):
    """Three-layer GAT encoder over visit-area nodes, fused with a travel-context vector.

    The graph branch stacks three ``GATConv`` layers with batch norm; the
    context branch is a small MLP. Both produce ``out_channels``-wide vectors
    which are concatenated per node and passed through ``fusion_net``. A
    sigmoid head turns the fused embedding into one score per node.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, travel_context_dim, 
                 num_heads=4, dropout=0.2):
        """Build the layers.

        Args:
            in_channels: Width of the node feature matrix.
            hidden_channels: Width of the hidden node representation.
                NOTE(review): ``bn1``/``bn2`` and the residual add assume the
                concatenated head outputs are exactly ``hidden_channels`` wide,
                which presumably requires it to be divisible by ``num_heads``.
            out_channels: Width of the final node embedding.
            travel_context_dim: Width of the travel-context input vector.
            num_heads: Attention heads used by the first two GAT layers.
            dropout: Dropout rate shared by the GAT layers and the MLPs.
        """
        super().__init__()
        
        # Two multi-head layers (heads concatenated) followed by a single-head
        # projection down to out_channels.
        self.gat1 = GATConv(in_channels, hidden_channels // num_heads, 
                           heads=num_heads, dropout=dropout, concat=True)
        self.gat2 = GATConv(hidden_channels, hidden_channels // num_heads, 
                           heads=num_heads, dropout=dropout, concat=True)
        self.gat3 = GATConv(hidden_channels, out_channels, 
                           heads=1, dropout=dropout, concat=False)
        
        # One batch norm per GAT layer output.
        self.bn1 = nn.BatchNorm1d(hidden_channels)
        self.bn2 = nn.BatchNorm1d(hidden_channels)
        self.bn3 = nn.BatchNorm1d(out_channels)
        
        # Maps the travel-context vector into the same width as the node embedding.
        self.travel_encoder = nn.Sequential(
            nn.Linear(travel_context_dim, hidden_channels),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels, out_channels)
        )
        
        # Consumes [node embedding | travel embedding], hence out_channels * 2 inputs.
        self.fusion_net = nn.Sequential(
            nn.Linear(out_channels * 2, hidden_channels),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels, out_channels),
            nn.ReLU(),
            nn.Linear(out_channels, out_channels)
        )
        
        # Per-node scalar score squashed into (0, 1) by the sigmoid.
        self.preference_head = nn.Sequential(
            nn.Linear(out_channels, hidden_channels // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels // 2, 1),
            nn.Sigmoid()
        )
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, data, travel_context, return_attention=False):
        """Compute fused node embeddings and per-node preference scores.

        Args:
            data: Object indexable as ``data['visit_area'].x`` and
                ``data['visit_area', 'moved_to', 'visit_area'].edge_index``.
                Edge attributes are not read here.
            travel_context: Input to ``travel_encoder``.
                NOTE(review): the ``expand`` below only works when the encoded
                context has a single row (or already one row per node) - confirm
                callers always pass a single context row.
            return_attention: Accepted but never read in this method.

        Returns:
            Tuple ``(final_embedding, preference_scores)`` with one row per node.
        """
        x = data['visit_area'].x
        edge_index = data['visit_area', 'moved_to', 'visit_area'].edge_index
        
        x1 = self.gat1(x, edge_index)
        x1 = self.bn1(x1)
        x1 = F.relu(x1)
        x1 = self.dropout(x1)
        
        x2 = self.gat2(x1, edge_index)
        x2 = self.bn2(x2)
        # Residual connection from the first layer's output.
        x2 = F.relu(x2 + x1)
        x2 = self.dropout(x2)
        
        graph_embedding = self.gat3(x2, edge_index)
        graph_embedding = self.bn3(graph_embedding)
        
        travel_embedding = self.travel_encoder(travel_context)
        # Repeat the context embedding so every node row can be paired with it.
        travel_embedding_expanded = travel_embedding.expand(graph_embedding.size(0), -1)
        
        fused_features = torch.cat([graph_embedding, travel_embedding_expanded], dim=1)
        final_embedding = self.fusion_net(fused_features)
        
        preference_scores = self.preference_head(final_embedding)
        
        return final_embedding, preference_scores

class EnhancedDataProcessor:
    """Builds the node-feature matrix and edge tensors for the visit-area graph."""

    # MVMN_CD_1 transport codes grouped into coarse modes; any other value
    # (including missing) is treated as "other".
    _DRIVE_CODES = (1, 2, 3)
    _PUBLIC_CODES = (4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 50)
    _SATISFACTION_COLS = ('DGSTFN', 'REVISIT_INTENTION', 'RCMDTN_INTENTION')

    def __init__(self):
        self.visit_scaler = RobustScaler()
        self.travel_scaler = StandardScaler()

    def process_visit_area_features(self, visit_area_df):
        """Return a scaled float feature matrix with one row per visit area.

        Columns, in order: X/Y coordinates, one-hot ``VISIT_AREA_TYPE_CD``,
        one-hot ``VISIT_CHC_REASON_CD``, three normalised satisfaction scores
        and a weighted popularity score. The input frame is not modified.

        Note: ``visit_scaler`` is re-fitted on every call (``fit_transform``).

        Args:
            visit_area_df: DataFrame containing the columns named above.

        Returns:
            ``numpy.ndarray`` of scaled features.
        """
        df = visit_area_df.copy()

        # Missing coordinates fall back to the column mean; a missing reason
        # code becomes category 0.
        for axis in ('X_COORD', 'Y_COORD'):
            df[axis] = df[axis].fillna(df[axis].mean())
        df['VISIT_CHC_REASON_CD'] = df['VISIT_CHC_REASON_CD'].fillna(0)

        type_onehot = pd.get_dummies(df['VISIT_AREA_TYPE_CD'], prefix='type')
        reason_onehot = pd.get_dummies(df['VISIT_CHC_REASON_CD'], prefix='reason')

        # Missing scores default to 3; (x - 1) / 4 maps 1 -> 0 and 5 -> 1.
        norm_cols = []
        for col in self._SATISFACTION_COLS:
            df[col] = df[col].fillna(3)
            df[f'{col}_norm'] = (df[col] - 1) / 4.0
            norm_cols.append(f'{col}_norm')

        df['popularity_score'] = (
            df['DGSTFN_norm'] * 0.4
            + df['REVISIT_INTENTION_norm'] * 0.3
            + df['RCMDTN_INTENTION_norm'] * 0.3
        )

        features = pd.concat(
            [
                df[['X_COORD', 'Y_COORD']],
                type_onehot,
                reason_onehot,
                df[norm_cols + ['popularity_score']],
            ],
            axis=1,
        )

        return self.visit_scaler.fit_transform(features.values.astype(np.float32))

    def create_enhanced_edges(self, move_df, visit_area_df):
        """Build directed edges between consecutive trip destinations.

        Within each ``TRAVEL_ID`` the rows are ordered by ``TRIP_ID``; every
        pair of consecutive rows whose ``END_NEW_ID`` values are both present
        yields one edge ``previous END_NEW_ID -> current END_NEW_ID``. The
        duration and transport code are taken from the later row of the pair.

        NOTE(review): edge endpoints are raw ``END_NEW_ID`` values; this only
        lines up with node rows if those IDs equal row positions in the node
        feature matrix - confirm against the data.

        Args:
            move_df: Movement records with ``TRAVEL_ID``, ``TRIP_ID`` and
                ``END_NEW_ID``; ``DURATION_MINUTES`` and ``MVMN_CD_1`` are
                optional and default to 0 when absent.
            visit_area_df: Unused; kept for interface compatibility.

        Returns:
            ``(edge_index, edge_attr)`` where ``edge_index`` is a ``long``
            tensor of shape ``(2, E)`` and ``edge_attr`` is a ``float32``
            tensor of shape ``(E, 5)`` holding
            ``[duration, is_drive, is_public, is_other, weight]``.
            When no edge exists, empty tensors of those shapes are returned.
        """
        # Stable lexicographic sort keeps the per-travel TRIP_ID order that the
        # group-by-group traversal would produce.
        ordered = move_df.sort_values(["TRAVEL_ID", "TRIP_ID"], kind="stable")
        to_ids = ordered["END_NEW_ID"]
        # Previous destination inside the same travel; NaN for the first trip.
        from_ids = ordered.groupby("TRAVEL_ID")["END_NEW_ID"].shift(1)

        keep = (from_ids.notna() & to_ids.notna()).to_numpy()
        n_edges = int(keep.sum())
        if n_edges == 0:
            # torch.tensor() cannot convert the empty object-dtype array an
            # empty frame would produce, so return well-typed empties instead.
            return (
                torch.empty((2, 0), dtype=torch.long),
                torch.empty((0, 5), dtype=torch.float32),
            )

        legs = ordered.iloc[np.flatnonzero(keep)]
        src = from_ids.to_numpy()[keep].astype(np.int64)
        dst = to_ids.to_numpy()[keep].astype(np.int64)

        if "DURATION_MINUTES" in legs.columns:
            duration = legs["DURATION_MINUTES"].fillna(0).to_numpy(dtype=np.float64)
        else:
            duration = np.zeros(n_edges)

        if "MVMN_CD_1" in legs.columns:
            codes = legs["MVMN_CD_1"]
        else:
            codes = pd.Series(0, index=legs.index)

        # One-hot transport mode.
        is_drive = codes.isin(list(self._DRIVE_CODES)).to_numpy()
        is_public = codes.isin(list(self._PUBLIC_CODES)).to_numpy()
        is_other = ~(is_drive | is_public)

        edge_index = torch.tensor(np.vstack([src, dst]), dtype=torch.long)
        edge_attr = torch.tensor(
            np.column_stack([
                duration,
                is_drive,
                is_public,
                is_other,
                np.ones(n_edges),  # default edge weight
            ]),
            dtype=torch.float32,
        )

        return edge_index, edge_attr

class SmartRecommendationEngine:
    """Ranks visit areas with MMR-style diversity and simple feedback re-weighting.

    Rows of ``visit_area_df`` are matched to model outputs by *position*
    (row ``i`` <-> score ``i``), independent of the DataFrame's index labels.
    """

    def __init__(self, model, visit_area_df, device):
        self.model = model
        self.visit_area_df = visit_area_df
        self.device = device
        self.user_feedback_history = []
        self.preference_weights = None

    def get_recommendations(self, data, travel_context, top_k=10, diversity_weight=0.3,
                          excluded_ids=None):
        """Return up to ``top_k`` node positions balancing relevance and diversity.

        The first pick is the highest-scoring non-excluded node. Each further
        pick maximises
        ``(1 - diversity_weight) * relevance + diversity_weight * (1 - max_sim)``
        where ``max_sim`` is the largest cosine similarity to anything already
        picked. Candidates with negative relevance are never picked after the
        first slot.

        Args:
            data: Graph input forwarded to the model.
            travel_context: Context input forwarded to the model.
            top_k: Maximum number of recommendations; ``<= 0`` yields none.
            diversity_weight: Weight of the diversity term.
            excluded_ids: Optional iterable of ``NEW_VISIT_AREA_ID`` values
                that must not be recommended.

        Returns:
            ``(recommendations, embeddings, preference_scores)`` where
            ``recommendations`` is a list of int row positions and the two
            tensors are the unmodified model outputs.
        """
        self.model.eval()

        with torch.no_grad():
            embeddings, preference_scores = self.model(data, travel_context)

        # Work on a flat copy: reshape(-1) keeps a 1-D shape even for a single
        # node, and clone() stops exclusions from writing through to the
        # tensor that is handed back to the caller.
        scores = preference_scores.detach().reshape(-1).clone()
        num_nodes = scores.numel()

        available = torch.ones(num_nodes, dtype=torch.bool, device=scores.device)
        excluded_pos = self._positions_of(excluded_ids, num_nodes).to(scores.device)
        if excluded_pos.numel():
            scores[excluded_pos] = -1.0
            available[excluded_pos] = False

        # Feedback-based score adjustment.
        if self.preference_weights is not None:
            scores = self._apply_preference_weights(scores, embeddings)

        recommendations = []
        if top_k <= 0 or not bool(available.any()):
            return recommendations, embeddings, preference_scores

        # float64 mirrors the precision of scalar Python arithmetic.
        relevance = scores.double()
        neg_inf = torch.full_like(relevance, -float('inf'))

        # First slot: pure relevance. argmax returns the first maximal index.
        first = int(torch.argmax(torch.where(available, relevance, neg_inf)))
        recommendations.append(first)
        available[first] = False

        eligible = available & (relevance >= 0)
        max_sim = torch.full_like(relevance, -float('inf'))
        last = first
        while len(recommendations) < top_k and bool(eligible.any()):
            # Only the similarity to the newest pick is new information; fold
            # it into the running maximum instead of recomputing all pairs.
            sims = F.cosine_similarity(embeddings, embeddings[last:last + 1]).double()
            max_sim = torch.maximum(max_sim, sims)
            mmr = (1 - diversity_weight) * relevance + diversity_weight * (1 - max_sim)

            pick = int(torch.argmax(torch.where(eligible, mmr, neg_inf)))
            recommendations.append(pick)
            eligible[pick] = False
            last = pick

        return recommendations, embeddings, preference_scores

    def _positions_of(self, area_ids, limit):
        """Return row positions (< ``limit``) whose NEW_VISIT_AREA_ID is in ``area_ids``."""
        if not area_ids:
            return torch.empty(0, dtype=torch.long)
        hits = self.visit_area_df['NEW_VISIT_AREA_ID'].isin(list(area_ids)).to_numpy()
        positions = np.flatnonzero(hits)
        return torch.as_tensor(positions[positions < limit], dtype=torch.long)

    def _apply_preference_weights(self, scores, embeddings):
        """Scale scores by area type: x1.2 for preferred types, x0.8 for avoided ones.

        A type listed as both preferred and avoided receives both factors.
        ``embeddings`` is unused. Returns a new tensor; ``scores`` is left
        untouched (it is returned as-is when no weights are set).
        """
        if not self.preference_weights:
            return scores

        adjusted = scores.clone()
        limit = min(len(self.visit_area_df), adjusted.numel())
        if limit == 0:
            return adjusted

        if 'VISIT_AREA_TYPE_CD' in self.visit_area_df.columns:
            area_types = self.visit_area_df['VISIT_AREA_TYPE_CD'].tolist()[:limit]
        else:
            area_types = [0] * limit

        preferred = self.preference_weights.get('preferred_types', [])
        avoided = self.preference_weights.get('avoided_types', [])

        boost = [1.2 if t in preferred else 1.0 for t in area_types]
        damp = [0.8 if t in avoided else 1.0 for t in area_types]

        # Two separate multiplications keep the same rounding as applying the
        # factors one after the other per element.
        adjusted[:limit] *= torch.tensor(boost, dtype=adjusted.dtype, device=adjusted.device)
        adjusted[:limit] *= torch.tensor(damp, dtype=adjusted.dtype, device=adjusted.device)

        return adjusted

    def update_with_feedback(self, liked_items, disliked_items, embeddings):
        """Record one round of feedback and recompute preference weights.

        Args:
            liked_items: Row positions the user liked.
            disliked_items: Row positions the user disliked.
            embeddings: Embedding tensor; a NumPy copy is stored in the history.

        Returns:
            The refreshed ``preference_weights`` dict.
        """
        self.user_feedback_history.append({
            'liked': liked_items,
            'disliked': disliked_items,
            'embeddings': embeddings.detach().cpu().numpy(),
        })

        self.preference_weights = self._calculate_preference_weights()

        print(f"✅ 피드백 업데이트 완료: 좋아요 {len(liked_items)}개, 싫어요 {len(disliked_items)}개")

        return self.preference_weights

    def _calculate_preference_weights(self):
        """Derive preferred/avoided area types from the whole feedback history.

        Returns ``None`` when there is no history; otherwise a dict with
        ``preferred_types``, ``avoided_types`` and ``preferred_regions``
        (the coordinates of every liked row).
        """
        if not self.user_feedback_history:
            return None

        n_rows = len(self.visit_area_df)
        liked_rows, disliked_rows = [], []
        for feedback in self.user_feedback_history:
            # Positions past the end of the frame are silently ignored.
            liked_rows.extend(
                self.visit_area_df.iloc[i] for i in feedback['liked'] if i < n_rows
            )
            disliked_rows.extend(
                self.visit_area_df.iloc[i] for i in feedback['disliked'] if i < n_rows
            )

        return {
            'preferred_types': list({row.get('VISIT_AREA_TYPE_CD', 0) for row in liked_rows}),
            'avoided_types': list({row.get('VISIT_AREA_TYPE_CD', 0) for row in disliked_rows}),
            'preferred_regions': [
                (row.get('X_COORD', 0), row.get('Y_COORD', 0)) for row in liked_rows
            ],
        }

class OptimizedRouteGenerator:
    """Splits recommended places into days and orders each day's stops."""

    def __init__(self):
        pass

    def generate_daily_routes(self, recommendations, visit_area_df, travel_duration,
                            optimization_method='kmeans_tsp'):
        """Group recommended places into days and order each day's visits.

        Places are clustered on their coordinates with K-means (one cluster per
        day, capped by the number of places). A day that ends up with a single
        place is merged into another day, preferring day 0; a lone day is kept
        when there is nothing to merge it into. Each remaining day is ordered
        with a nearest-neighbour tour.

        Args:
            recommendations: Row positions into ``visit_area_df``; positions
                past the end of the frame are skipped.
            visit_area_df: Frame with ``X_COORD``, ``Y_COORD``,
                ``NEW_VISIT_AREA_ID`` and ``VISIT_AREA_NM``
                (``VISIT_AREA_TYPE_CD`` optional).
            travel_duration: Number of days; values ``<= 0`` are treated as 1.
            optimization_method: Currently unused.

        Returns:
            Dict mapping a day label (int cluster id, not necessarily
            contiguous after merging) to an ordered list of location dicts
            with keys ``id``, ``name``, ``coords``, ``idx`` and ``type``.
            Empty dict when no valid recommendation is given.
        """
        if travel_duration <= 0:
            travel_duration = 1

        n_rows = len(visit_area_df)
        locations = []
        for idx in recommendations:
            if idx >= n_rows:
                continue
            row = visit_area_df.iloc[idx]
            locations.append({
                'id': row['NEW_VISIT_AREA_ID'],
                'name': row['VISIT_AREA_NM'],
                'coords': [row['X_COORD'], row['Y_COORD']],
                'idx': idx,
                'type': row.get('VISIT_AREA_TYPE_CD', 0)
            })

        if not locations:
            return {}

        # Missing coordinates are replaced by 0 for clustering purposes.
        coords = np.nan_to_num(
            np.array([loc['coords'] for loc in locations], dtype=float), nan=0.0
        )

        # K-means clustering: one cluster per travel day.
        n_clusters = min(travel_duration, len(locations))
        if n_clusters > 1:
            kmeans = KMeans(n_clusters=n_clusters, random_state=42)
            day_labels = kmeans.fit_predict(coords)
        else:
            day_labels = np.zeros(len(locations))

        daily_groups = {}
        for loc, label in zip(locations, day_labels):
            daily_groups.setdefault(int(label), []).append(loc)

        # Fold single-stop days into another day. The target must be a
        # *different* day: merging a day into itself and then deleting it
        # would silently drop the stop.
        for day in list(daily_groups):
            stops = daily_groups.get(day)
            if stops is None or len(stops) != 1:
                continue
            other_days = [d for d in daily_groups if d != day]
            if not other_days:
                break  # nothing to merge into; keep the lone day
            target_day = 0 if 0 in daily_groups and day != 0 else other_days[0]
            daily_groups[target_day].extend(stops)
            del daily_groups[day]

        # Order the stops within each day.
        optimized_routes = {}
        for day, day_locations in daily_groups.items():
            if len(day_locations) > 1:
                optimized_routes[day] = self._solve_tsp_simple(day_locations)
            else:
                optimized_routes[day] = day_locations

        return optimized_routes

    def _solve_tsp_simple(self, locations):
        """Order locations with a nearest-neighbour tour starting at the first one.

        Lists of two or fewer locations are returned unchanged. Missing
        coordinates are treated as 0, matching the clustering step. Ties are
        broken in favour of the lower list position.
        """
        if len(locations) <= 2:
            return locations

        coords = np.nan_to_num(
            np.array([loc['coords'] for loc in locations], dtype=float), nan=0.0
        )
        n = len(coords)

        # Pairwise Euclidean distance matrix via broadcasting.
        dist_matrix = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=-1)

        # Nearest neighbour heuristic.
        unvisited = list(range(1, n))
        current = 0
        route = [0]
        while unvisited:
            nearest = min(unvisited, key=lambda j: dist_matrix[current][j])
            route.append(nearest)
            unvisited.remove(nearest)
            current = nearest

        return [locations[i] for i in route]

def process_travel_input(travel_info: dict):
    """Encode raw travel form input as a single-row feature vector.

    Derived feature values (``WITH_PET``, ``TRAVEL_PURPOSE_*``, ``MONTH``,
    ``DURATION``, ``MVMN_*``, ``WHOWITH_*``, ``TOTAL_COST_BINNED_ENCODED``)
    are written back into ``travel_info``. The raw input keys, including
    ``mission_ENC``, are left untouched so the same dict can be processed
    more than once.

    Args:
        travel_info: Dict with
            ``mission_ENC`` - comma-separated codes (or an already split
            list); ``'0'`` marks travelling with a pet and ``'1'``..``'9'``
            mark travel purposes;
            ``date_range`` - ``'YYYY-MM-DD - YYYY-MM-DD'``;
            ``MVMN_NM_ENC`` - ``'1'`` car, ``'2'`` public transport, anything
            else "other";
            ``whowith_ENC`` - 1-based companion category (1..5);
            ``TOTAL_COST`` - integer-like cost bin.

    Returns:
        ``numpy.ndarray`` of shape ``(1, 21)`` and dtype ``float32``, ordered
        as ``travel_feature_cols`` below.

    Raises:
        KeyError: If a required key is missing.
        ValueError: If a date or integer field cannot be parsed.
    """
    travel_feature_cols = [
        'TOTAL_COST_BINNED_ENCODED', 'WITH_PET', 'MONTH', 'DURATION',
        'MVMN_기타', 'MVMN_대중교통', 'MVMN_자가용',
        'TRAVEL_PURPOSE_1', 'TRAVEL_PURPOSE_2', 'TRAVEL_PURPOSE_3',
        'TRAVEL_PURPOSE_4', 'TRAVEL_PURPOSE_5', 'TRAVEL_PURPOSE_6',
        'TRAVEL_PURPOSE_7', 'TRAVEL_PURPOSE_8', 'TRAVEL_PURPOSE_9',
        'WHOWITH_2인여행', 'WHOWITH_가족여행', 'WHOWITH_기타',
        'WHOWITH_단독여행', 'WHOWITH_친구/지인 여행'
    ]

    # Parse mission codes into a local set instead of overwriting the input
    # value; each code is stripped so '0, 3' is read the same as '0,3'.
    raw_missions = travel_info['mission_ENC']
    if isinstance(raw_missions, str):
        raw_missions = raw_missions.split(',')
    missions = {str(code).strip() for code in raw_missions}

    # Travelling with a pet.
    travel_info['WITH_PET'] = 1 if '0' in missions else 0

    # Travel purposes.
    for i in range(1, 10):
        travel_info[f'TRAVEL_PURPOSE_{i}'] = 1 if str(i) in missions else 0

    # Dates. DURATION is the day difference between end and start
    # (e.g. 2 for 2025-09-28 .. 2025-09-30); MONTH is taken from the end date.
    dates = travel_info['date_range'].split(' - ')
    start_date = datetime.strptime(dates[0].strip(), "%Y-%m-%d")
    end_date = datetime.strptime(dates[1].strip(), "%Y-%m-%d")

    travel_info['MONTH'] = end_date.month
    travel_info['DURATION'] = (end_date - start_date).days

    # Transport mode one-hot; compare as text so 1 and '1' behave alike.
    for m in ['자가용', '대중교통', '기타']:
        travel_info[f"MVMN_{m}"] = 0

    transport_code = str(travel_info['MVMN_NM_ENC']).strip()
    if transport_code == '1':
        travel_info['MVMN_자가용'] = 1
    elif transport_code == '2':
        travel_info['MVMN_대중교통'] = 1
    else:
        travel_info['MVMN_기타'] = 1

    # Companion one-hot; out-of-range categories leave every flag at 0.
    whowith_onehot = [0] * 5
    idx = int(travel_info['whowith_ENC']) - 1
    if 0 <= idx < 5:
        whowith_onehot[idx] = 1

    travel_info.update({
        'WHOWITH_단독여행': whowith_onehot[0],
        'WHOWITH_2인여행': whowith_onehot[1],
        'WHOWITH_가족여행': whowith_onehot[2],
        'WHOWITH_친구/지인 여행': whowith_onehot[3],
        'WHOWITH_기타': whowith_onehot[4],
    })

    # Cost bin.
    travel_info['TOTAL_COST_BINNED_ENCODED'] = int(travel_info['TOTAL_COST'])

    # Final vector in the fixed column order.
    travel_vector = [int(travel_info.get(k, 0)) for k in travel_feature_cols]

    return np.array([travel_vector]).astype(np.float32)

def simulate_user_feedback():
    """Pick one of three canned feedback scenarios at random.

    Returns:
        Dict with ``"liked"`` and ``"disliked"`` lists of positions into the
        current recommendation list.
    """
    # (liked positions, disliked positions) per scenario.
    canned_scenarios = [
        ([], [0, 2]),      # dislike the first and third places
        ([1], [4, 7]),     # like the second place, dislike two others
        ([0, 3], [5]),     # several likes plus one dislike
    ]

    liked, disliked = random.choice(canned_scenarios)
    return {"liked": liked, "disliked": disliked}

def _unique_area_indices(recommendations, visit_area_df, excluded_ids=(), limit=10):
    """Keep the first ``limit`` positions with a distinct, non-zero, non-excluded area id."""
    unique_indices, seen_ids = [], set()
    for idx in recommendations:
        area_id = visit_area_df.iloc[idx]['NEW_VISIT_AREA_ID']
        if area_id not in seen_ids and area_id not in excluded_ids and area_id != 0:
            unique_indices.append(idx)
            seen_ids.add(area_id)
        if len(unique_indices) == limit:
            break
    return unique_indices


def _print_daily_routes(routes, skip_single_stop_days=False):
    """Print each day's stops; optionally hide single-stop days when other days exist."""
    for day, route in sorted(routes.items()):
        if skip_single_stop_days and len(route) < 2 and len(routes) > 1:
            continue
        print(f"\n📅 Day {day + 1}:")
        for loc in route:
            print(f" - [{loc['id']:3d}] {loc['name']}")


def main_feedback_test():
    """Run the end-to-end demo: initial itinerary, then three simulated feedback rounds.

    Loads the move / visit-area CSVs, builds the graph, creates a freshly
    initialised ``ImprovedTravelGNN`` (no weights are loaded here), prints an
    initial itinerary and then re-plans after each simulated feedback round.
    Places disliked in any round stay excluded in all later rounds.
    """
    print("🚀 피드백 기반 경로 대체 테스트 시작!")
    print("=" * 60)

    # Input data.
    move_path = "../data/VL_csv/move_with_new_id_final.csv"
    visit_area_path = "../data/VL_csv/visit_area_with_new_id_final.csv"

    move_df = pd.read_csv(move_path)
    visit_area_df = pd.read_csv(visit_area_path)

    processor = EnhancedDataProcessor()
    visit_area_tensor = processor.process_visit_area_features(visit_area_df)
    edge_index, edge_attr = processor.create_enhanced_edges(move_df, visit_area_df)

    data = HeteroData()
    data['visit_area'].x = torch.tensor(visit_area_tensor, dtype=torch.float32)
    data['visit_area', 'moved_to', 'visit_area'].edge_index = edge_index
    data['visit_area', 'moved_to', 'visit_area'].edge_attr = edge_attr

    # Example travel request.
    travel_example = {
        'mission_ENC': '0,1,2',
        'date_range': '2025-09-28 - 2025-09-30',
        'start_date': '',
        'end_date': '',
        'TOTAL_COST': '2',
        'MVMN_NM_ENC': '2',
        'whowith_ENC': '2',
        'mission_type': 'normal'
    }
    travel_tensor = process_travel_input(travel_example)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"📱 사용 디바이스: {device}")

    model = ImprovedTravelGNN(
        in_channels=visit_area_tensor.shape[1],
        hidden_channels=128,
        out_channels=64,
        travel_context_dim=travel_tensor.shape[1],
        num_heads=4,
        dropout=0.2
    ).to(device)

    data = data.to(device)
    travel_context_tensor = torch.tensor(travel_tensor, dtype=torch.float32).to(device)
    recommender = SmartRecommendationEngine(model, visit_area_df, device)

    # Initial recommendations, de-duplicated by area id.
    recommendations, embeddings, _ = recommender.get_recommendations(
        data, travel_context_tensor, top_k=20, diversity_weight=0.3
    )
    unique_recommendations = _unique_area_indices(recommendations, visit_area_df)

    # Column 3 of the travel vector is DURATION (see process_travel_input).
    route_generator = OptimizedRouteGenerator()
    travel_duration = int(travel_tensor[0, 3])
    optimized_routes = route_generator.generate_daily_routes(
        unique_recommendations, visit_area_df, travel_duration
    )

    print("\n🗓️ 초기 여행 일정 (최적화):")
    _print_daily_routes(optimized_routes)

    # Area ids disliked so far; kept across rounds so a rejected place
    # cannot come back in a later itinerary.
    excluded_ids = set()

    for round_num in range(3):
        print(f"\n🔄 피드백 라운드 {round_num + 1}")
        feedback = simulate_user_feedback()
        # NOTE(review): feedback positions index the raw top-k list, not the
        # de-duplicated list that was displayed - confirm this is intended.
        liked_indices = [recommendations[i] for i in feedback["liked"] if i < len(recommendations)]
        disliked_indices = [recommendations[i] for i in feedback["disliked"] if i < len(recommendations)]

        recommender.update_with_feedback(liked_indices, disliked_indices, embeddings)

        excluded_ids.update(
            visit_area_df.iloc[idx]['NEW_VISIT_AREA_ID'] for idx in disliked_indices
        )
        recommendations, embeddings, _ = recommender.get_recommendations(
            data, travel_context_tensor, top_k=20, diversity_weight=0.3, excluded_ids=excluded_ids
        )

        unique_recommendations = _unique_area_indices(
            recommendations, visit_area_df, excluded_ids=excluded_ids
        )

        optimized_routes = route_generator.generate_daily_routes(
            unique_recommendations, visit_area_df, travel_duration
        )

        print("\n🎯 피드백 반영 후 최적화된 여행 일정:")
        _print_daily_routes(optimized_routes, skip_single_stop_days=True)

    print("\n✅ 사용자 피드백 기반 경로 대체 테스트 완료!")


# Run the feedback demo only when this code is executed as the main module.
if __name__ == "__main__":
    main_feedback_test()

🚀 피드백 기반 경로 대체 테스트 시작!
📱 사용 디바이스: cpu

🗓️ 초기 여행 일정 (최적화):

📅 Day 1:
 - [634] 그랜드 인터컨티넨탈 서울 파르나스
 - [2399] 그랜드 하얏트 서울
 - [2367] 논 드라이
 - [7810] 보문사
 - [1799] 조약돌 숯불닭갈비
 - [3168] 경포대
 - [350] 이인 휴게소 천안 방향

📅 Day 2:
 - [8094] 동구 공영주차빌딩
 - [5015] 사천 시외버스터미널
 - [184] 제주 국제공항

🔄 피드백 라운드 1
✅ 피드백 업데이트 완료: 좋아요 1개, 싫어요 2개

🎯 피드백 반영 후 최적화된 여행 일정:

📅 Day 1:
 - [4098] 잠실종합운동장
 - [982] 코엑스
 - [8233] 중랑 아트센터
 - [187] 서울시립미술관 서소문 본관
 - [9178] 인천 SSG 랜더스 필드
 - [432] 콩치노 콘크리트
 - [1446] 가평 뮤직 빌리지 음악 역 1939
 - [8200] 이재효 갤러리
 - [6217] 핑크 타이거
 - [184] 제주 국제공항

🔄 피드백 라운드 2
✅ 피드백 업데이트 완료: 좋아요 0개, 싫어요 2개

🎯 피드백 반영 후 최적화된 여행 일정:

📅 Day 1:
 - [8094] 동구 공영주차빌딩
 - [184] 제주 국제공항

📅 Day 2:
 - [4097] 천안삼거리휴게소 서울 방향
 - [350] 이인 휴게소 천안 방향
 - [2101] 서울 갈산 초등학교 후문
 - [2399] 그랜드 하얏트 서울
 - [3047] 익지 장사
 - [8028] 대룡시장
 - [6684] 이마트 춘천점
 - [3168] 경포대

🔄 피드백 라운드 3
✅ 피드백 업데이트 완료: 좋아요 2개, 싫어요 1개

🎯 피드백 반영 후 최적화된 여행 일정:

📅 Day 1:
 - [570] 비케이 에너지 석산 주유소
 - [3264] CU 여의영무예다음점
 - [2043] 생극 농협 하나로마트
 - [8551] 광개토태왕 광장
 - [2399] 그랜

In [10]:
# save_model_and_data.py
import pandas as pd
import numpy as np
import torch
import pickle
from sklearn.preprocessing import StandardScaler, RobustScaler
from torch_geometric.data import HeteroData

def save_model_and_data(move_path="../data/VL_csv/move_with_new_id_final.csv",
                        visit_area_path="../data/VL_csv/visit_area_with_new_id_final.csv",
                        model_out='travel_recommendation_model.pt',
                        data_out='travel_data.pkl',
                        travel_context_dim=25):
    """Build the graph, initialise the model and write both to disk.

    Writes two files: ``model_out`` (``torch.save`` of the model state dict
    plus the constructor config) and ``data_out`` (pickle of the visit-area
    frame, graph data, fitted scalers and device name).

    The model saved here is freshly initialised, not trained; swap in trained
    weights before using the checkpoint for real recommendations.

    NOTE(review): the default ``travel_context_dim`` of 25 differs from the
    21-column vector produced by ``process_travel_input`` in this file -
    confirm which width the deployed model expects.

    Args:
        move_path: CSV with movement records.
        visit_area_path: CSV with visit-area records.
        model_out: Destination of the ``.pt`` checkpoint.
        data_out: Destination of the pickled data bundle.
        travel_context_dim: Width of the travel-context input of the model.
    """
    # 1. Load data.
    print("📊 데이터 로딩 중...")
    move_df = pd.read_csv(move_path)
    visit_area_df = pd.read_csv(visit_area_path)

    # 2. Pre-process.
    print("⚙️ 데이터 전처리 중...")
    processor = EnhancedDataProcessor()
    visit_area_tensor = processor.process_visit_area_features(visit_area_df)
    edge_index, edge_attr = processor.create_enhanced_edges(move_df, visit_area_df)

    # 3. Assemble the graph.
    data = HeteroData()
    data['visit_area'].x = torch.tensor(visit_area_tensor, dtype=torch.float32)
    data['visit_area', 'moved_to', 'visit_area'].edge_index = edge_index
    data['visit_area', 'moved_to', 'visit_area'].edge_attr = edge_attr

    # 4. Initialise the model. The config is defined once so the saved
    #    'model_config' always matches the arguments the model was built with.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_config = {
        'in_channels': visit_area_tensor.shape[1],
        'hidden_channels': 128,
        'out_channels': 64,
        'travel_context_dim': travel_context_dim,  # number of travel features
        'num_heads': 4,
        'dropout': 0.2
    }
    model = ImprovedTravelGNN(**model_config).to(device)

    # 5. Bundle everything needed alongside the model at inference time.
    save_data = {
        'visit_area_df': visit_area_df,
        'graph_data': data,
        'visit_scaler': processor.visit_scaler,
        'travel_scaler': processor.travel_scaler,
        'device': str(device)
    }

    # 6. Write files.
    print("💾 파일 저장 중...")

    # Model checkpoint (.pt).
    torch.save({
        'model_state_dict': model.state_dict(),
        'model_config': model_config
    }, model_out)

    # Data bundle (.pkl). Only load this file from a trusted source:
    # unpickling executes arbitrary code.
    with open(data_out, 'wb') as f:
        pickle.dump(save_data, f)

    print("✅ 저장 완료!")
    print("- travel_recommendation_model.pt: 모델 파라미터")
    print("- travel_data.pkl: 전처리된 데이터 및 스케일러")

# Export the model and data bundle only when this code is executed as the main module.
if __name__ == "__main__":
    save_model_and_data()

📊 데이터 로딩 중...
⚙️ 데이터 전처리 중...
💾 파일 저장 중...
✅ 저장 완료!
- travel_recommendation_model.pt: 모델 파라미터
- travel_data.pkl: 전처리된 데이터 및 스케일러
