In [6]:
# web_inference.py
import pandas as pd
import numpy as np
import torch
import pickle
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.data import HeteroData
from datetime import datetime
from sklearn.cluster import KMeans
import random

class ImprovedTravelGNN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, travel_context_dim, 
                 num_heads=4, dropout=0.2):
        super().__init__()
        
        self.gat1 = GATConv(in_channels, hidden_channels // num_heads, 
                           heads=num_heads, dropout=dropout, concat=True)
        self.gat2 = GATConv(hidden_channels, hidden_channels // num_heads, 
                           heads=num_heads, dropout=dropout, concat=True)
        self.gat3 = GATConv(hidden_channels, out_channels, 
                           heads=1, dropout=dropout, concat=False)
        
        self.bn1 = nn.BatchNorm1d(hidden_channels)
        self.bn2 = nn.BatchNorm1d(hidden_channels)
        self.bn3 = nn.BatchNorm1d(out_channels)
        
        self.travel_encoder = nn.Sequential(
            nn.Linear(travel_context_dim, hidden_channels),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels, out_channels)
        )
        
        self.fusion_net = nn.Sequential(
            nn.Linear(out_channels * 2, hidden_channels),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels, out_channels),
            nn.ReLU(),
            nn.Linear(out_channels, out_channels)
        )
        
        self.preference_head = nn.Sequential(
            nn.Linear(out_channels, hidden_channels // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels // 2, 1),
            nn.Sigmoid()
        )
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, data, travel_context, return_attention=False):
        x = data['visit_area'].x
        edge_index = data['visit_area', 'moved_to', 'visit_area'].edge_index
        
        x1 = self.gat1(x, edge_index)
        x1 = self.bn1(x1)
        x1 = F.relu(x1)
        x1 = self.dropout(x1)
        
        x2 = self.gat2(x1, edge_index)
        x2 = self.bn2(x2)
        x2 = F.relu(x2 + x1)
        x2 = self.dropout(x2)
        
        graph_embedding = self.gat3(x2, edge_index)
        graph_embedding = self.bn3(graph_embedding)
        
        travel_embedding = self.travel_encoder(travel_context)
        travel_embedding_expanded = travel_embedding.expand(graph_embedding.size(0), -1)
        
        fused_features = torch.cat([graph_embedding, travel_embedding_expanded], dim=1)
        final_embedding = self.fusion_net(fused_features)
        
        preference_scores = self.preference_head(final_embedding)
        
        return final_embedding, preference_scores

class TravelRecommendationSystem:
    def __init__(self, model_path='travel_recommendation_model.pt', data_path='travel_data.pkl'):
        """웹용 여행 추천 시스템 초기화"""
        
        print("🚀 시스템 초기화 중...")
        
        # 1. 데이터 로드
        with open(data_path, 'rb') as f:
            self.data_dict = pickle.load(f)
        
        self.visit_area_df = self.data_dict['visit_area_df']
        self.graph_data = self.data_dict['graph_data']
        self.visit_scaler = self.data_dict['visit_scaler']
        self.travel_scaler = self.data_dict['travel_scaler']
        
        # 2. 디바이스 설정
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.graph_data = self.graph_data.to(self.device)
        
        # 3. 모델 로드
        checkpoint = torch.load(model_path, map_location=self.device)
        model_config = checkpoint['model_config']
        
        self.model = ImprovedTravelGNN(**model_config).to(self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()
        
        # 4. 피드백 시스템 초기화
        self.user_feedback_history = []
        self.preference_weights = None
        self.excluded_ids = set()
        
        print("✅ 시스템 초기화 완료!")
    
    def process_travel_input(self, travel_info):
        """웹에서 받은 여행 정보 전처리"""
        
        travel_feature_cols = [
            'TOTAL_COST_BINNED_ENCODED', 'WITH_PET', 'MONTH', 'DURATION',
            'MVMN_기타', 'MVMN_대중교통', 'MVMN_자가용',
            'TRAVEL_PURPOSE_1', 'TRAVEL_PURPOSE_2', 'TRAVEL_PURPOSE_3',
            'TRAVEL_PURPOSE_4', 'TRAVEL_PURPOSE_5', 'TRAVEL_PURPOSE_6',
            'TRAVEL_PURPOSE_7', 'TRAVEL_PURPOSE_8', 'TRAVEL_PURPOSE_9',
            'WHOWITH_2인여행', 'WHOWITH_가족여행', 'WHOWITH_기타',
            'WHOWITH_단독여행', 'WHOWITH_친구/지인 여행'
        ]
        
        processed_info = travel_info.copy()
        
        # 반려동물 동반
        mission_list = processed_info['mission_ENC'].strip().split(',')
        processed_info['WITH_PET'] = 1 if '0' in mission_list else 0
            
        # 여행 목적
        for i in range(1, 10):
            processed_info[f'TRAVEL_PURPOSE_{i}'] = 1 if str(i) in mission_list else 0
            
        # 날짜 처리
        dates = processed_info['date_range'].split(' - ')
        start_date = datetime.strptime(dates[0].strip(), "%Y-%m-%d")
        end_date = datetime.strptime(dates[1].strip(), "%Y-%m-%d")
        
        processed_info['MONTH'] = end_date.month
        processed_info['DURATION'] = max(1, (end_date - start_date).days)
        
        # 교통수단
        for m in ['자가용', '대중교통', '기타']:
            processed_info[f"MVMN_{m}"] = 0
        
        if processed_info['MVMN_NM_ENC'] == '1':
            processed_info['MVMN_자가용'] = 1
        elif processed_info['MVMN_NM_ENC'] == '2':
            processed_info['MVMN_대중교통'] = 1
        else:
            processed_info['MVMN_기타'] = 1
        
        # 동행자
        whowith_onehot = [0] * 5
        idx = int(processed_info['whowith_ENC']) - 1
        if 0 <= idx < 5:
            whowith_onehot[idx] = 1
        
        processed_info.update({
            'WHOWITH_단독여행': whowith_onehot[0],
            'WHOWITH_2인여행': whowith_onehot[1],
            'WHOWITH_가족여행': whowith_onehot[2],
            'WHOWITH_친구/지인 여행': whowith_onehot[3],
            'WHOWITH_기타': whowith_onehot[4],
        })
        
        # 비용
        processed_info['TOTAL_COST_BINNED_ENCODED'] = int(processed_info['TOTAL_COST'])
        
        # 최종 벡터 생성
        travel_vector = [int(processed_info.get(k, 0)) for k in travel_feature_cols]
        
        return np.array([travel_vector]).astype(np.float32), processed_info['DURATION']
    
    def get_recommendations(self, travel_info, top_k=20, diversity_weight=0.3):
        """여행 추천 생성"""
        
        # 1. 여행 정보 전처리
        travel_tensor, duration = self.process_travel_input(travel_info)
        travel_context = torch.tensor(travel_tensor, dtype=torch.float32).to(self.device)
        
        # 2. 모델 추론
        with torch.no_grad():
            embeddings, preference_scores = self.model(self.graph_data, travel_context)
        
        scores = preference_scores.squeeze()
        
        # 3. 제외된 항목들 낮은 점수로 설정
        for exclude_id in self.excluded_ids:
            matching_indices = self.visit_area_df[
                self.visit_area_df['NEW_VISIT_AREA_ID'] == exclude_id
            ].index.tolist()
            
            for idx in matching_indices:
                if idx < len(scores):
                    scores[idx] = -1.0
        
        # 4. 피드백 기반 점수 조정
        if self.preference_weights:
            scores = self._apply_preference_weights(scores)
        
        # 5. MMR 기반 다양성 추천
        recommendations = self._mmr_selection(scores, embeddings, top_k, diversity_weight)
        
        # 6. 중복 제거 및 유효한 추천만 선택
        final_recommendations = self._filter_unique_recommendations(recommendations, max_items=15)
        
        # 7. 최적 경로 생성
        optimized_routes = self._generate_optimized_routes(final_recommendations, duration)
        
        return {
            'routes': optimized_routes,
            'recommendations_data': final_recommendations,
            'embeddings': embeddings,
            'duration': duration
        }
    
    def update_feedback(self, liked_items, disliked_items, embeddings):
        """사용자 피드백 업데이트"""
        
        # 1. 제외 목록 업데이트
        for item_id in disliked_items:
            self.excluded_ids.add(item_id)
        
        # 2. 피드백 히스토리 저장
        feedback = {
            'liked': liked_items,
            'disliked': disliked_items,
            'timestamp': datetime.now()
        }
        self.user_feedback_history.append(feedback)
        
        # 3. 선호도 가중치 업데이트
        self.preference_weights = self._calculate_preference_weights()
        
        print(f"✅ 피드백 업데이트: 좋아요 {len(liked_items)}개, 싫어요 {len(disliked_items)}개")
        
        return len(self.excluded_ids)
    
    def _apply_preference_weights(self, scores):
        """피드백 기반 점수 조정"""
        if not self.preference_weights:
            return scores
            
        adjusted_scores = scores.clone()
        
        for i, row in self.visit_area_df.iterrows():
            if i >= len(adjusted_scores):
                break
                
            area_type = row.get('VISIT_AREA_TYPE_CD', 0)
            
            if area_type in self.preference_weights.get('preferred_types', []):
                adjusted_scores[i] *= 1.3
            elif area_type in self.preference_weights.get('avoided_types', []):
                adjusted_scores[i] *= 0.7
        
        return adjusted_scores
    
    def _mmr_selection(self, scores, embeddings, top_k, diversity_weight):
        """MMR 기반 다양성 선택"""
        recommendations = []
        remaining_indices = list(range(len(scores)))
        
        # 제외된 인덱스 제거
        for exclude_id in self.excluded_ids:
            matching_indices = self.visit_area_df[
                self.visit_area_df['NEW_VISIT_AREA_ID'] == exclude_id
            ].index.tolist()
            for idx in matching_indices:
                if idx in remaining_indices:
                    remaining_indices.remove(idx)
        
        # 첫 번째 추천
        if remaining_indices:
            valid_scores = [(i, scores[i].item()) for i in remaining_indices if scores[i].item() >= 0]
            if valid_scores:
                best_idx = max(valid_scores, key=lambda x: x[1])[0]
                recommendations.append(best_idx)
                remaining_indices.remove(best_idx)
        
        # 나머지 MMR 선택
        for _ in range(min(top_k - 1, len(remaining_indices))):
            if not remaining_indices:
                break
            
            best_score = -float('inf')
            best_idx = None
            
            for idx in remaining_indices:
                if scores[idx].item() < 0:
                    continue
                
                relevance = scores[idx].item()
                
                # 다양성 계산
                similarities = []
                for rec_idx in recommendations:
                    sim = F.cosine_similarity(
                        embeddings[idx:idx+1], 
                        embeddings[rec_idx:rec_idx+1]
                    ).item()
                    similarities.append(sim)
                
                diversity = 1 - max(similarities) if similarities else 1
                final_score = (1 - diversity_weight) * relevance + diversity_weight * diversity
                
                if final_score > best_score:
                    best_score = final_score
                    best_idx = idx
            
            if best_idx is not None:
                recommendations.append(best_idx)
                remaining_indices.remove(best_idx)
        
        return recommendations
    
    def _filter_unique_recommendations(self, recommendations, max_items=10):
        """중복 제거 및 유효한 추천 필터링"""
        unique_recommendations = []
        seen_ids = set()
        
        for idx in recommendations:
            if idx < len(self.visit_area_df):
                row = self.visit_area_df.iloc[idx]
                area_id = row['NEW_VISIT_AREA_ID']
                
                if area_id not in seen_ids and area_id not in self.excluded_ids and area_id != 0:
                    unique_recommendations.append({
                        'idx': idx,
                        'id': area_id,
                        'name': row['VISIT_AREA_NM'],
                        'coords': [row['X_COORD'], row['Y_COORD']],
                        'type': row.get('VISIT_AREA_TYPE_CD', 0)
                    })
                    seen_ids.add(area_id)
                
                if len(unique_recommendations) >= max_items:
                    break
        
        return unique_recommendations
    
    def _generate_optimized_routes(self, recommendations, duration):
        """최적화된 일별 경로 생성"""
        if not recommendations:
            return {}
        
        coords = np.array([rec['coords'] for rec in recommendations])
        coords = np.nan_to_num(coords, nan=0.0)
        
        # K-means 클러스터링
        n_clusters = min(duration, len(recommendations))
        if n_clusters > 1:
            kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
            day_labels = kmeans.fit_predict(coords)
        else:
            day_labels = np.zeros(len(recommendations))
        
        # 일별 그룹 생성
        daily_groups = {}
        for i, rec in enumerate(recommendations):
            day = int(day_labels[i])
            if day not in daily_groups:
                daily_groups[day] = []
            daily_groups[day].append(rec)
        
        # 단일 장소 일정 재배치
        for day, locs in list(daily_groups.items()):
            if len(locs) == 1 and len(daily_groups) > 1:
                target_day = min(daily_groups.keys())
                daily_groups[target_day].extend(locs)
                del daily_groups[day]
        
        # TSP 최적화
        optimized_routes = {}
        for day, locations in daily_groups.items():
            if len(locations) > 1:
                optimized_routes[day] = self._solve_tsp_simple(locations)
            else:
                optimized_routes[day] = locations
        
        return optimized_routes
    
    def _solve_tsp_simple(self, locations):
        """간단한 TSP 해법 (Nearest Neighbor)"""
        if len(locations) <= 2:
            return locations
        
        coords = np.array([loc['coords'] for loc in locations])
        n = len(coords)
        
        # 거리 행렬
        dist_matrix = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                dist_matrix[i][j] = np.linalg.norm(coords[i] - coords[j])
        
        # Nearest Neighbor 알고리즘
        unvisited = set(range(1, n))
        current = 0
        route = [0]
        
        while unvisited:
            nearest = min(unvisited, key=lambda x: dist_matrix[current][x])
            route.append(nearest)
            unvisited.remove(nearest)
            current = nearest
        
        return [locations[i] for i in route]
    
    def _calculate_preference_weights(self):
        """피드백 히스토리 기반 선호도 가중치 계산"""
        if not self.user_feedback_history:
            return None
        
        liked_types = []
        disliked_types = []
        
        for feedback in self.user_feedback_history:
            for area_id in feedback['liked']:
                matching_rows = self.visit_area_df[
                    self.visit_area_df['NEW_VISIT_AREA_ID'] == area_id
                ]
                if not matching_rows.empty:
                    liked_types.append(matching_rows.iloc[0].get('VISIT_AREA_TYPE_CD', 0))
            
            for area_id in feedback['disliked']:
                matching_rows = self.visit_area_df[
                    self.visit_area_df['NEW_VISIT_AREA_ID'] == area_id
                ]
                if not matching_rows.empty:
                    disliked_types.append(matching_rows.iloc[0].get('VISIT_AREA_TYPE_CD', 0))
        
        return {
            'preferred_types': list(set(liked_types)),
            'avoided_types': list(set(disliked_types))
        }
    
    def reset_feedback(self):
        """피드백 초기화"""
        self.user_feedback_history = []
        self.preference_weights = None
        self.excluded_ids = set()
        print("🔄 피드백 시스템 초기화 완료")

# 웹 사용 예시
if __name__ == "__main__":
    # 시스템 초기화
    recommender = TravelRecommendationSystem()
    
    # 여행 정보 예시
    travel_example = {
        'mission_ENC': '0,1,2',
        'date_range': '2025-09-28 - 2025-09-30',
        'TOTAL_COST': '2',
        'MVMN_NM_ENC': '2',
        'whowith_ENC': '2'
    }
    
    # 초기 추천
    result = recommender.get_recommendations(travel_example)
    print("🗓️ 추천 일정:")
    for day, route in result['routes'].items():
        print(f"Day {day + 1}: {[loc['name'] for loc in route]}")
    
    # 피드백 처리 (예시)
    liked_ids = [result['recommendations_data'][0]['id']]
    disliked_ids = [result['recommendations_data'][1]['id']]
    
    recommender.update_feedback(liked_ids, disliked_ids, result['embeddings'])
    
    # 재추천
    result2 = recommender.get_recommendations(travel_example)
    print("\n🔄 피드백 반영 후:")
    for day, route in result2['routes'].items():
        print(f"Day {day + 1}: {[loc['name'] for loc in route]}")

🚀 시스템 초기화 중...
✅ 시스템 초기화 완료!
🗓️ 추천 일정:
Day 1: ['제주 국제공항', '서가 앤 쿡 KTX 서울 역사점', '삼진 어묵 부산역 광장점']
Day 2: ['현대 프리미엄 아울렛 스페이스 원', '코엑스', '장인 닭갈비 강남점', '서울랜드', '왕생가 칼국수', '연천 호로 고루', '화개산 손칼국수', '투썸플레이스 화성사 강점', '아산역', '고향식당', '롯데리아 철원 와수점', '경포대']
✅ 피드백 업데이트: 좋아요 1개, 싫어요 1개

🔄 피드백 반영 후:
Day 2: ['제주 국제공항', '순천역', '통영종합버스터미널', '부산역', '포항역']
Day 1: ['상계역 4호선', '먹골역 7호선', '아이테코', '서울 갈산 초등학교 후문', '인천역 1호선', '왕산 마리나항', '만호 LPG 충전소', '가평휴게소 춘천 방향', '산정호수 하동 주차장', '강릉 대관령휴게소 인천방향']
