In [1]:
from abc import ABC, abstractmethod
import numpy as np
from typing import List, Dict, Optional
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

class TextVectorizer(ABC):
    @abstractmethod
    def vectorize(self, text: str) -> np.ndarray:
        pass

class BGEVectorizer(TextVectorizer):
    def __init__(self, model_path: str):
        # 初始化BGE模型
        self.model_path = model_path
        # 這裡應該加載模型，但由於依賴外部庫，先略過
        pass
    
    def vectorize(self, text: str) -> np.ndarray:
        # 實際應用中使用BGE模型轉換
        # 這裡用隨機向量模擬
        return np.random.rand(1, 768)  # BGE通常輸出768維向量

class TravelItem:
    def __init__(self, 
                 name: str, 
                 description: str, 
                 location: tuple,
                 category: str,
                 price: float,
                 rating: float = 0.0,
                 reviews: List[str] = None):
        self.name = name
        self.description = description
        self.location = location
        self.category = category
        self.price = price
        self.rating = rating
        self.reviews = reviews or []
        self.vector = None
        self.popularity_score = 0.0
        
    def set_vector(self, vector: np.ndarray):
        self.vector = vector
        
    def update_rating(self, new_rating: float):
        self.rating = new_rating
        
    def add_review(self, review: str):
        self.reviews.append(review)
        
    def calculate_popularity_score(self):
        # 簡單的流行度計算公式
        self.popularity_score = (self.rating * 0.7) + (len(self.reviews) * 0.3)
        
class TravelRecommender:
    def __init__(self, vectorizer: TextVectorizer):
        self.vectorizer = vectorizer
        self.items: List[TravelItem] = []
        self.item_vectors: Optional[np.ndarray] = None
        
    def add_item(self, item: TravelItem):
        # 向量化項目描述
        vector = self.vectorizer.vectorize(item.description)
        item.set_vector(vector)
        self.items.append(item)
        
    def build_vector_matrix(self):
        # 構建向量矩陣用於快速相似度計算
        self.item_vectors = np.vstack([item.vector for item in self.items])
        
    def find_similar_items(self, query_text: str, top_k: int = 5) -> List[TravelItem]:
        query_vector = self.vectorizer.vectorize(query_text)
        similarities = cosine_similarity(query_vector, self.item_vectors)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.items[i] for i in top_indices]
    
    def recommend_by_location(self, target_location: tuple, radius: float) -> List[TravelItem]:
        """基於位置的推薦"""
        recommended = []
        for item in self.items:
            distance = np.sqrt(
                (item.location[0] - target_location[0])**2 + 
                (item.location[1] - target_location[1])**2
            )
            if distance <= radius:
                recommended.append(item)
        return sorted(recommended, key=lambda x: x.popularity_score, reverse=True)
    
    def recommend_by_budget(self, max_budget: float) -> List[TravelItem]:
        """基於預算的推薦"""
        return sorted(
            [item for item in self.items if item.price <= max_budget],
            key=lambda x: x.rating,
            reverse=True
        )

class TravelPlan:
    def __init__(self):
        self.items: List[TravelItem] = []
        self.total_cost: float = 0.0
        self.route: List[tuple] = []
        
    def add_item(self, item: TravelItem):
        self.items.append(item)
        self.total_cost += item.price
        self.route.append(item.location)
        
    def optimize_route(self):
        """簡單的路線優化（這裡可以實現TSP算法）"""
        pass
    
    def get_summary(self) -> Dict:
        return {
            "total_items": len(self.items),
            "total_cost": self.total_cost,
            "locations": self.route,
            "items": [(item.name, item.category) for item in self.items]
        }

class UserProfile:
    def __init__(self, user_id: str):
        self.user_id = user_id
        self.preferences: Dict[str, float] = {}  # 類別偏好權重
        self.visited_items: List[TravelItem] = []
        self.budget_range: tuple = (0, float('inf'))
        
    def update_preference(self, category: str, weight: float):
        self.preferences[category] = weight
        
    def add_visited_item(self, item: TravelItem):
        self.visited_items.append(item)
        
    def set_budget_range(self, min_budget: float, max_budget: float):
        self.budget_range = (min_budget, max_budget)

# 使用示例
def main():
    # 初始化向量器
    vectorizer = BGEVectorizer("path_to_model")
    
    # 初始化推薦系統
    recommender = TravelRecommender(vectorizer)
    
    # 創建一些旅遊項目
    item1 = TravelItem(
        name="台北101",
        description="台北標誌性建築，擁有觀景台和美食街",
        location=(25.0337, 121.5645),
        category="景點",
        price=600.0,
        rating=4.5
    )
    
    # 添加項目到推薦系統
    recommender.add_item(item1)
    
    # 構建向量矩陣
    recommender.build_vector_matrix()
    
    # 創建用戶檔案
    user = UserProfile("user1")
    user.set_budget_range(0, 1000)
    user.update_preference("景點", 0.8)
    
    # 創建旅遊計劃
    plan = TravelPlan()
    plan.add_item(item1)
    
    # 獲取計劃摘要
    summary = plan.get_summary()
    print(summary)

if __name__ == "__main__":
    main()


{'total_items': 1, 'total_cost': 600.0, 'locations': [(25.0337, 121.5645)], 'items': [('台北101', '景點')]}


In [None]:
pip install transformers
pip install torch
pip install sentence-transformers


In [3]:
from abc import ABC, abstractmethod
import numpy as np
from typing import List, Dict, Optional, Union
from sklearn.metrics.pairwise import cosine_similarity
import torch
from sentence_transformers import SentenceTransformer
import pandas as pd
from tqdm import tqdm

class TextVectorizer(ABC):
    @abstractmethod
    def vectorize(self, text: str) -> np.ndarray:
        pass
    
    @abstractmethod
    def batch_vectorize(self, texts: List[str]) -> np.ndarray:
        pass

class BGEVectorizer(TextVectorizer):
    def __init__(self, model_name: str = "BAAI/bge-large-zh-v1.5"):
        """
        初始化BGE向量化器
        
        Args:
            model_name: 模型名稱，默認使用中文大模型
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        
        # 加載模型
        self.model = SentenceTransformer(model_name)
        self.model.to(self.device)
        
    def vectorize(self, text: str) -> np.ndarray:
        """
        將單個文本轉換為向量
        
        Args:
            text: 輸入文本
            
        Returns:
            文本的向量表示
        """
        # BGE模型推薦的前綴
        text = f"為這段文字生成表示向量：{text}"
        embeddings = self.model.encode(text, normalize_embeddings=True)
        return embeddings.reshape(1, -1)
    
    def batch_vectorize(self, texts: List[str], batch_size: int = 32) -> np.ndarray:
        """
        批量將文本轉換為向量
        
        Args:
            texts: 文本列表
            batch_size: 批次大小
            
        Returns:
            文本向量矩陣
        """
        processed_texts = [f"為這段文字生成表示向量：{text}" for text in texts]
        embeddings = self.model.encode(
            processed_texts,
            batch_size=batch_size,
            normalize_embeddings=True,
            show_progress_bar=True
        )
        return embeddings

class TravelItem:
    def __init__(self, 
                 name: str, 
                 description: str, 
                 location: tuple,
                 category: str,
                 price: float,
                 rating: float = 0.0,
                 reviews: List[str] = None,
                 images: List[str] = None,
                 tags: List[str] = None):
        self.name = name
        self.description = description
        self.location = location
        self.category = category
        self.price = price
        self.rating = rating
        self.reviews = reviews or []
        self.images = images or []
        self.tags = tags or []
        self.vector = None
        self.popularity_score = 0.0
        self.calculate_popularity_score()
        
    def set_vector(self, vector: np.ndarray):
        self.vector = vector
        
    def update_rating(self, new_rating: float):
        self.rating = new_rating
        self.calculate_popularity_score()
        
    def add_review(self, review: str):
        self.reviews.append(review)
        self.calculate_popularity_score()
        
    def calculate_popularity_score(self):
        """計算景點流行度分數"""
        review_score = min(len(self.reviews) / 100, 1.0)  # 標準化評論數量
        self.popularity_score = (self.rating * 0.6) + (review_score * 0.4)
        
    def to_dict(self) -> Dict:
        """將景點信息轉換為字典格式"""
        return {
            "name": self.name,
            "description": self.description,
            "location": self.location,
            "category": self.category,
            "price": self.price,
            "rating": self.rating,
            "review_count": len(self.reviews),
            "popularity_score": self.popularity_score,
            "tags": self.tags
        }

class TravelRecommender:
    def __init__(self, vectorizer: TextVectorizer):
        self.vectorizer = vectorizer
        self.items: List[TravelItem] = []
        self.item_vectors: Optional[np.ndarray] = None
        self.category_weights = {
            "景點": 1.0,
            "美食": 1.0,
            "住宿": 1.0,
            "購物": 1.0,
            "娛樂": 1.0
        }
        
    def add_item(self, item: TravelItem):
        """添加單個旅遊項目"""
        vector = self.vectorizer.vectorize(item.description)
        item.set_vector(vector)
        self.items.append(item)
        
    def add_items_batch(self, items: List[TravelItem]):
        """批量添加旅遊項目"""
        descriptions = [item.description for item in items]
        vectors = self.vectorizer.batch_vectorize(descriptions)
        
        for item, vector in zip(items, vectors):
            item.set_vector(vector.reshape(1, -1))
            self.items.append(item)
        
    def build_vector_matrix(self):
        """構建向量矩陣"""
        self.item_vectors = np.vstack([item.vector for item in self.items])
        
    def find_similar_items(self, 
                          query_text: str, 
                          top_k: int = 5,
                          category: Optional[str] = None,
                          max_price: Optional[float] = None) -> List[Dict]:
        """
        查找相似的旅遊項目
        
        Args:
            query_text: 查詢文本
            top_k: 返回結果數量
            category: 可選的類別過濾
            max_price: 可選的最高價格限制
            
        Returns:
            相似項目列表
        """
        query_vector = self.vectorizer.vectorize(query_text)
        similarities = cosine_similarity(query_vector, self.item_vectors)[0]
        
        # 結合相似度和其他因素
        final_scores = similarities.copy()
        for i, item in enumerate(self.items):
            # 應用類別權重
            category_weight = self.category_weights.get(item.category, 1.0)
            final_scores[i] *= category_weight
            
            # 應用流行度調整
            final_scores[i] *= (1 + item.popularity_score * 0.2)  # 流行度佔20%權重
            
            # 價格過濾
            if max_price and item.price > max_price:
                final_scores[i] = -1
                
            # 類別過濾
            if category and item.category != category:
                final_scores[i] = -1
                
        # 獲取top_k個結果
        top_indices = np.argsort(final_scores)[-top_k:][::-1]
        results = []
        
        for idx in top_indices:
            if final_scores[idx] > 0:  # 只返回有效的結果
                item_dict = self.items[idx].to_dict()
                item_dict["similarity_score"] = similarities[idx]
                results.append(item_dict)
                
        return results
    
    def recommend_by_location(self, 
                            target_location: tuple, 
                            radius: float,
                            limit: int = 10) -> List[Dict]:
        """基於位置的推薦"""
        recommended = []
        for item in self.items:
            distance = np.sqrt(
                (item.location[0] - target_location[0])**2 + 
                (item.location[1] - target_location[1])**2
            )
            if distance <= radius:
                item_dict = item.to_dict()
                item_dict["distance"] = distance
                recommended.append(item_dict)
                
        # 根據距離和流行度排序
        recommended.sort(key=lambda x: (x["distance"], -x["popularity_score"]))
        return recommended[:limit]
    
    def recommend_itinerary(self,
                          start_location: tuple,
                          duration_days: int,
                          budget_per_day: float,
                          preferences: Dict[str, float]) -> List[List[Dict]]:
        """
        推薦完整行程
        
        Args:
            start_location: 起始位置
            duration_days: 行程天數
            budget_per_day: 每日預算
            preferences: 類別偏好權重
            
        Returns:
            按天分組的推薦行程
        """
        # 更新類別權重
        self.category_weights.update(preferences)
        
        itinerary = []
        current_location = start_location
        
        for day in range(duration_days):
            day_plan = []
            daily_budget = budget_per_day
            
            # 上午活動（景點為主）
            morning = self.recommend_by_location(
                current_location,
                radius=5.0,  # 5公里範圍內
                limit=2
            )
            for item in morning:
                if item["price"] <= daily_budget:
                    day_plan.append(item)
                    daily_budget -= item["price"]
                    current_location = item["location"]
            
            # 中午用餐（美食為主）
            lunch = self.recommend_by_location(
                current_location,
                radius=2.0,
                limit=1
            )
            for item in lunch:
                if item["price"] <= daily_budget:
                    day_plan.append(item)
                    daily_budget -= item["price"]
                    current_location = item["location"]
            
            # 下午活動
            afternoon = self.recommend_by_location(
                current_location,
                radius=5.0,
                limit=2
            )
            for item in afternoon:
                if item["price"] <= daily_budget:
                    day_plan.append(item)
                    daily_budget -= item["price"]
                    current_location = item["location"]
            
            itinerary.append(day_plan)
            
        return itinerary

# 使用示例
def main():
    # 初始化向量器
    vectorizer = BGEVectorizer()
    
    # 初始化推薦系統
    recommender = TravelRecommender(vectorizer)
    
    # 創建示例數據
    travel_items = [
        TravelItem(
            name="台北101",
            description="台北標誌性建築，擁有觀景台和美食街，是台北必訪景點之一，可以俯瞰整個台北市景色",
            location=(25.0337, 121.5645),
            category="景點",
            price=600.0,
            rating=4.5,
            tags=["觀景", "地標", "購物"]
        ),
        TravelItem(
            name="九份老街",
            description="充滿懷舊氛圍的山城老街，有許多特色小吃和紀念品店，是體驗台灣傳統文化的好去處",
            location=(25.1089, 121.8445),
            category="景點",
            price=0.0,
            rating=4.7,
            tags=["古蹟", "美食", "文化"]
        ),
        TravelItem(
            name="鼎泰豐",
            description="米其林星級餐廳，以小籠包聞名於世，是台北必吃美食之一",
            location=(25.0319, 121.5678),
            category="美食",
            price=500.0,
            rating=4.8,
            tags=["小籠包", "台灣美食", "米其林"]
        )
    ]
    
    # 批量添加項目
    recommender.add_items_batch(travel_items)
    
    # 構建向量矩陣
    recommender.build_vector_matrix()
    
    # 測試相似項目查詢
    query = "想找一個可以看台北夜景的地方"
    similar_items = recommender.find_similar_items(query, top_k=2)
    print("\n相似項目推薦:")
    for item in similar_items:
        print(f"- {item['name']}: {item['description']}")
    
    # 測試基於位置的推薦
    location = (25.0337, 121.5645)  # 台北101位置
    nearby_items = recommender.recommend_by_location(location, radius=2.0)
    print("\n附近景點推薦:")
    for item in nearby_items:
        print(f"- {item['name']}: {item['distance']:.2f}km")
    
    # 測試行程推薦
    preferences = {
        "景點": 0.8,
        "美食": 0.6,
        "購物": 0.4
    }
    itinerary = recommender.recommend_itinerary(
        start_location=location,
        duration_days=2,
        budget_per_day=2000.0,
        preferences=preferences
    )
    
    print("\n推薦行程:")
    for day, day_plan in enumerate(itinerary, 1):
        print(f"\n第{day}天:")
        for item in day_plan:
            print(f"- {item['name']}: ${item['price']}")

if __name__ == "__main__":
    main()


Using device: cpu


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/30.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.30G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/394 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/439k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.30G [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


相似項目推薦:
- 台北101: 台北標誌性建築，擁有觀景台和美食街，是台北必訪景點之一，可以俯瞰整個台北市景色
- 九份老街: 充滿懷舊氛圍的山城老街，有許多特色小吃和紀念品店，是體驗台灣傳統文化的好去處

附近景點推薦:
- 台北101: 0.00km
- 鼎泰豐: 0.00km
- 九份老街: 0.29km

推薦行程:

第1天:
- 台北101: $600.0
- 鼎泰豐: $500.0
- 鼎泰豐: $500.0

第2天:
- 鼎泰豐: $500.0
- 台北101: $600.0
- 台北101: $600.0


In [None]:
BGE模型整合

使用 sentence-transformers 加載 BGE 模型
支持 GPU 加速（如果可用）
實現了批量處理功能以提高效率
向量化改進

添加了文本前綴以提高向量質量
實現了批量向量化以提高效率
支持向量正規化
推薦系統增強

結合了多個因素進行排序：
文本相似度
類別權重
流行度分數
價格限制
支持多維度過濾
行程規劃功能

考慮地理位置
預算管理
類別平衡
時間安排
數據結構優化

增加了更多元數據（標籤、圖片等）
改進了評分系統
添加了序列化支持

數據準備

In [None]:
# 準備景點數據
items = [
    TravelItem(
        name="景點名稱",
        description="詳細描述...",
        location=(緯度, 經度),
        category="類別",
        price=價格,
        rating=評分,
        tags=["標籤1", "標籤2"]
    ),
    # 更多景點...
]


系統初始化

In [None]:
vectorizer = BGEVectorizer()
recommender = TravelRecommender(vectorizer)
recommender.add_items_batch(items)
recommender.build_vector_matrix()


推薦功能

In [None]:
# 文本查詢
results = recommender.find_similar_items(
    query_text="想去看夜景",
    top_k=5,
    category="景點",
    max_price=1000
)

# 位置查詢
nearby = recommender.recommend_by_location(
    target_location=(25.0337, 121.5645),
    radius=2.0
)

# 行程規劃
itinerary = recommender.recommend_itinerary(
    start_location=(25.0337, 121.5645),
    duration_days=3,
    budget_per_day=2000,
    preferences={"景點": 0.8, "美食": 0.6}
)


In [None]:
系統可以進一步擴展：

添加時間相關的考慮（營業時間、最佳遊覽時間等）
整合交通資訊
添加季節性推薦
實現用戶反饋機制
加入更多的個性化參數
使用這個系統時，需要注意：

BGE模型需要較大的計算資源
首次加載模型可能較慢
大量數據時需要考慮性能優化
確保數據質量和完整性