In [1]:
from typing import List, Dict, Optional
from huggingface_hub import login, HfApi, get_repo_discussions, get_discussion_details
from huggingface_hub.utils import HfHubHTTPError
import logging

# Keep console output quiet: only records at WARNING level or above are emitted.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


class HuggingFaceDiscussionFetcher:
    """Collect discussion comments for HuggingFace Hub models matching a query."""

    def __init__(self, token: Optional[str] = None, verbose: bool = False):
        """
        Initialise the fetcher.

        Args:
            token: HuggingFace API token. When truthy it is passed to
                ``login`` before the API client is created.
            verbose: Whether to print progress and report skipped
                repositories/discussions.
        """
        if token:
            login(token=token)
        self.api = HfApi()
        self.verbose = verbose

    def _log_skip(self, message: str, *args) -> None:
        """Record a skipped item: WARNING when verbose, DEBUG otherwise."""
        level = logging.WARNING if self.verbose else logging.DEBUG
        logger.log(level, message, *args)

    def search_models(self, query: str, limit: Optional[int] = None) -> List[str]:
        """
        Search the Hub for models matching *query*.

        Args:
            query: Search string forwarded to ``HfApi.list_models``.
            limit: Optional cap on the number of models returned;
                ``None`` (the default) keeps the search unbounded.

        Returns:
            The ids of the matching models, or an empty list if the
            search failed (the failure is logged).
        """
        try:
            model_ids = [
                model.id
                for model in self.api.list_models(search=query, limit=limit)
            ]
        except Exception as e:
            # Best-effort boundary: a failed search yields no models.
            logger.error("搜索模型出错: %s", e)
            return []

        if self.verbose:
            print(f"找到 {len(model_ids)} 个模型")
        return model_ids

    def fetch_discussions_for_model(self, model_id: str) -> List[Dict]:
        """
        Fetch every event that carries text content from a model's discussions.

        Args:
            model_id: Repository id of the model.

        Returns:
            One dict per event with non-empty ``content``, holding the keys
            ``model_id``, ``discussion_title``, ``discussion_status``,
            ``event_type``, ``author``, ``created_at`` and ``content``.
            Hub HTTP errors are skipped (and reported via the logger)
            rather than raised, so the result may be partial or empty.
        """
        discussions_data: List[Dict] = []

        try:
            # Use the same client as search_models and iterate lazily
            # instead of materialising the whole listing first.
            for discussion in self.api.get_repo_discussions(repo_id=model_id):
                try:
                    discussion_details = self.api.get_discussion_details(
                        repo_id=model_id,
                        discussion_num=discussion.num,
                    )
                except HfHubHTTPError as e:
                    # One unreadable discussion must not abort the others.
                    self._log_skip(
                        "跳过 %s 的讨论 #%s: %s", model_id, discussion.num, e
                    )
                    continue

                for event in discussion_details.events:
                    # Only events exposing a non-empty ``content`` are kept.
                    content = getattr(event, "content", None)
                    if not content:
                        continue
                    discussions_data.append({
                        "model_id": model_id,
                        "discussion_title": discussion.title,
                        "discussion_status": discussion.status,
                        "event_type": event.type,
                        "author": event.author,
                        "created_at": event.created_at,
                        "content": content,
                    })
        except HfHubHTTPError as e:
            # Listing failed for this repo; keep whatever was collected.
            self._log_skip("跳过 %s: %s", model_id, e)
        except Exception as e:
            logger.error("处理 %s 出错: %s", model_id, e)

        return discussions_data

    def fetch_all_discussions(self, query: str, limit: Optional[int] = None) -> List[Dict]:
        """
        Fetch discussion events for every model matching *query*.

        Args:
            query: Search string used to find models.
            limit: Optional cap on the number of models searched;
                ``None`` (the default) processes every match.

        Returns:
            The concatenated event dicts of all matching models, in search
            order; empty when no model matched or nothing was readable.
        """
        all_discussions: List[Dict] = []
        for model_id in self.search_models(query, limit=limit):
            all_discussions.extend(self.fetch_discussions_for_model(model_id))
        return all_discussions


if __name__ == "__main__":
    import os
    import pandas as pd

    # Read the token from the environment only. A credential must never be
    # hard-coded as a fallback: anything committed to source is leaked, and
    # the token that used to be embedded here should be revoked.
    # Without the variable the fetcher runs unauthenticated (token is None).
    token = os.getenv("HUGGINGFACE_TOKEN")

    fetcher = HuggingFaceDiscussionFetcher(token=token, verbose=False)
    discussions = fetcher.fetch_all_discussions("ERNIE-4.5")

    # One row per discussion event returned by the fetcher.
    df = pd.DataFrame(discussions)

    if not df.empty:
        print(f"获取到 {len(df)} 条数据")
    else:
        print("未获取到任何数据")





KeyboardInterrupt: 

In [None]:
# pandas has no ``DataFrame.to_xlsx``; Excel files are written with ``to_excel``
# (an Excel writer engine such as openpyxl must be installed for .xlsx output).
# ``df`` comes from the fetch cell above, which must have run to completion.
# NOTE(review): if ``created_at`` holds timezone-aware datetimes, ``to_excel``
# rejects them; strip the timezone first -- confirm against real data.
df.to_excel("./HuggingFace.xlsx", index=False)

NameError: name 'df' is not defined