In [3]:
import os
import requests
import json
import re
import time
from typing import List, Dict, Tuple
from tqdm import tqdm

print("所有库已成功导入！")

所有库已成功导入！


In [6]:
# 配置参数
MODEL_NAME = "deepseek-r1:5b"
OLLAMA_API_URL = "http://localhost:11434/api/chat"  # Ollama API默认地址
TIMEOUT = 60
RETRIES = 3
NEWS_FILE = "news_data.txt"  # 新闻数据文件
LABELS_FILE = "labels.txt"  # 标签文件
OUTPUT_FILE = "news_analysis_results.json"
LOG_FILE = "model_inference.log"

In [7]:
def log(message: str, level: str = "INFO"):
    """带时间戳的日志输出，同时写入日志文件"""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    log_msg = f"[{timestamp}] [{level}] {message}"
    print(log_msg)

    # 写入日志文件
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(log_msg + "\n")


def call_ollama_api(prompt: str) -> str:
    """使用HTTP API调用Ollama模型"""
    start_time = time.time()
    log(f"开始调用模型: {MODEL_NAME}", "DEBUG")

    for attempt in range(RETRIES):
        try:
            # 构建API请求
            payload = {
                "model": MODEL_NAME,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False  # 非流式响应
            }

            log(f"发送API请求 (尝试 {attempt + 1}/{RETRIES})", "DEBUG")
            response = requests.post(
                OLLAMA_API_URL,
                json=payload,
                timeout=TIMEOUT
            )

            duration = time.time() - start_time

            if response.status_code != 200:
                log(f"API调用失败 (状态码 {response.status_code}): {response.text}", "ERROR")
                time.sleep(2)
                continue

            # 解析响应
            data = response.json()
            message_content = data.get("message", {}).get("content", "")

            log(f"模型返回成功 (长度: {len(message_content)} 字符, 耗时: {duration:.2f}秒)", "INFO")
            return message_content

        except requests.Timeout:
            log(f"API请求超时 ({TIMEOUT}秒)", "ERROR")
            continue
        except Exception as e:
            log(f"API调用异常: {type(e).__name__} - {str(e)}", "ERROR")
            time.sleep(2)

    log("达到最大重试次数，返回空结果", "ERROR")
    return ""


def read_news_file() -> List[str]:
    """从文件读取新闻数据"""
    log(f"从文件读取新闻数据: {NEWS_FILE}", "INFO")

    try:
        # 读取新闻数据
        with open(NEWS_FILE, "r", encoding="utf-8") as f:
            news_list = [line.strip() for line in f.readlines() if line.strip()]

        if not news_list:
            log("新闻文件为空，程序退出", "ERROR")
            exit(1)

        log(f"成功读取 {len(news_list)} 条新闻", "INFO")
        return news_list

    except Exception as e:
        log(f"读取新闻文件时出错: {str(e)}", "ERROR")
        exit(1)


def read_labels_file() -> List[int]:
    """从文件读取标签数据"""
    log(f"从文件读取标签数据: {LABELS_FILE}", "INFO")
    try:
        with open(LABELS_FILE, "r", encoding="utf-8") as f:
            labels = [int(line.strip()) for line in f.readlines() if line.strip().isdigit()]
        if not labels:
            log("标签文件为空或无有效标签，程序退出", "ERROR")
            exit(1)
        if len(labels) != len(read_news_file()):
            log("新闻数量与标签数量不匹配，程序退出", "ERROR")
            exit(1)
        log(f"成功读取 {len(labels)} 个标签", "INFO")
        return labels
    except Exception as e:
        log(f"读取标签文件时出错: {str(e)}", "ERROR")
        exit(1)


def analyze_news(news_list: List[str], task_type: str) -> List:
    """分析新闻的通用函数，支持不同任务类型"""
    results = []
    log(f"开始执行任务: {task_type}")

    for i, news in enumerate(tqdm(news_list, desc=task_type)):
        # 根据任务类型构建不同的prompt
        if task_type == "真假判别":
            prompt = f"""
            请判断以下新闻的真假，仅输出0或1：
            新闻：{news}
            """
        elif task_type == "情感分析":
            prompt = f"""
            请分析以下新闻的情感倾向，仅输出"积极"、"消极"或"中性"：
            新闻：{news}
            """
        else:
            log(f"不支持的任务类型: {task_type}", "ERROR")
            continue

        # 添加任务标识符到日志
        log(f"处理新闻 {i + 1}/{len(news_list)} (任务: {task_type})", "INFO")

        response = call_ollama_api(prompt)

        if task_type == "真假判别":
            result = parse_prediction(response)
        elif task_type == "情感分析":
            result = parse_sentiment(response)

        results.append(result)

        # 每处理5条新闻，显示一次进度
        if (i + 1) % 5 == 0:
            log(f"已处理 {i + 1}/{len(news_list)} 条新闻", "INFO")

    return results


def parse_prediction(response: str) -> int:
    """解析模型返回的真假判别结果"""
    match = re.search(r'\b(0|1)\b', response)
    if match:
        log(f"解析预测结果: {match.group(1)}", "DEBUG")
        return int(match.group(1))
    else:
        log(f"无法解析预测结果: {response[:50]}...", "WARNING")
        return 0


def parse_sentiment(response: str) -> str:
    """解析模型返回的情感分析结果"""
    if re.search(r'\b(积极|正面|好|支持)\b', response, re.IGNORECASE):
        return "积极"
    elif re.search(r'\b(消极|负面|坏|反对)\b', response, re.IGNORECASE):
        return "消极"
    else:
        return "中性"


def calculate_accuracy(predictions: List[int], labels: List[int]) -> Dict[str, float]:
    """计算准确率指标"""
    total = len(labels)
    if total == 0:
        return {"Accuracy": 0, "Accuracy_fake": 0, "Accuracy_true": 0}

    correct = sum(1 for p, t in zip(predictions, labels) if p == t)
    total_fake = sum(1 for t in labels if t == 0)
    total_true = sum(1 for t in labels if t == 1)

    correct_fake = sum(1 for p, t in zip(predictions, labels) if p == 0 and t == 0)
    correct_true = sum(1 for p, t in zip(predictions, labels) if p == 1 and t == 1)

    accuracy = correct / total
    accuracy_fake = correct_fake / total_fake if total_fake > 0 else 0
    accuracy_true = correct_true / total_true if total_true > 0 else 0

    return {
        "Accuracy": accuracy,
        "Accuracy_fake": accuracy_fake,
        "Accuracy_true": accuracy_true
    }




In [None]:
def main():
    print("\n=== 离线DeepSeek新闻分析系统 ===")
    print(f"使用模型: {MODEL_NAME}")
    print(f"API地址: {OLLAMA_API_URL}")
    print(f"新闻文件: {NEWS_FILE}")
    print(f"标签文件: {LABELS_FILE}")
    print("=" * 50)

    # 检查Ollama API是否可用
    try:
        response = requests.get(f"{OLLAMA_API_URL}/../tags", timeout=10)
        if response.status_code != 200:
            log(f"无法连接到Ollama API: {response.text}", "ERROR")
            exit(1)

        models = [model["name"] for model in response.json().get("models", [])]
        if MODEL_NAME not in models:
            log(f"未找到模型 {MODEL_NAME}，请确保已正确下载", "ERROR")
            exit(1)

        log(f"成功连接到Ollama API，可用模型: {', '.join(models)}", "INFO")
    except Exception as e:
        log(f"检查Ollama API时出错: {str(e)}", "ERROR")
        exit(1)

    # 从文件读取新闻
    news_list = read_news_file()
    # 从文件读取标签
    labels = read_labels_file()

    # 任务1：基础真假判别
    print("\n=== 执行任务1：基础真假判别 ===")
    predictions = analyze_news(news_list, "真假判别")

    # 计算准确率
    accuracy_metrics = calculate_accuracy(predictions, labels)
    print("\n=== 准确率计算结果 ===")
    print(f"Accuracy: {accuracy_metrics['Accuracy']}")
    print(f"Accuracy_fake: {accuracy_metrics['Accuracy_fake']}")
    print(f"Accuracy_true: {accuracy_metrics['Accuracy_true']}")

    # 任务2：情感分析
    print("\n=== 执行任务2：情感分析 ===")
    sentiments = analyze_news(news_list, "情感分析")

    # 保存结果
    results = {
        "news_data": news_list,
        "predictions": predictions,
        "sentiments": sentiments,
        "accuracy_metrics": accuracy_metrics
    }

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\n结果已保存到: {OUTPUT_FILE}")
    print(f"详细日志已保存到: {LOG_FILE}")
    print("分析完成！")


if __name__ == "__main__":
    main()

In [None]:
    print("\n=== 准确率计算结果 ===")
    print(f"Accuracy: {accuracy_metrics['Accuracy']}")
    print(f"Accuracy_fake: {accuracy_metrics['Accuracy_fake']}")
    print(f"Accuracy_true: {accuracy_metrics['Accuracy_true']}")



=== 准确率计算结果 ===
accuracy: 0.51 accuracy_true: 0.55 accuracy_fake: 0.47


In [14]:
print("accuracy:" ,126/200,
        "accuracy_true:" ,58/100,
        "accuracy_fake:" ,68/100)

accuracy: 0.63 accuracy_true: 0.58 accuracy_fake: 0.68


In [21]:
import os
import json
import requests
import time
from typing import List, Dict, Tuple, Optional
from tqdm import tqdm
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

class DeepSeekClient:
    """与本地部署的DeepSeek模型API交互的客户端"""
    
    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        api_key: Optional[str] = None,
        timeout: int = 60,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ):
        """初始化DeepSeek API客户端"""
        self.base_url = base_url
        self.api_key = api_key
        self.timeout = timeout
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.headers = {"Content-Type": "application/json"}
        
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"
    
    def _make_request(self, endpoint: str, payload: Dict) -> Dict:
        """发送HTTP请求到DeepSeek API并处理重试"""
        url = f"{self.base_url}{endpoint}"
        retries = 0
        
        while retries <= self.max_retries:
            try:
                response = requests.post(
                    url, headers=self.headers, json=payload, timeout=self.timeout
                )
                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException as e:
                retries += 1
                if retries > self.max_retries:
                    raise
                time.sleep(self.retry_delay * retries)
        
        raise Exception("请求DeepSeek API失败")
    
    def chat_completion(self, messages: List[Dict], model: str = "deepseek-chat") -> str:
        """调用聊天完成API并返回文本内容"""
        payload = {
            "model": model,
            "messages": messages,
            "temperature": 0.2,  # 低温度以获得更确定性的回答
            "max_tokens": 512
        }
        
        response = self._make_request("/v1/chat/completions", payload)
        return response["choices"][0]["message"]["content"]

class NewsAnalyzer:
    """新闻分析器，用于新闻真实性判别和情感分析"""
    
    def __init__(self, client: DeepSeekClient):
        self.client = client
    
    def detect_fake_news(self, news_text: str) -> int:
        """仅基于内容判别新闻真假"""
        prompt = f"""
        你是一个专业的新闻验证专家。请判断以下新闻是真新闻还是假新闻，并回答"真新闻"或"假新闻"。
        新闻内容: "{news_text}"
        回答:
        """
        
        response = self.client.chat_completion([{"role": "user", "content": prompt}])
        return 1 if "真新闻" in response else 0
    
    def analyze_sentiment(self, news_text: str) -> str:
        """分析新闻的情感倾向"""
        prompt = f"""
        请分析以下新闻的情感倾向，回答"积极"、"消极"或"中性"。
        新闻内容: "{news_text}"
        回答:
        """
        
        response = self.client.chat_completion([{"role": "user", "content": prompt}])
        
        if "积极" in response:
            return "positive"
        elif "消极" in response:
            return "negative"
        else:
            return "neutral"
    
    def detect_fake_news_with_sentiment(self, news_text: str) -> int:
        """结合情感分析判别新闻真假"""
        sentiment = self.analyze_sentiment(news_text)
        
        # 根据情感调整提示
        sentiment_prompt = {
            "positive": "这条新闻具有积极的情感倾向",
            "negative": "这条新闻具有消极的情感倾向",
            "neutral": "这条新闻具有中性的情感倾向"
        }[sentiment]
        
        prompt = f"""
        你是一个专业的新闻验证专家。请考虑以下新闻的情感倾向，并判断它是真新闻还是假新闻。
        情感倾向: {sentiment_prompt}
        新闻内容: "{news_text}"
        回答"真新闻"或"假新闻":
        """
        
        response = self.client.chat_completion([{"role": "user", "content": prompt}])
        return 1 if "真新闻" in response else 0

def load_news_data(file_path: str) -> Tuple[List[str], List[int]]:
    """加载新闻数据和真实标签"""
    texts = []
    labels = []
    
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) >= 2:
                label = int(parts[0])
                text = parts[1]
                texts.append(text)
                labels.append(label)
    
    return texts, labels

def calculate_metrics(y_true: List[int], y_pred: List[int]) -> Dict[str, float]:
    """计算准确率指标"""
    # 总准确率
    accuracy = accuracy_score(y_true, y_pred)
    
    # 分别计算真假新闻的准确率
    true_indices = [i for i, label in enumerate(y_true) if label == 1]
    fake_indices = [i for i, label in enumerate(y_true) if label == 0]
    
    if true_indices:
        accuracy_true = accuracy_score(
            [y_true[i] for i in true_indices],
            [y_pred[i] for i in true_indices]
        )
    else:
        accuracy_true = 0.0
    
    if fake_indices:
        accuracy_fake = accuracy_score(
            [y_true[i] for i in fake_indices],
            [y_pred[i] for i in fake_indices]
        )
    else:
        accuracy_fake = 0.0
    
    return {
        "accuracy": accuracy,
        "accuracy_true": accuracy_true,
        "accuracy_fake": accuracy_fake
    }

def main():
    # 初始化客户端和分析器
    client = DeepSeekClient(base_url="http://localhost:8000")  # 修改为你的API地址
    analyzer = NewsAnalyzer(client)
    
    # 加载数据
    news_file = "news_data.txt"  # 格式: 标签(0/1)\t新闻内容
    print(f"正在加载新闻数据: {news_file}")
    texts, true_labels = load_news_data(news_file)
    
    # 确保数据加载成功
    if not texts:
        print("未找到新闻数据，请检查文件路径和格式!")
        return
    
    print(f"已加载 {len(texts)} 条新闻，开始分析...")
    
    # 方法1: 仅基于内容判别
    print("\n=== 方法1: 仅基于内容判别新闻真假 ===")
    predictions_method1 = []
    
    for text in tqdm(texts, desc="分析中"):
        pred = analyzer.detect_fake_news(text)
        predictions_method1.append(pred)
        time.sleep(0.5)  # 避免请求过于频繁
    
    metrics1 = calculate_metrics(true_labels, predictions_method1)
    print_metrics(metrics1, "方法1")
    
    # 方法2: 结合情感分析判别
    print("\n=== 方法2: 结合情感分析判别新闻真假 ===")
    predictions_method2 = []
    
    for text in tqdm(texts, desc="分析中"):
        pred = analyzer.detect_fake_news_with_sentiment(text)
        predictions_method2.append(pred)
        time.sleep(0.5)  # 避免请求过于频繁
    
    metrics2 = calculate_metrics(true_labels, predictions_method2)
    print_metrics(metrics2, "方法2")
    
   
def print_metrics(metrics: Dict[str, float], method_name: str):
    """打印准确率指标"""
    print(f"{method_name} 准确率:")
    print(f"- 总准确率: {metrics['accuracy']:.2%}")
    print(f"- 真新闻准确率: {metrics['accuracy_true']:.2%}")
    print(f"- 假新闻准确率: {metrics['accuracy_fake']:.2%}")

if __name__ == "__main__":
    main()

正在加载新闻数据: news_data.txt


FileNotFoundError: [Errno 2] No such file or directory: 'news_data.txt'