In [2]:
# 1. 环境准备和导入检查
import sys
import subprocess
import importlib
import warnings
warnings.filterwarnings('ignore')

# Check for the required packages and install any that are missing
def check_and_install():
    """Verify the notebook's third-party dependencies and pip-install missing ones.

    Each required package is probed with ``importlib.import_module``. Packages
    that fail to import are installed with ``pip`` using the interpreter that is
    running this code (``sys.executable``). Progress is reported via ``print``;
    installation failures are reported but do not raise.

    Returns:
        None.
    """
    # Maps the pip distribution name to the module name used in `import`.
    # These differ for scikit-learn, whose importable module is `sklearn`.
    required_packages = {
        'torch': 'torch',
        'transformers': 'transformers',
        'pandas': 'pandas',
        'numpy': 'numpy',
        'scikit-learn': 'sklearn',
        'tqdm': 'tqdm'
    }

    missing_packages = []
    for package, import_name in required_packages.items():
        try:
            importlib.import_module(import_name)
            print(f"✓ {package} 已安装")
        except ImportError:
            print(f"✗ {package} 未安装")
            missing_packages.append(package)

    if missing_packages:
        print(f"\n正在安装缺失的包: {missing_packages}")
        for package in missing_packages:
            try:
                subprocess.check_call([sys.executable, "-m", "pip", "install", package])
                print(f"✓ {package} 安装成功")
            except subprocess.CalledProcessError:
                print(f"✗ {package} 安装失败")
        # Let the import system notice packages installed while this process was running.
        importlib.invalidate_caches()

    print("\n环境检查完成！")

# Run the dependency check immediately when this cell executes.
check_and_install()

✓ torch 已安装
✓ transformers 已安装
✓ pandas 已安装
✓ numpy 已安装
✗ scikit-learn 未安装
✓ tqdm 已安装

正在安装缺失的包: ['scikit-learn']
✓ scikit-learn 安装成功

环境检查完成！


In [3]:
# 2. 基础设置和导入
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, classification_report
import os
from tqdm import tqdm

# Import the transformers classes the notebook needs, reporting a failure explicitly
try:
    # Model and tokenizer factories used for loading the classifier
    from transformers import AutoModelForSequenceClassification, AutoTokenizer
    print("✓ 成功导入transformers核心模块")
except ImportError as e:
    print(f"✗ transformers导入失败: {e}")
    # NOTE(review): `exit` is the interactive helper, not `sys.exit`; inside a
    # notebook kernel it may not stop the run cleanly — consider re-raising instead.
    exit(1)

# Try several locations for AdamW, in order of preference
try:
    # Option 1: transformers.optimization
    from transformers.optimization import AdamW
    print("✓ 从transformers.optimization导入AdamW")
except ImportError:
    try:
        # Option 2: torch.optim
        from torch.optim import AdamW
        print("✓ 从torch.optim导入AdamW")
    except ImportError:
        try:
            # Option 3: top-level transformers namespace
            from transformers import AdamW
            print("✓ 直接导入AdamW")
        except ImportError:
            # AdamW = None is the signal main() checks to fall back to torch.optim.Adam
            print("✗ 无法找到AdamW，将使用torch.optim.Adam")
            AdamW = None

# Import the learning-rate scheduler factory; None disables scheduling in main()
try:
    from transformers import get_linear_schedule_with_warmup
    print("✓ 导入get_linear_schedule_with_warmup")
except ImportError:
    print("✗ 无法导入get_linear_schedule_with_warmup")
    get_linear_schedule_with_warmup = None

# Seed all random number generators
def set_seed(seed=42):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, plus all CUDA devices when CUDA is available), and configures cuDNN
    for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.

    Returns:
        None.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN benchmarking may select different algorithms between runs, so it
    # is disabled together with enabling deterministic mode.
    torch.backends.cudnn.benchmark = False

# Apply the fixed seed before any model or data work happens
set_seed(42)

# Device selection: use CUDA when available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n设备信息:")
print(f"- 使用设备: {device}")
if torch.cuda.is_available():
    # Report the name of GPU 0 and its total memory converted from bytes to GiB
    print(f"- GPU名称: {torch.cuda.get_device_name(0)}")
    print(f"- GPU显存: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
print("-" * 50)

✓ 成功导入transformers核心模块
✓ 从torch.optim导入AdamW
✓ 导入get_linear_schedule_with_warmup

设备信息:
- 使用设备: cpu
--------------------------------------------------


In [4]:
# 3. 数据处理类
class SentimentDataset(Dataset):
    """Binary sentiment dataset read from a three-column CSV file.

    The first three CSV columns are interpreted as ``polarity``, ``title`` and
    ``text``. Rows whose polarity is not 1 or 2 are dropped; polarity 1 maps to
    label 0 (negative) and polarity 2 maps to label 1 (positive). The model
    input for a row is ``title + " " + text``, tokenized lazily in
    ``__getitem__``.
    """

    def __init__(self, file_path, tokenizer, max_length=128, has_header=False,
                 max_samples=None, sample_seed=42):
        """Load, clean and optionally subsample the CSV file.

        Args:
            file_path: Path to the CSV file.
            tokenizer: Callable tokenizer invoked per sample in ``__getitem__``.
            max_length: Sequence length used for truncation and padding.
            has_header: Whether the first CSV row is a header row.
            max_samples: Optional cap on the number of valid rows kept. When the
                file holds more valid rows than this, a random subset of exactly
                ``max_samples`` rows is drawn. ``None`` keeps every valid row.
            sample_seed: Seed for the random subset, so sampling is repeatable.

        Raises:
            ValueError: If ``max_samples`` is not positive, or the file has
                fewer than three columns.
            FileNotFoundError: If ``file_path`` does not exist.
        """
        if max_samples is not None and max_samples <= 0:
            raise ValueError(f"max_samples 必须为正整数，当前值: {max_samples}")

        self.tokenizer = tokenizer
        self.max_length = max_length

        print(f"\n加载数据集: {file_path}")

        try:
            if has_header:
                df = pd.read_csv(file_path)
                if len(df.columns) == 3:
                    df.columns = ['polarity', 'title', 'text']
                else:
                    print(f"警告: 期望3列，但找到{len(df.columns)}列")
                    # Keep only the first three columns when there are extras
                    if len(df.columns) >= 3:
                        # .copy() so later column assignments act on an independent frame
                        df = df.iloc[:, :3].copy()
                        df.columns = ['polarity', 'title', 'text']
                    else:
                        raise ValueError(f"数据列数不足: {len(df.columns)}")
            else:
                # No header row: read positionally and name the columns ourselves
                df = pd.read_csv(file_path, header=None)

                if df.shape[1] == 3:
                    df.columns = ['polarity', 'title', 'text']
                elif df.shape[1] > 3:
                    print(f"警告: 数据有{df.shape[1]}列，只取前3列")
                    df = df.iloc[:, :3].copy()
                    df.columns = ['polarity', 'title', 'text']
                else:
                    raise ValueError(f"数据列数不足: {df.shape[1]}")

            print(f"- 原始数据行数: {len(df)}")

            # Missing titles/texts become empty strings so concatenation is safe
            df['title'] = df['title'].fillna('')
            df['text'] = df['text'].fillna('')

            # The model sees the title and the body as one string
            df['combined_text'] = df['title'].astype(str) + " " + df['text'].astype(str)

            # Drop rows whose polarity is neither 1 nor 2
            valid_polarity = df['polarity'].isin([1, 2])
            if not valid_polarity.all():
                invalid_count = (~valid_polarity).sum()
                print(f"警告: 发现{invalid_count}个无效的极性值，将被移除")
                df = df[valid_polarity].copy()

            # Subsample after cleaning so the cap counts usable rows only
            if max_samples is not None and len(df) > max_samples:
                df = df.sample(n=max_samples, random_state=sample_seed).reset_index(drop=True)
                print(f"- 采样后数据行数: {len(df)} (max_samples={max_samples})")

            # Label mapping: 1 (negative) -> 0, 2 (positive) -> 1.
            # Only polarities 1 and 2 remain at this point, so `== 2` is sufficient.
            df['label'] = (df['polarity'] == 2).astype(int)

            self.texts = df['combined_text'].tolist()
            self.labels = df['label'].tolist()

            print(f"- 有效数据行数: {len(self.labels)}")
            print(f"- 标签分布: 负面(0): {self.labels.count(0)}, 正面(1): {self.labels.count(1)}")

            # Show up to the first three samples as a sanity check
            print("- 样本示例:")
            for i in range(min(3, len(self.labels))):
                print(f"  样本{i+1}: 文本长度={len(self.texts[i])}, 标签={self.labels[i]}")

        except FileNotFoundError:
            print(f"错误: 文件 {file_path} 未找到")
            raise
        except Exception as e:
            print(f"错误: 读取文件时出错 - {e}")
            raise

    def __len__(self):
        """Return the number of samples kept after cleaning and sampling."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return its tensors.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` (the tokenizer's
            ``'pt'`` outputs flattened to 1-D) and ``labels`` (a scalar long
            tensor).
        """
        text = str(self.texts[idx])
        label = self.labels[idx]

        # Truncate or pad every sample to exactly max_length
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        return {
            # flatten() removes the batch dimension the tokenizer adds
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Status line printed once the class definition above has executed.
print("✓ SentimentDataset类定义完成")

✓ SentimentDataset类定义完成


In [5]:
# 4. 训练和评估函数
def train_epoch(model, data_loader, optimizer, scheduler, device):
    """Run one full training pass over ``data_loader``.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and ``labels``;
            its output must expose ``.loss`` and ``.logits``.
        data_loader: Iterable of batches keyed by ``input_ids``,
            ``attention_mask`` and ``labels``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per batch; skipped when falsy.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(avg_loss, accuracy, f1)`` where ``avg_loss`` is the mean of the
        per-batch losses and ``f1`` is the weighted F1 over all training samples.
    """
    model.train()
    running_loss = 0
    predicted = []
    expected = []

    batches = tqdm(data_loader, desc="训练")
    for step, batch in enumerate(batches):
        # Move the three tensors the model consumes onto the target device
        input_ids, attention_mask, labels = (
            batch[key].to(device) for key in ('input_ids', 'attention_mask', 'labels')
        )

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

        loss = outputs.loss
        batch_loss = loss.item()
        running_loss += batch_loss

        loss.backward()
        # Clip the global gradient norm to 1.0 before the parameter update
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        if scheduler:
            scheduler.step()

        # Collect hard predictions for the epoch-level metrics
        batch_preds = torch.argmax(outputs.logits, dim=1)
        predicted.extend(batch_preds.cpu().numpy())
        expected.extend(labels.cpu().numpy())

        # Refresh the displayed loss only every 10th batch
        if step % 10 == 0:
            batches.set_postfix({'loss': batch_loss})

    return (
        running_loss / len(data_loader),
        accuracy_score(expected, predicted),
        f1_score(expected, predicted, average='weighted'),
    )

def eval_model(model, data_loader, device, dataset_name="验证集"):
    """Evaluate ``model`` on ``data_loader`` without updating any weights.

    Prints a per-class classification report and returns the aggregate metrics
    together with the raw predictions.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and ``labels``;
            its output must expose ``.loss`` and ``.logits``.
        data_loader: Iterable of batches keyed by ``input_ids``,
            ``attention_mask`` and ``labels``.
        device: Device the batch tensors are moved to.
        dataset_name: Label used in the progress bar and the report heading.

    Returns:
        Tuple ``(avg_loss, accuracy, f1, all_preds, all_labels)`` where
        ``avg_loss`` is the mean per-batch loss, ``f1`` is the weighted F1, and
        the two lists hold the predicted and true label of every sample in
        iteration order.
    """
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        progress_bar = tqdm(data_loader, desc=f"评估{dataset_name}")
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            loss = outputs.loss
            total_loss += loss.item()

            preds = torch.argmax(outputs.logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_loss = total_loss / len(data_loader)
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print(f"\n{dataset_name}分类报告:")
    # labels=[0, 1] pins the report to both classes. Without it the report fails
    # when only one class occurs in the data and predictions, because the two
    # target_names no longer match the number of observed classes.
    # zero_division=0 reports 0 for a class that is never predicted.
    print(classification_report(all_labels, all_preds,
                                labels=[0, 1],
                                target_names=['负面', '正面'],
                                digits=4,
                                zero_division=0))

    return avg_loss, accuracy, f1, all_preds, all_labels

# Status line printed once the training and evaluation functions above are defined.
print("✓ 训练和评估函数定义完成")

✓ 训练和评估函数定义完成


In [9]:
# 5. 主训练流程
def main():
    """Fine-tune and evaluate the sentiment classifier end to end.

    Steps, in order: print the run configuration; check that ``train.csv``,
    ``dev.csv`` and ``test.csv`` exist (offering to generate synthetic ones);
    load the tokenizer and the sequence-classification model; build the three
    datasets and data loaders; train for ``EPOCHS`` epochs while saving the
    model with the best dev-set F1; then reload that checkpoint, score it on the
    test set and write ``training_history.csv``, ``test_predictions.csv`` and
    the final model directory.

    Failures while loading the tokenizer, model or datasets, or while building
    the optimizer, are reported with ``print`` and end the run early. A failure
    in the final test stage is reported but not raised.

    Returns:
        None.
    """
    print("\n" + "="*60)
    print("开始情感分类训练（使用采样数据）")
    print("="*60)

    # Run configuration
    MODEL_NAME = "Qwen/Qwen2.5-0.5B"
    MAX_LENGTH = 128
    BATCH_SIZE = 8  # small batches to limit memory use
    EPOCHS = 2  # few epochs for a quick trial run
    LEARNING_RATE = 2e-5
    WARMUP_STEPS = 100  # scheduler warm-up length, in optimizer steps

    # Per-split caps on the number of samples used
    TRAIN_MAX_SAMPLES = 10000
    DEV_MAX_SAMPLES = 1000
    TEST_MAX_SAMPLES = 1000

    print(f"\n模型配置:")
    print(f"- 模型: {MODEL_NAME}")
    print(f"- 最大长度: {MAX_LENGTH}")
    print(f"- 批次大小: {BATCH_SIZE}")
    print(f"- 训练轮数: {EPOCHS}")
    print(f"- 学习率: {LEARNING_RATE}")
    print(f"- 训练集最大样本数: {TRAIN_MAX_SAMPLES}")
    print(f"- 验证集最大样本数: {DEV_MAX_SAMPLES}")
    print(f"- 测试集最大样本数: {TEST_MAX_SAMPLES}")

    # Check that the data files exist and report their size
    data_files = ["train.csv", "dev.csv", "test.csv"]
    missing_files = []

    print(f"\n检查数据文件:")
    for file in data_files:
        if os.path.exists(file):
            file_size = os.path.getsize(file) / 1024 / 1024  # MB
            try:
                with open(file, 'r', encoding='utf-8') as f:
                    line_count = sum(1 for _ in f)
                print(f"✓ {file}: {line_count} 行, {file_size:.2f} MB")
            except (OSError, UnicodeError):
                # Line counting is best-effort; an unreadable or non-UTF-8 file
                # still gets its size reported. Other exceptions
                # (e.g. KeyboardInterrupt) are no longer swallowed.
                print(f"✓ {file}: {file_size:.2f} MB (无法统计行数)")
        else:
            print(f"✗ {file}: 未找到")
            missing_files.append(file)

    if missing_files:
        print(f"\n警告: 缺少以下数据文件: {missing_files}")
        print("请确保文件在当前目录下")

        # Offer to generate small synthetic datasets.
        # NOTE: create_test_datasets() rewrites all three CSV files, including
        # any that already exist.
        create_test_data = input("是否创建测试数据? (y/n): ").lower()
        if create_test_data == 'y':
            create_test_datasets()
            print("测试数据已创建")
        else:
            print("请手动准备数据文件后重新运行")
            return

    print(f"\n加载分词器: {MODEL_NAME}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # Padding needs a pad token; fall back to the EOS token when none is defined
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        print("✓ 分词器加载成功")
    except Exception as e:
        print(f"✗ 分词器加载失败: {e}")
        return

    print(f"\n加载模型: {MODEL_NAME}")
    try:
        model = AutoModelForSequenceClassification.from_pretrained(
            MODEL_NAME,
            num_labels=2,
            problem_type="single_label_classification"
        )

        # Keep the model's pad token id in sync with the tokenizer
        if model.config.pad_token_id is None:
            model.config.pad_token_id = tokenizer.pad_token_id

        model = model.to(device)
        print("✓ 模型加载成功")

        # Report parameter counts
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"- 总参数量: {total_params:,}")
        print(f"- 可训练参数量: {trainable_params:,}")

    except Exception as e:
        print(f"✗ 模型加载失败: {e}")
        return

    # Load the three splits, each capped at its configured sample count.
    # NOTE(review): this relies on SentimentDataset accepting `max_samples`;
    # verify that the class definition in use provides that parameter.
    print(f"\n加载数据集（使用采样）...")
    try:
        train_dataset = SentimentDataset(
            "train.csv",
            tokenizer,
            MAX_LENGTH,
            has_header=False,
            max_samples=TRAIN_MAX_SAMPLES
        )
        dev_dataset = SentimentDataset(
            "dev.csv",
            tokenizer,
            MAX_LENGTH,
            has_header=False,
            max_samples=DEV_MAX_SAMPLES
        )
        test_dataset = SentimentDataset(
            "test.csv",
            tokenizer,
            MAX_LENGTH,
            has_header=False,
            max_samples=TEST_MAX_SAMPLES
        )

        print("✓ 数据集加载成功（已采样）")
    except Exception as e:
        print(f"✗ 数据集加载失败: {e}")
        return

    # Only the training split is shuffled
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    print(f"\n数据加载器创建完成:")
    print(f"- 训练集批次: {len(train_loader)} (共{len(train_dataset)}个样本)")
    print(f"- 验证集批次: {len(dev_loader)} (共{len(dev_dataset)}个样本)")
    print(f"- 测试集批次: {len(test_loader)} (共{len(test_dataset)}个样本)")

    # Optimizer: AdamW when it could be imported, plain Adam otherwise
    print(f"\n配置优化器...")
    try:
        if AdamW is not None:
            optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
            print("✓ 使用AdamW优化器")
        else:
            optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
            print("✓ 使用Adam优化器")
    except Exception as e:
        print(f"✗ 优化器配置失败: {e}")
        return

    # Learning-rate scheduler; it is stepped once per batch in train_epoch,
    # so the total step count is batches-per-epoch times epochs
    scheduler = None
    if get_linear_schedule_with_warmup:
        total_steps = len(train_loader) * EPOCHS
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=WARMUP_STEPS,
            num_training_steps=total_steps
        )
        print("✓ 学习率调度器配置成功")
    else:
        print("⚠ 无法配置学习率调度器，将不使用warmup")

    # Training loop
    print(f"\n开始训练循环...")
    # Start below any reachable F1 so the first epoch always produces a
    # checkpoint. With an initial 0 and a strict ">", a dev F1 of exactly 0
    # would never be saved and the reload below would fail.
    best_f1 = float("-inf")
    best_model_path = "best_qwen_sentiment_model"
    history = []

    for epoch in range(EPOCHS):
        print(f"\n{'='*50}")
        print(f"Epoch {epoch + 1}/{EPOCHS}")
        print(f"{'='*50}")

        # Train
        train_loss, train_acc, train_f1 = train_epoch(
            model, train_loader, optimizer, scheduler, device
        )
        print(f"训练结果 - Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}, F1: {train_f1:.4f}")

        # Evaluate on the dev split
        print(f"\n在验证集上评估...")
        dev_loss, dev_acc, dev_f1, _, _ = eval_model(model, dev_loader, device, "验证集")
        print(f"验证集结果 - Loss: {dev_loss:.4f}, Accuracy: {dev_acc:.4f}, F1: {dev_f1:.4f}")

        # Record this epoch's metrics
        history.append({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_acc': train_acc,
            'train_f1': train_f1,
            'dev_loss': dev_loss,
            'dev_acc': dev_acc,
            'dev_f1': dev_f1
        })

        # Save a checkpoint whenever the dev F1 improves
        if dev_f1 > best_f1:
            best_f1 = dev_f1
            try:
                model.save_pretrained(best_model_path)
                tokenizer.save_pretrained(best_model_path)
                print(f"✓ 保存最佳模型到 {best_model_path}，F1分数: {best_f1:.4f}")
            except Exception as e:
                print(f"✗ 保存模型失败: {e}")

    # Save the per-epoch metrics
    history_df = pd.DataFrame(history)
    history_df.to_csv('training_history.csv', index=False)
    print(f"\n✓ 训练历史已保存到 training_history.csv")

    # Score the best checkpoint on the test split
    print(f"\n{'='*50}")
    print("测试最佳模型")
    print(f"{'='*50}")

    try:
        # Reload from disk so the tested weights are the saved best ones
        best_model = AutoModelForSequenceClassification.from_pretrained(best_model_path)
        best_model = best_model.to(device)
        print("✓ 最佳模型加载成功")

        test_loss, test_acc, test_f1, test_preds, test_labels = eval_model(
            best_model, test_loader, device, "测试集"
        )

        print(f"\n最终测试结果:")
        print(f"- Loss: {test_loss:.4f}")
        print(f"- Accuracy: {test_acc:.4f}")
        print(f"- F1 Score: {test_f1:.4f}")

        # Save one row per test sample: true label, predicted label, correct flag
        test_results = pd.DataFrame({
            '真实标签': test_labels,
            '预测标签': test_preds,
            '预测正确': [1 if pred == true else 0 for pred, true in zip(test_preds, test_labels)]
        })
        test_results.to_csv('test_predictions.csv', index=False)
        print(f"\n✓ 预测结果已保存到 test_predictions.csv")

        # Save the final model alongside its tokenizer
        final_model_path = "final_qwen_sentiment_model"
        best_model.save_pretrained(final_model_path)
        tokenizer.save_pretrained(final_model_path)
        print(f"✓ 最终模型已保存到 {final_model_path}")

    except Exception as e:
        print(f"✗ 测试最佳模型失败: {e}")

# Status line printed once main() above has been defined (it is not called here).
print("✓ 主训练流程定义完成（已添加采样功能）")

✓ 主训练流程定义完成（已添加采样功能）


In [10]:
# 6. 辅助函数和测试
def create_test_datasets():
    """Write small synthetic ``train.csv``, ``dev.csv`` and ``test.csv`` files.

    All files are written to the current working directory without a header
    row, with the columns polarity, title, text. Rows alternate between
    polarity 1 and polarity 2. The training file has 1000 rows; the dev and
    test files are identical and have 100 rows each.

    Returns:
        None.
    """
    print("\n创建测试数据集...")

    columns = ['polarity', 'title', 'text']

    # Training split: even rows are negative (1), odd rows are positive (2)
    train_rows = []
    for i in range(1000):
        if i % 2 == 0:
            train_rows.append((1, '负面评论', '质量很差，不推荐购买。'))
        else:
            train_rows.append((2, '正面评论', '质量很好，强烈推荐！'))
    train_df = pd.DataFrame(train_rows, columns=columns)

    # Dev/test split: same alternation; every row shares one body text
    eval_rows = [
        (
            1 if i % 2 == 0 else 2,
            '测试负面' if i % 2 == 0 else '测试正面',
            '这是一个测试评论。',
        )
        for i in range(100)
    ]
    eval_df = pd.DataFrame(eval_rows, columns=columns)

    # Write the CSV files without index or header
    train_df.to_csv('train.csv', index=False, header=False)
    for target in ('dev.csv', 'test.csv'):
        eval_df.to_csv(target, index=False, header=False)

    print("✓ 测试数据集创建完成:")
    print(f"  - train.csv: {len(train_df)} 行")
    print(f"  - dev.csv: {len(eval_df)} 行")
    print(f"  - test.csv: {len(eval_df)} 行")

def predict_sentiment(text, model_path="final_qwen_sentiment_model", max_length=128):
    """Classify the sentiment of ``text`` with a saved model.

    The tokenizer and model are loaded from ``model_path`` on every call.

    Args:
        text: Text to classify.
        model_path: Directory (or identifier) the tokenizer and model are loaded from.
        max_length: Sequence length used for truncation and padding.

    Returns:
        Tuple ``(sentiment_text, confidence, sentiment_label)``:
        ``sentiment_label`` is the argmax class (0 or 1), ``sentiment_text`` is
        ``"负面"`` for 0 and ``"正面"`` otherwise, and ``confidence`` is the
        softmax probability of the chosen class. On any exception the error is
        printed and ``("未知", 0.0, -1)`` is returned.
    """
    try:
        # Load the tokenizer and model, then switch the model to inference mode
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        model = model.to(device)
        model.eval()

        # Encode the text as a batch of one, padded to max_length
        encoding = tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt'
        )

        with torch.no_grad():
            logits = model(
                input_ids=encoding['input_ids'].to(device),
                attention_mask=encoding['attention_mask'].to(device)
            ).logits

            # Class index with the highest score
            sentiment_label = torch.argmax(logits, dim=1).item()
            if sentiment_label == 0:
                sentiment_text = "负面"
            else:
                sentiment_text = "正面"

            # Softmax probability assigned to the predicted class
            class_probs = torch.nn.functional.softmax(logits, dim=1)
            confidence = class_probs[0][sentiment_label].item()

        return sentiment_text, confidence, sentiment_label

    except Exception as e:
        print(f"预测失败: {e}")
        return "未知", 0.0, -1

def test_prediction():
    """Run ``predict_sentiment`` on five fixed sample texts and print each result.

    Returns:
        None.
    """
    banner = "=" * 50
    print("\n" + banner)
    print("测试情感预测")
    print(banner)

    samples = (
        "这个产品质量非常好，我非常满意！",
        "糟糕的体验，不会再购买了。",
        "一般般，没什么特别的感觉，凑合能用。",
        "物超所值，性价比很高，推荐给大家。",
        "客服态度差，解决问题效率低。",
    )

    # Cases are numbered from 1 in the printed output
    for offset, text in enumerate(samples):
        case_no = offset + 1
        sentiment, confidence, label = predict_sentiment(text)
        print(f"\n测试 {case_no}:")
        print(f"文本: {text}")
        print(f"情感: {sentiment} (标签: {label})")
        print(f"置信度: {confidence:.4f}")
        print("-" * 40)

# Status line printed once the helper functions above are defined.
print("✓ 辅助函数定义完成")

# Interactive driver: each step is opt-in and runs only on an explicit "y"
if __name__ == "__main__":
    print("准备运行主程序...")

    # (prompt, action) pairs, asked in order: create data, train, test prediction
    steps = (
        ("是否创建测试数据? (y/n): ", create_test_datasets),
        ("是否开始训练? (y/n): ", main),
        ("是否测试预测功能? (y/n): ", test_prediction),
    )
    for prompt, action in steps:
        if input(prompt).lower() == 'y':
            action()

    print("\n" + "="*50)
    print("程序执行完成！")
    print("="*50)

✓ 辅助函数定义完成
准备运行主程序...

创建测试数据集...
✓ 测试数据集创建完成:
  - train.csv: 1000 行
  - dev.csv: 100 行
  - test.csv: 100 行

开始情感分类训练（使用采样数据）

模型配置:
- 模型: Qwen/Qwen2.5-0.5B
- 最大长度: 128
- 批次大小: 8
- 训练轮数: 2
- 学习率: 2e-05
- 训练集最大样本数: 10000
- 验证集最大样本数: 1000
- 测试集最大样本数: 1000

检查数据文件:
✓ train.csv: 1000 行, 0.05 MB
✓ dev.csv: 100 行, 0.00 MB
✓ test.csv: 100 行, 0.00 MB

加载分词器: Qwen/Qwen2.5-0.5B


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

✓ 分词器加载成功

加载模型: Qwen/Qwen2.5-0.5B


config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen2.5-0.5B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✓ 模型加载成功
- 总参数量: 494,034,560
- 可训练参数量: 494,034,560

加载数据集（使用采样）...

加载数据集: train.csv
- 原始数据行数: 1000
- 有效数据行数: 1000
- 标签分布: 负面(0): 500, 正面(1): 500
- 样本示例:
  样本1: 文本='负面评论 质量很差，不推荐购买。', 标签=0
  样本2: 文本='正面评论 质量很好，强烈推荐！', 标签=1
  样本3: 文本='负面评论 质量很差，不推荐购买。', 标签=0

加载数据集: dev.csv
- 原始数据行数: 100
- 有效数据行数: 100
- 标签分布: 负面(0): 50, 正面(1): 50
- 样本示例:
  样本1: 文本='测试负面 这是一个测试评论。', 标签=0
  样本2: 文本='测试正面 这是一个测试评论。', 标签=1
  样本3: 文本='测试负面 这是一个测试评论。', 标签=0

加载数据集: test.csv
- 原始数据行数: 100
- 有效数据行数: 100
- 标签分布: 负面(0): 50, 正面(1): 50
- 样本示例:
  样本1: 文本='测试负面 这是一个测试评论。', 标签=0
  样本2: 文本='测试正面 这是一个测试评论。', 标签=1
  样本3: 文本='测试负面 这是一个测试评论。', 标签=0
✓ 数据集加载成功（已采样）

数据加载器创建完成:
- 训练集批次: 125 (共1000个样本)
- 验证集批次: 13 (共100个样本)
- 测试集批次: 13 (共100个样本)

配置优化器...
✓ 使用AdamW优化器
✓ 学习率调度器配置成功

开始训练循环...

Epoch 1/2


训练: 100%|██████████| 125/125 [09:23<00:00,  4.51s/it, loss=0]     


训练结果 - Loss: 0.1812, Accuracy: 0.9750, F1: 0.9750

在验证集上评估...


评估验证集: 100%|██████████| 13/13 [00:13<00:00,  1.01s/it]



验证集分类报告:
              precision    recall  f1-score   support

          负面     0.5000    1.0000    0.6667        50
          正面     0.0000    0.0000    0.0000        50

    accuracy                         0.5000       100
   macro avg     0.2500    0.5000    0.3333       100
weighted avg     0.2500    0.5000    0.3333       100

验证集结果 - Loss: 9.3914, Accuracy: 0.5000, F1: 0.3333
✓ 保存最佳模型到 best_qwen_sentiment_model，F1分数: 0.3333

Epoch 2/2


训练: 100%|██████████| 125/125 [08:02<00:00,  3.86s/it, loss=0]


训练结果 - Loss: 0.0000, Accuracy: 1.0000, F1: 1.0000

在验证集上评估...


评估验证集: 100%|██████████| 13/13 [00:13<00:00,  1.02s/it]



验证集分类报告:
              precision    recall  f1-score   support

          负面     0.5000    1.0000    0.6667        50
          正面     0.0000    0.0000    0.0000        50

    accuracy                         0.5000       100
   macro avg     0.2500    0.5000    0.3333       100
weighted avg     0.2500    0.5000    0.3333       100

验证集结果 - Loss: 9.3921, Accuracy: 0.5000, F1: 0.3333

✓ 训练历史已保存到 training_history.csv

测试最佳模型
✓ 最佳模型加载成功


评估测试集: 100%|██████████| 13/13 [00:13<00:00,  1.05s/it]



测试集分类报告:
              precision    recall  f1-score   support

          负面     0.5000    1.0000    0.6667        50
          正面     0.0000    0.0000    0.0000        50

    accuracy                         0.5000       100
   macro avg     0.2500    0.5000    0.3333       100
weighted avg     0.2500    0.5000    0.3333       100


最终测试结果:
- Loss: 9.3914
- Accuracy: 0.5000
- F1 Score: 0.3333

✓ 预测结果已保存到 test_predictions.csv
✓ 最终模型已保存到 final_qwen_sentiment_model

测试情感预测


The tokenizer you are loading from 'final_qwen_sentiment_model' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.



测试 1:
文本: 这个产品质量非常好，我非常满意！
情感: 正面 (标签: 1)
置信度: 1.0000
----------------------------------------


The tokenizer you are loading from 'final_qwen_sentiment_model' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.



测试 2:
文本: 糟糕的体验，不会再购买了。
情感: 负面 (标签: 0)
置信度: 1.0000
----------------------------------------


The tokenizer you are loading from 'final_qwen_sentiment_model' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.



测试 3:
文本: 一般般，没什么特别的感觉，凑合能用。
情感: 正面 (标签: 1)
置信度: 0.5361
----------------------------------------


The tokenizer you are loading from 'final_qwen_sentiment_model' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.



测试 4:
文本: 物超所值，性价比很高，推荐给大家。
情感: 正面 (标签: 1)
置信度: 0.9924
----------------------------------------


The tokenizer you are loading from 'final_qwen_sentiment_model' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.



测试 5:
文本: 客服态度差，解决问题效率低。
情感: 负面 (标签: 0)
置信度: 1.0000
----------------------------------------

程序执行完成！
