In [1]:
import pandas as pd
import json
import os
from sklearn.model_selection import train_test_split

# --- 1. Setup Paths ---
# This is our single source of truth: the final, cleaned 3-class dataset
source_file = '../data/TRAINING_DATASET_FINAL.csv'

# Define the output directory within the LLaMA-Factory project
llama_factory_data_dir = '../../LLaMA-Factory/data'
os.makedirs(llama_factory_data_dir, exist_ok=True)

# --- 2. Load the Source Data ---
print(f"--- 正在加载最终数据集: {source_file} ---")
df_source = pd.read_csv(source_file)

# --- 3. Split the data into Train (80%), Validation (10%), and Test (10%) sets ---
print("--- 正在划分数据集... ---")
# First, split off the test set (10% of the total data)
df_train_val, df_test = train_test_split(
    df_source,
    test_size=0.1,
    random_state=42,
    stratify=df_source['sentiment']
)

# Then, split the remaining 90% into training and validation sets
# test_size=0.1111 is used to get 10% of the *original* total data (10/90 ≈ 0.1111)
df_train, df_val = train_test_split(
    df_train_val,
    test_size=0.1111,
    random_state=42,
    stratify=df_train_val['sentiment']
)

print(f"数据集划分完成:")
print(f"  训练集 (Train): {len(df_train)} 条")
print(f"  验证集 (Validation): {len(df_val)} 条")
print(f"  测试集 (Test): {len(df_test)} 条")

# --- 4. Define the Conversion Function (remains the same) ---
def convert_to_jsonl(df, output_path):
    instruction = "你是一个专业的情感分析师。请判断以下用户评论的情感是“偏正向”、“中性”还是“偏负向”。"
    label_map = {0: "偏负向", 1: "中性", 2: "偏正向"}
    
    with open(output_path, 'w', encoding='utf-8') as f:
        for index, row in df.iterrows():
            json_object = {
                "instruction": instruction,
                "input": row['final_text'],
                "output": label_map[row['sentiment']]
            }
            f.write(json.dumps(json_object, ensure_ascii=False) + '\n')
    print(f"成功将 {len(df)} 条数据转换并保存到: {output_path}")

# --- 5. Convert all three sets to .jsonl format ---
print("\n--- 正在转换文件格式... ---")
convert_to_jsonl(df_train, os.path.join(llama_factory_data_dir, 'sentiment_train.jsonl'))
convert_to_jsonl(df_val, os.path.join(llama_factory_data_dir, 'sentiment_validation.jsonl'))
convert_to_jsonl(df_test, os.path.join(llama_factory_data_dir, 'sentiment_test.jsonl'))

--- 正在加载最终数据集: ../data/TRAINING_DATASET_FINAL.csv ---
--- 正在划分数据集... ---
数据集划分完成:
  训练集 (Train): 23706 条
  验证集 (Validation): 2963 条
  测试集 (Test): 2964 条

--- 正在转换文件格式... ---
成功将 23706 条数据转换并保存到: ../../LLaMA-Factory/data\sentiment_train.jsonl
成功将 2963 条数据转换并保存到: ../../LLaMA-Factory/data\sentiment_validation.jsonl
成功将 2964 条数据转换并保存到: ../../LLaMA-Factory/data\sentiment_test.jsonl


In [3]:
import pandas as pd
import json

# --- 1. 找到你的“答卷”文件 (路径保持不变) ---
eval_folder = "saves/Qwen1.5-7B-Chat/lora/eval_2025-08-09-23-25-01" 
prediction_file = f"../../LLaMA-Factory/{eval_folder}/generated_predictions.jsonl"

print(f"--- 正在从以下文件计算准确率 ---\n{prediction_file}\n")

# --- 2. 读取答卷 (保持不变) ---
predictions = []
with open(prediction_file, 'r', encoding='utf-8') as f:
    for line in f:
        predictions.append(json.loads(line))
df_results = pd.DataFrame(predictions)

# --- 3. 【新增】定义一个智能“关键词提取”函数 ---
def extract_sentiment_keyword(text):
    if "偏正向" in text:
        return "偏正向"
    elif "偏负向" in text:
        return "偏负向"
    elif "中性" in text:
        return "中性"
    else:
        return "未知" # 如果模型回答得乱七八糟，就标记为未知

# --- 4. 开始“智能阅卷” ---
# a. 清理标准答案：去掉末尾的换行符等空白
df_results['clean_label'] = df_results['label'].str.strip()
# b. 从模型的啰嗦回答中提取关键词
df_results['clean_predict'] = df_results['predict'].apply(extract_sentiment_keyword)

# c. 比较清理后的预测和清理后的标准答案
correct_predictions = df_results[df_results['clean_predict'] == df_results['clean_label']]

# --- 5. 计算最终分数 ---
total_samples = len(df_results)
correct_count = len(correct_predictions)
accuracy = correct_count / total_samples

print(f"--- 评测结果分析 ---")
print(f"总样本数: {total_samples}")
print(f"预测正确数: {correct_count}")
print(f"准确率 (Accuracy): {accuracy:.2%}") # 格式化为百分比

# --- 6. (可选) 查看一些错误的例子，帮助分析 ---
print("\n--- 错误案例抽样 (模型回答 vs. 正确答案) ---")
error_df = df_results[df_results['clean_predict'] != df_results['clean_label']]
print(error_df[['predict', 'clean_label']].head())

--- 正在从以下文件计算准确率 ---
../../LLaMA-Factory/saves/Qwen1.5-7B-Chat/lora/eval_2025-08-09-23-25-01/generated_predictions.jsonl

--- 评测结果分析 ---
总样本数: 128
预测正确数: 92
准确率 (Accuracy): 71.88%

--- 错误案例抽样 (模型回答 vs. 正确答案) ---
                                              predict clean_label
3   根据这条用户评论，“歌曲合集不错。”，可以判断其情感偏向于“正向”。用户的表述中，“不错”是...         偏正向
8   根据这句话，我会判断它的情感是"偏负向"。这句话表面看起来在谈论找不到CD的失望，但从语境中...         偏正向
17  这个用户评论的情感可以被判断为“偏负向”。虽然用户表达了对王子的音乐才华和新发布的“Vaul...          中性
19  这个用户评论的情感是“偏正向”。虽然评论中提到了一些对现有专辑的改进意愿，增加了不同语言版本...          中性
20  这个评论的情感可以被判断为"偏正向"。虽然评论者指出《献给康妮》的版本更完整，但整体上，他们...          中性
