1. 检查是否使用了BF16
2. 检查finetune文件中的system与测试函数中的system是否相同

In [None]:
import os
import pandas as pd
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM
import psutil
import torch
import shutil
from modelscope import snapshot_download
import subprocess
import json
import pickle

In [None]:
def gpu_info():
    """Print the properties of CUDA device 0, or a notice when CUDA is unavailable.

    Reports the device name, total memory in GiB (two decimals), compute
    capability and the number of streaming multiprocessors.
    """
    if not torch.cuda.is_available():
        print("No GPU available.")
        return

    gpu = torch.cuda.get_device_properties(0)
    print(f"GPU Name: {gpu.name}")
    print(f"Total GPU RAM: {gpu.total_memory / (1024 ** 3):.2f} GB")
    print(f"Compute Capability: {gpu.major}.{gpu.minor}")
    # multi_processor_count is the number of streaming multiprocessors (SMs),
    # not the number of CUDA cores, so it is labelled as such.
    print(f"Multiprocessors (SMs): {gpu.multi_processor_count}")

def system_info(description):
    """Print ``description`` followed by RAM, GPU-memory and disk usage figures.

    Sizes are printed in GiB (bytes / 1024**3) without rounding. GPU figures
    refer to CUDA device 0 and are all zero when CUDA is unavailable; the
    "used" GPU value is taken from ``torch.cuda.memory_allocated``. Disk
    figures describe the filesystem mounted at ``/``.
    """
    bytes_per_gib = 1024 ** 3

    def collect_stats():
        # System memory.
        mem = psutil.virtual_memory()
        ram_stats = {
            "Total": mem.total / bytes_per_gib,
            "Used": mem.used / bytes_per_gib,
            "Percentage": mem.percent,
        }

        # GPU memory of device 0 (zeros when there is no CUDA device).
        if not torch.cuda.is_available():
            gpu_stats = {"Total": 0, "Used": 0, "Percentage": 0}
        else:
            props = torch.cuda.get_device_properties(0)
            gpu_total = props.total_memory / bytes_per_gib
            gpu_used = torch.cuda.memory_allocated(0) / bytes_per_gib
            gpu_stats = {
                "Total": gpu_total,
                "Used": gpu_used,
                "Percentage": (gpu_used / gpu_total) * 100,
            }

        # Root filesystem.
        root_fs = psutil.disk_usage('/')
        disk_stats = {
            "Total": root_fs.total / bytes_per_gib,
            "Used": root_fs.used / bytes_per_gib,
            "Percentage": root_fs.percent,
        }

        return {"RAM": ram_stats, "GPU_RAM": gpu_stats, "Disk": disk_stats}

    stats = collect_stats()

    # (heading, stats key, label for total, label for used, label for percentage)
    layout = (
        ("System RAM Usage:", "RAM", "Total RAM", "Used RAM", "RAM Percentage"),
        ("GPU RAM Usage:", "GPU_RAM", "Total GPU RAM", "Used GPU RAM", "GPU RAM Percentage"),
        ("Disk Usage:", "Disk", "Total Disk Space", "Used Disk Space", "Disk Percentage"),
    )

    print(description)
    for position, (heading, key, total_label, used_label, pct_label) in enumerate(layout):
        if position:
            # Blank line between sections, none after the last one.
            print()
        section = stats[key]
        print(heading)
        print(f"{total_label}: {section['Total']} GB")
        print(f"{used_label}: {section['Used']} GB")
        print(f"{pct_label}: {section['Percentage']} %")

In [None]:
def get_model(model_id):
    """Download a model from ModelScope and move it into the local ``model`` directory.

    Args:
        model_id: Model identifier passed to ``snapshot_download``.

    Returns:
        Absolute path of the moved model directory, i.e.
        ``model/<name of the downloaded directory>``.

    Raises:
        shutil.Error: Raised by ``shutil.move`` when the target directory
            inside ``model`` already exists.
    """
    model_dir = snapshot_download(model_id)
    # shutil.move only places the source *inside* the destination when the
    # destination is an existing directory; otherwise it renames the source to
    # ``model`` itself and the path returned below would not exist.
    os.makedirs('model', exist_ok=True)
    shutil.move(model_dir, 'model')
    # normpath drops a trailing separator so that basename yields the directory
    # name rather than an empty string.
    model_name = os.path.basename(os.path.normpath(model_dir))
    return os.path.abspath(os.path.join('model', model_name))

In [None]:
def _parse_model_reply(reply):
    """Decode the dict-like text returned by the model into a Python object."""
    import ast

    try:
        return json.loads(reply)
    except json.JSONDecodeError:
        pass
    try:
        # Handles Python-literal style replies (single-quoted keys/values)
        # without corrupting apostrophes that appear inside double-quoted text.
        return ast.literal_eval(reply)
    except (ValueError, SyntaxError):
        # Last resort: the legacy quote swap; raises json.JSONDecodeError when
        # the reply cannot be decoded at all.
        return json.loads(reply.replace("'", '"'))


def get_test(test_data, model, tokenizer, *, system="评估新闻", epoch=None):
    """Run the model on a test set and score its sentiment/impact predictions.

    Args:
        test_data: DataFrame with the columns ``question``, ``sentiment_old``
            and ``impact_old``. It is not modified; a copy is used.
        model: Object exposing ``chat(tokenizer, query, history=..., system=...)``
            that returns a ``(response, history)`` pair.
        tokenizer: Tokenizer forwarded to ``model.chat``.
        system: System prompt sent with every question. It should match the
            system prompt used in the fine-tuning data.
        epoch: 1-based epoch number shown in the usage report. When omitted,
            the value is derived from a global loop counter named ``i``
            (legacy behaviour of the notebook's training loop).

    Returns:
        A new DataFrame holding the input columns plus ``result_new`` (parsed
        reply), ``reason_new``, ``sentiment_new``, ``impact_new``, the squared
        errors ``sentiment_se`` / ``impact_se`` and the boolean flags
        ``sentiment_se_0.1`` / ``impact_se_0.1`` (squared error above 0.1).

    Raises:
        json.JSONDecodeError: If a model reply cannot be parsed.
    """
    # Work on a copy so the caller's frame, which is reused for every epoch,
    # is left untouched.
    test_data = test_data.copy()

    # Run inference on every row of the test set.
    def get_response(row):
        response, _ = model.chat(tokenizer, row['question'], history=None, system=system)
        return response

    test_data['result_new'] = test_data.apply(get_response, axis=1)
    # Parse each reply and expand it into reason / sentiment / impact columns.
    test_data['result_new'] = test_data['result_new'].map(_parse_model_reply)
    test_data = test_data.reset_index(drop=True)
    test_all = pd.concat([test_data, pd.json_normalize(test_data['result_new'])], axis=1)
    # Mark the freshly predicted columns with a ``_new`` suffix.
    rename_dict = {col: col + '_new' for col in ['reason', 'sentiment', 'impact']}
    test_all = test_all.rename(columns=rename_dict)
    # The scores may arrive as strings; convert them to numbers.
    test_all[['sentiment_new', 'impact_new']] = test_all[['sentiment_new', 'impact_new']].apply(pd.to_numeric)

    # Squared sentiment error. When the reference and the prediction have
    # opposite signs, the absolute error is floored at 1 before squaring.
    def get_sentiment_error(row):
        old = row['sentiment_old']
        new = row['sentiment_new']
        if old * new >= 0:
            error = new - old
        else:
            error = 1 if abs(new - old) <= 1 else abs(new - old)
        return error ** 2

    # Squared impact error.
    def get_impact_error(row):
        old = row['impact_old']
        new = row['impact_new']
        return abs(new - old) ** 2

    test_all['sentiment_se'] = test_all.apply(get_sentiment_error, axis=1)
    test_all['sentiment_se_0.1'] = test_all['sentiment_se'] > 0.1
    test_all['impact_se'] = test_all.apply(get_impact_error, axis=1)
    test_all['impact_se_0.1'] = test_all['impact_se'] > 0.1

    # Print the resource usage report, headed by the epoch number.
    if epoch is None:
        # Backward compatibility: existing callers rely on the enclosing
        # training loop exposing its 0-based counter as the global ``i``.
        loop_index = globals().get("i")
        if isinstance(loop_index, int):
            epoch = loop_index + 1
    description = f"Training epochs: {epoch if epoch is not None else 'unknown'}"
    system_info(description)
    return test_all

In [None]:
# Print the properties of CUDA device 0 (or a notice when no GPU is available).
gpu_info()

In [None]:
# Print a baseline RAM / GPU-memory / disk usage report with an empty heading line.
system_info('')

In [None]:
# Paths of the training files. os.listdir returns entries in arbitrary order,
# so sort by name: the files are consumed one after another by the sequential
# fine-tuning loop and their position determines the adapter/model index.
train_path = 'data/train'
train_list = [os.path.join(train_path, file) for file in sorted(os.listdir(train_path))]
# Test sets, loaded in name order so result positions are reproducible.
# NOTE(review): read_pickle unpickles the files; only load data from a trusted source.
test_path = 'data/test'
test_list = [pd.read_pickle(os.path.join(test_path, file)) for file in sorted(os.listdir(test_path))]

In [None]:
# Download the base model from ModelScope, move it under ./model and keep its absolute path.
model_path = get_model('TongyiFinance/Tongyi-Finance-14B-Chat')
# Alternative: assign the path of an existing local model directory instead of downloading.
# model_path = ''
# Bare expression so the notebook cell displays the resolved path.
model_path

In [None]:
import gc

# One entry per training file: the list of scored test frames for that epoch.
all_test = []
# NOTE: the loop counter must stay named ``i``; get_test() reads it as a global
# to build the heading of its usage report.
for i, train in enumerate(train_list):
    # Drop the previous epoch's model before the fine-tuning subprocess starts,
    # so this process does not keep its weights in memory during training.
    # (The objects of the final epoch stay bound after the loop.)
    model = tokenizer = None
    gc.collect()
    torch.cuda.empty_cache()
    # Output directory of this epoch's LoRA adapter.
    adapter_path = f"adapter/{i:02d}"
    # Run the fine-tuning script. The command is an argument list: without
    # shell=True a single command string is looked up as one executable name on
    # POSIX, and a list also keeps paths containing spaces intact.
    # check=True stops the run when fine-tuning fails instead of continuing
    # with a missing or stale adapter.
    command_bash = [
        "bash", "finetune_lora_single_gpu.sh",
        "-m", model_path,
        "-d", train,
        "-o", adapter_path,
    ]
    subprocess.run(command_bash, check=True)
    # Load the fine-tuned model (base model + adapter) in evaluation mode.
    model = AutoPeftModelForCausalLM.from_pretrained(
        adapter_path,
        device_map="auto",
        trust_remote_code=True,
    ).eval()
    # Load the tokenizer saved alongside the adapter.
    tokenizer = AutoTokenizer.from_pretrained(adapter_path, trust_remote_code=True)
    # Evaluate on every test set.
    res_list = [get_test(data, model, tokenizer) for data in test_list]
    all_test.append(res_list)
    # Merge the adapter into the base weights and save the result under ./model.
    ft_model_path = os.path.abspath(f'model/finetune_{i:02d}')
    model.merge_and_unload().save_pretrained(ft_model_path, max_shard_size="2048MB", safe_serialization=True)
    tokenizer.save_pretrained(ft_model_path)
    # Carry over the .cpp / .cu source files shipped with the current base
    # model, which save_pretrained does not write.
    for file in os.listdir(model_path):
        if file.endswith(('.cpp', '.cu')):
            shutil.copy(os.path.join(model_path, file), ft_model_path)
    # The merged model becomes the base model of the next epoch.
    model_path = ft_model_path
# Persist all evaluation results.
with open('outcome.pkl', 'wb') as f:
    pickle.dump(all_test, f)