In [73]:
import json
import random
import pandas as pd

dataset = "Chest-X-ray"

# convert csv to jsonl
data = pd.read_csv(f"../../data/eval/{dataset}.csv")

# 将DataFrame转换为字典列表
data_list = data.to_dict(orient='records')

# 固定随机种子并打乱数据
random.seed(1234)
random.shuffle(data_list)
split_point = int(len(data_list) * 0.75)

new_data = []
for idx, item in enumerate(data_list):
    # 提取img_path和Finding Label
    img_path = item.get("img_path")
    finding_label = item.get("Finding Label").lower()

    if isinstance(finding_label, str):
        finding_label = finding_label.replace('|', ', ')
        
    # 创建新的数据项
    new_item = {
        "image": img_path,
        "label": finding_label,
        "split": "train" if idx < split_point else "test" 
    }
    
    # 添加到new_data列表中
    new_data.append(new_item)
random.shuffle(new_data)
# 将数据保存为JSONL文件
jsonl_file = f"../../data/eval/{dataset}.jsonl"
with open(jsonl_file, mode='w', encoding='utf-8') as f:
    for item in new_data:
        f.write(json.dumps(item) + "\n")

print(f"数据已成功保存到 {jsonl_file} 文件中。")

数据已成功保存到 ../../data/eval/Chest-X-ray.jsonl 文件中。


In [72]:
import json
import random
import pandas as pd

dataset = "Chest-X-ray"
labels = set()

jsonl_file = f"../../data/eval/{dataset}.jsonl"
with open(jsonl_file, mode='w', encoding='utf-8') as f:
    for item in new_data:
        label = item.get("label")
        if label:  # 如果 label 存在
            split_labels = label.split(",")  # 根据逗号拆分标签
            split_labels = [lbl.strip() for lbl in split_labels]  # 去掉每个类别的前后空格
            labels.update(split_labels)  # 更新到集合中

# 将集合转换为列表并保存为 JSON 文件
labels_list = list(labels)
output_file = f"../../data/eval/{dataset}_classes.json"

with open(output_file, mode='w', encoding='utf-8') as f:
    json.dump(labels_list, f, ensure_ascii=False, indent=4)

print(f"Labels saved to {output_file}")

Labels saved to ../../data/eval/Chest-X-ray_classes.json


In [1]:
import json
import random
import pandas as pd

dataset = "Chest-X-ray"

data = [json.loads(line) for line in open(f"../data/eval/{dataset}.jsonl")]


new_data = []
for idx, item in enumerate(data):
    new_item = {
        "image": item["image"].replace("/srv/lby/", ""),
        "text": "What disease is indicated by the chest X-ray?", 
        "category": "conv",
        "label": item["label"],
        "question_id": f'{idx}-{item["label"]}',
    }
    new_data.append(new_item)

random.shuffle(new_data)

jsonl_file = f"../data/eval/test_prompt/{dataset}_llava_val.jsonl"
with open(jsonl_file, mode='w', encoding='utf-8') as f:
    for item in new_data:
        f.write(json.dumps(item) + "\n")
        
print(f"数据已成功保存到 {jsonl_file} 文件中。")


# You are now acting as a knowledgeable radiologist. Please analyze the provided medical image and identify the most appropriate disease category or categories present in the patient. The diagnosis can involve one or more conditions. You must select the relevant categories from the following list: ['atelectasis', 'cardiomegaly', 'pleural effusion', 'infiltration', 'lung mass', 'lung nodule', 'pneumonia', 'pneumothorax', 'consolidation', 'edema', 'emphysema', 'fibrosis', 'pleural thicken', 'hernia', 'no finding']. Remember, you should only output the categories from the list, and no additional content.
## 只输出类别的prompt
# You are now acting as a knowledgeable radiologist. Please analyze the provided medical image and identify the most appropriate disease category or categories present in the patient. You can only output the corresponding index number of the disease in the list from 0 to 14 from the given list: ['atelectasis', 'cardiomegaly', 'pleural effusion', 'infiltration', 'lung mass', 'lung nodule', 'pneumonia', 'pneumothorax', 'consolidation', 'edema', 'emphysema', 'fibrosis', 'pleural thicken', 'hernia', 'no finding'].
##ABCD尝试 
# What type of disease is shown in this chest x-ray image? Choose one from A. Atelectasis.\n B. Cardiomegaly.\n C. Pleural Effusion.\nD. Infiltration.
# ['fibrosis', 'edema', 'pneumothorax', 'cardiomegaly', 'atelectasis', 'nodule', 'emphysema', 'no finding', 'mass', 'pleural_thickening', 'effusion', 'infiltration', 'pneumonia', 'hernia', 'consolidation']
# "text": "What type of disease is shown in this chest x-ray image? Choose one from A. fibrosis.\n B. edema.\n C. pneumothorax.\nD. cardiomegaly. \nE. atelectasis \nF. nodule \nG. emphysema \nH. no finding \nI. mass \nJ. pleural thickening \nK. effusion \nL. infiltration \nM. pneumonia \nN. hernia \nO. consolidation", 


数据已成功保存到 ../data/eval/test_prompt/Chest-X-ray_llava_val.jsonl 文件中。


In [1]:
# 随机遍历jsonl文件1000行，获取每一行的question_id和text内容，question_id
from sklearn.metrics import roc_auc_score, precision_recall_curve, accuracy_score
import numpy as np
import torch
import json
import random

# jsonl_file = "/home/lby/llava_med/LLaVA-Med/llava/run/data/train/sft_data/modified_file.json"
jsonl_file = "/home/lby/llava_med/LLaVA-Med/llava/run/data/train/sft_data/classify_mimic_file_clip.json"
with open(jsonl_file, 'r', encoding='utf-8') as f:
    data = json.load(f)
        
for i, item in enumerate(data[:20]):
    print(f"Item {i + 1}: {item}")


# import csv

# csv_file = "/srv/lby/llava_med/other_data/cleaned_file.csv"

# # 打开CSV文件
# with open(csv_file, 'r', encoding='utf-8') as f:
#     reader = csv.DictReader(f)  # 使用DictReader读取CSV文件，这样每一行会被解析成字典

#     # 遍历前20行数据
#     for i, item in enumerate(reader):
#         if i >= 20:
#             break  # 只处理前20行
#         print(f"Item {i + 1}: {item}")

Item 1: {'id': 0, 'image': 'p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg', 'conversations': [{'from': 'human', 'value': 'What disease is indicated by the chest X-ray?\n<image>'}, {'from': 'gpt', 'value': 'This is a chest X-ray showing Normal'}]}
Item 2: {'id': 1, 'image': 'p10/p10000032/s53189527/2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab.jpg', 'conversations': [{'from': 'human', 'value': 'What disease is indicated by the chest X-ray?\n<image>'}, {'from': 'gpt', 'value': 'This is a chest X-ray showing Normal'}]}
Item 3: {'id': 2, 'image': 'p10/p10000032/s53911762/68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714.jpg', 'conversations': [{'from': 'human', 'value': 'What disease is indicated by the chest X-ray?\n<image>'}, {'from': 'gpt', 'value': 'This is a chest X-ray showing Normal'}]}
Item 4: {'id': 3, 'image': 'p10/p10000032/s53911762/fffabebf-74fd3a1f-673b6b41-96ec0ac9-2ab69818.jpg', 'conversations': [{'from': 'human', 'value': 'What disease is indicated by 

In [23]:
from sklearn.metrics import roc_auc_score, precision_recall_curve, accuracy_score
import numpy as np
import torch
import json
import random

def compute_AUCs(gt, pred, n_class):
    """计算每个标签的 AUC"""
    AUROCs = []
    gt_np = gt.cpu().numpy()
    pred_np = pred.cpu().numpy()
    for i in range(n_class):
        try:
            AUROCs.append(roc_auc_score(gt_np[:, i], pred_np[:, i]))
        except ValueError:
            AUROCs.append(np.nan)  # 当 AUC 计算失败时，填充 NaN
    return AUROCs

def get_acc_and_auc():
    # 读取数据
    output_path = '../data/eval/test_prompt/Chest-X-ray_llava_val_ans.jsonl'
    answers = [json.loads(line) for line in open(output_path)]

    disease_list = ['fibrosis', 'edema', 'pneumothorax', 'cardiomegaly', 'atelectasis', 'nodule', 'emphysema', 'no finding', 'mass', 'pleural_thickening', 'effusion', 'infiltration', 'pneumonia', 'hernia', 'consolidation']
    print(f"Total number of answers: {len(answers)}")
    
    # 映射字典：将A, B, C, D...映射到疾病列表
    label_mapping = {chr(65 + i): disease for i, disease in enumerate(disease_list)}
    # 手动映射两个列表中的疾病

    # 随机选择 1000 行
    random.shuffle(answers)
    selected_answers = answers[:1000]

    # 初始化 ground truth (gt) 和 predictions (pred)
    n_classes = len(disease_list)
    gt = torch.zeros((len(selected_answers), n_classes), dtype=torch.float32)
    pred = torch.zeros((len(selected_answers), n_classes), dtype=torch.float32)

    error_count = 0
    error_question_ids = []

    # 遍历每个 answer，提取 labels 和预测类别
    for idx, item in enumerate(selected_answers):
        # 获取标签（label），labels 可能包含多个标签
        labels = ["-".join(item["question_id"].split("-")[1:])]  # 获取 label
        labels = [label.lower() for label in labels]

        # 获取预测的 text 并映射到疾病
        text = item["text"].strip().upper()

        # 设置 ground truth
        for label in labels:
            for i, disease in enumerate(disease_list):
                if disease in label:
                    gt[idx, i] = 1  # 对应疾病的 ground truth 置为 1

        # 获取预测的类别 (A, B, C, D...)
        for char in text:
            if char in label_mapping:
                disease = label_mapping[char]
                disease_idx = disease_list.index(disease)
                pred[idx, disease_idx] = 1  # 对应疾病的预测值置为 1

        # 检查是否没有预测到任何类别
        if torch.sum(pred[idx]) == 0:
            error_count += 1
            error_question_ids.append(item["question_id"])

    # 计算多标签 AUC
    AUROCs = compute_AUCs(gt, pred, n_classes)
    AUROC_avg = np.nanmean(AUROCs)  # 计算 AUC 的平均值，忽略 NaN

    # 计算每个类别的准确率
    accs = []
    gt_np_all = gt.cpu().numpy()
    pred_np_all = pred.cpu().numpy()

    for i in range(n_classes):
        gt_np = gt_np_all[:, i]
        pred_np = pred_np_all[:, i]
        
        # print(f"Class {i} - Ground truth (sample): {gt_np[:10]}")
        # print(f"Class {i} - Predictions (sample): {pred_np[:10]}")
        acc = accuracy_score(gt_np, pred_np)
        accs.append(acc)
    
    # 计算类别平均准确率
    acc_avg = np.mean(accs)

    # 计算 F1 分数
    f1s = []

    for i in range(n_classes):
        gt_np = gt_np_all[:, i]
        pred_np = pred_np_all[:, i]

        precision, recall, thresholds = precision_recall_curve(gt_np, pred_np)
        if len(precision) > 1:
            numerator = 2 * recall * precision
            denom = recall + precision
            f1_scores = np.divide(numerator, denom, out=np.zeros_like(denom), where=(denom != 0))
            max_f1 = np.max(f1_scores)
            f1s.append(max_f1)
        else:
            f1s.append(np.nan)

    f1_avg = np.nanmean(f1s)  # 平均 F1

    # 输出结果
    print(f"Total labels: {len(selected_answers)}")
    print(f"Class distribution in ground truth: {np.sum(gt_np_all, axis=0)}")
    print(f"Class distribution in predictions: {np.sum(pred_np_all, axis=0)}")
    print(f"Average AUROC: {AUROC_avg:.4f}")
    print(f"Average F1: {f1_avg:.4f}")
    print(f"Average Accuracy: {acc_avg:.4f}")
    print(f"Number of errors: {error_count}")
    print(f"Error question IDs: {error_question_ids}")

# 调用函数进行计算
get_acc_and_auc()


Total number of answers: 1044
Total labels: 1000
Class distribution in ground truth: [ 13.  42.  96.  45. 134.  66.  37. 365.  64.  43. 191. 267.  28.   5.
  70.]
Class distribution in predictions: [296. 227.   0. 113.   0.   0.   2. 302. 421.   0.   3.  58. 320.   0.
   0.]
Average AUROC: 0.5012
Average F1: 0.1736
Average Accuracy: 0.8067
Number of errors: 0
Error question IDs: []


In [None]:
from sklearn.metrics import roc_auc_score, precision_recall_curve, accuracy_score
import numpy as np
import torch
import json
import random

def compute_AUCs(gt, pred, n_class):
    """计算每个标签的 AUC"""
    AUROCs = []
    gt_np = gt.cpu().numpy()
    pred_np = pred.cpu().numpy()
    for i in range(n_class):
        if np.sum(gt_np[:, i]) == 0:  # 跳过全 0 的类别
            AUROCs.append(np.nan)
        else:
            try:
                AUROCs.append(roc_auc_score(gt_np[:, i], pred_np[:, i]))
            except ValueError:
                AUROCs.append(np.nan)  # 当 AUC 计算失败时，填充 NaN
    return AUROCs

def get_acc_and_auc():
    # 读取数据
    output_path = '../data/eval/test_prompt/Chest-X-ray_llava_val_ans.jsonl'
    answers = [json.loads(line) for line in open(output_path)]

    disease_list = ['fibrosis', 'edema', 'pneumothorax', 'cardiomegaly', 'atelectasis', 'nodule', 'emphysema', 'no finding', 'mass', 'pleural_thickening', 'effusion', 'infiltration', 'pneumonia', 'hernia', 'consolidation']
    print(f"Total number of answers: {len(answers)}")
    
    # 手动映射两个列表中的疾病
    # disease_mapping = {
    #     'atelectasis': 'Atelectasis',
    #     'cardiomegaly': 'Cardiomegaly',
    #     'consolidation': 'Consolidation',
    #     'edema': 'Edema',
    #     'pneumothorax': 'Pneumothorax',
    #     'effusion': 'Pleural Effusion',
    #     'pneumonia': 'Pneumonia',
    #     'no finding': 'Normal',
    #     'pleural_thickening': 'Pleural Other',
    #     'mass': 'Lung Lesion',
    #     'nodule': 'Lung Lesion',
    #     'infiltration': 'Lung Opacity',
    #     'fibrosis': None,      # 未映射的疾病
    #     'emphysema': None,     # 未映射的疾病
    #     'hernia': None         # 未映射的疾病
    # }

    # 随机选择 1000 行
    random.shuffle(answers)
    selected_answers = answers[:1000]

    # 初始化 ground truth (gt) 和 predictions (pred)
    n_classes = len(disease_list)
    gt = torch.zeros((len(selected_answers), n_classes), dtype=torch.float32)
    pred = torch.zeros((len(selected_answers), n_classes), dtype=torch.float32)

    error_count = 0
    error_question_ids = []

    # 遍历每个 answer，提取 labels 和预测类别
    for idx, item in enumerate(selected_answers):
        # 获取标签（label），labels 可能包含多个标签
        labels = ["-".join(item["question_id"].split("-")[1:])]  # 获取 label
        labels = [label.lower() for label in labels]

        # 设置 ground truth
        for label in labels:
            if label in disease_list:
                disease_idx = disease_list.index(label)
                gt[idx, disease_idx] = 1  # 对应疾病的 ground truth 置为 1

        # 获取预测的 text 并映射到疾病
        text = item["text"].strip().lower()

        # 设置预测的类别，映射预测类别为 ground truth 中的疾病
        for disease in disease_list:
            # mapped_disease = disease_mapping.get(disease)
            mapped_disease = disease
            if mapped_disease and mapped_disease.lower() in text:  # 确保映射后的疾病出现在预测中
                disease_idx = disease_list.index(disease)
                pred[idx, disease_idx] = 1  # 对应疾病的预测值置为 1

        # 检查是否没有预测到任何类别
        if torch.sum(pred[idx]) == 0:
            error_count += 1
            error_question_ids.append(item["question_id"])

    # 计算多标签 AUC
    AUROCs = compute_AUCs(gt, pred, n_classes)
    AUROC_avg = np.nanmean(AUROCs)  # 计算 AUC 的平均值，忽略 NaN

    # 计算每个类别的准确率
    accs = []
    gt_np_all = gt.cpu().numpy()
    pred_np_all = pred.cpu().numpy()

    for i in range(n_classes):
        gt_np = gt_np_all[:, i]
        pred_np = pred_np_all[:, i]
        
        if np.sum(gt_np) == 0:  # 跳过全 0 的类别
            accs.append(np.nan)
        else:
            acc = accuracy_score(gt_np, pred_np)
            accs.append(acc)
    
    for i, acc in enumerate(accs):
        print(f"{disease_list[i]}: {acc:.4f}")
    
    # 计算类别平均准确率
    acc_avg = np.nanmean(accs)

    # 计算 F1 分数
    f1s = []

    for i in range(n_classes):
        gt_np = gt_np_all[:, i]
        pred_np = pred_np_all[:, i]

        if np.sum(gt_np) == 0:  # 跳过全 0 的类别
            f1s.append(np.nan)
        else:
            precision, recall, thresholds = precision_recall_curve(gt_np, pred_np)
            if len(precision) > 1:
                numerator = 2 * recall * precision
                denom = recall + precision
                f1_scores = np.divide(numerator, denom, out=np.zeros_like(denom), where=(denom != 0))
                max_f1 = np.max(f1_scores)
                f1s.append(max_f1)
            else:
                f1s.append(np.nan)

    f1_avg = np.nanmean(f1s)  # 平均 F1
    # 获取预测为 0 的类别索引
    zero_pred_indices = np.where(np.sum(pred_np_all, axis=0) == 0)[0]

    # 输出预测为 0 的类别
    zero_pred_diseases = [disease_list[i] for i in zero_pred_indices]
    print(f"Predicted as 0 for the following diseases: {zero_pred_diseases}")


    # 输出结果
    print(f"Total labels: {len(selected_answers)}")
    print(f"Class distribution in ground truth: {np.sum(gt_np_all, axis=0)}")
    print(f"Class distribution in predictions: {np.sum(pred_np_all, axis=0)}")
    print(f"Average AUROC: {AUROC_avg:.4f}")
    print(f"Average F1: {f1_avg:.4f}")
    print(f"Average Accuracy: {acc_avg:.4f}")
    print(f"Number of errors: {error_count}")
    print(f"Error question IDs: {error_question_ids}")

# 调用函数进行计算
get_acc_and_auc()


Total number of answers: 306
fibrosis: 0.9935
edema: 0.9902
pneumothorax: 0.8235
cardiomegaly: 0.9837
atelectasis: 0.0621
nodule: 0.9739
emphysema: 0.9967
no finding: 0.6340
mass: 0.7614
pleural_thickening: 0.9869
effusion: 0.0621
infiltration: 0.0980
pneumonia: 0.9967
hernia: 0.9935
consolidation: 0.8562
Predicted as 0 for the following diseases: ['fibrosis', 'edema', 'emphysema', 'no finding', 'pleural_thickening', 'pneumonia', 'hernia']
Total labels: 306
Class distribution in ground truth: [  2.   3.  12.   1.   8.   7.   1. 112.  11.   4.  18.  29.   1.   2.
   4.]
Class distribution in predictions: [  0.   0.  48.   6. 293.   1.   0.   0.  70.   0. 305. 305.   0.   0.
  42.]
Average AUROC: 0.5418
Average F1: 0.1019
Average Accuracy: 0.7475
Number of errors: 0
Error question IDs: []


In [26]:
import json
import random
import pandas as pd
from collections import defaultdict

dataset = "Chest-X-ray"
category_samples = defaultdict(list)

# 从 JSONL 文件中加载数据
data = [json.loads(line) for line in open(f"../data/eval/{dataset}.jsonl")]

# 目标疾病列表
disease_list = ['fibrosis', 'edema', 'pneumothorax', 'cardiomegaly', 'atelectasis', 'nodule', 'emphysema', 'no finding', 'mass', 'pleural_thickening', 'effusion', 'infiltration', 'pneumonia', 'hernia', 'consolidation']

# 收集每个类别的样本
for item in data:
    labels = item["label"]  # 假设每个数据项有一个 "label" 字段
    split = item["split"]  # 假设每个数据项有一个 "split" 字段
    if isinstance(labels, list) and split == "train":  # 如果标签是列表形式，说明可能多标签分类
        for label in labels:
            if label in disease_list:
                category_samples[label].append(item)
    else:
        if labels in disease_list and split == "train":
            category_samples[labels].append(item)

# 构建 few-shot 数据集，确保每个类别包含最多 10 个样本
few_shot_dataset = []

for category, samples in category_samples.items():
    few_shot_samples = random.sample(samples, min(10, len(samples)))  # 随机选择最多 10 个样本
    few_shot_dataset.extend(few_shot_samples)

# 将结果转换为 DataFrame 或者直接输出
df_few_shot = pd.DataFrame(few_shot_dataset)

# 保存 few-shot 数据集到 JSONL 文件
few_shot_file = f"../data/eval/{dataset}_few_shot.jsonl"
df_few_shot.to_json(few_shot_file, orient='records', lines=True)

print(f"Few-shot dataset saved to {few_shot_file}")

# import json
# import random
# import pandas as pd
# from collections import defaultdict

# dataset = "Chest-X-ray"
# category_samples = defaultdict(list)

# # 从 JSONL 文件中加载数据
# data = [json.loads(line) for line in open(f"../data/eval/{dataset}.jsonl")]

# disease_list = ['fibrosis', 'edema', 'pneumothorax', 'cardiomegaly', 'atelectasis', 'nodule', 'emphysema', 'no finding', 'mass', 'pleural_thickening', 'effusion', 'infiltration', 'pneumonia', 'hernia', 'consolidation']


# for item in data:
#     labels = item["label"]  # 假设每个数据项有一个 "label" 字段
#     split = item["split"]  # 假设每个数据项有一个 "split" 字段
#     if isinstance(labels, list) and split == "train":  # 如果标签是列表形式，说明可能多标签分类
#         for label in labels:
#             category_samples[label].append(item)
#     else:
#         category_samples[labels].append(item)

# # 构建 few-shot 数据集，确保每个类别包含 5 个样本
# few_shot_dataset = []

# for category, samples in category_samples.items():

#     few_shot_samples = random.sample(samples, min(10, len(samples)))
#     few_shot_dataset.extend(few_shot_samples)

# # 将结果转换为 DataFrame 或者直接输出
# df_few_shot = pd.DataFrame(few_shot_dataset)

# # 输出结果
# # print(df_few_shot)
# # 保存 few-shot 数据集
# few_shot_file = f"../data/eval/{dataset}_few_shot.jsonl"
# df_few_shot.to_json(few_shot_file, orient='records', lines=True)

# print(f"Few-shot dataset saved to {few_shot_file}")



Few-shot dataset saved to ../data/eval/Chest-X-ray_few_shot.jsonl


In [3]:
from transformers import AutoTokenizer

# 假设你使用的是某个特定模型的 tokenizer，例如 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained('/srv/lby/llava_med/llava-med-v1.5-mistral-7b')

# 你的 token ids
token_ids = [1, 851, 349, 264, 8118, 1500, 28733, 919, 8102]

# 使用 tokenizer 的 decode 方法还原
decoded_text = tokenizer.decode(token_ids, skip_special_tokens=True)
print(decoded_text)

This is a chest X-ray showing


In [27]:
import json


# 创建一个空列表以存储结果
formatted_data = []

# 遍历 DataFrame 中的每一行
for idx, row in df_few_shot.iterrows():
    labels = row['label']
    # 提取需要的字段
    item = {
        'id': idx,
        'image': row['image'],  # 假设该列名为 'image'
        'conversations': [
            {
                'from': 'human',
                'value': "<image>\n Fill in the blank: this is a chest X-ray showing {}"
            },
            {
                'from': 'gpt',
                'value': f"This is a chest X-ray showing {labels}"  # 假设该列名为 'label'，对应疾病名称
            }
        ]
    }
    
    # 将构建的字典添加到列表中
    formatted_data.append(item)

# 将结果保存为 JSON 格式
output_json_path = "../data/train/sft_data/Chest-X-ray_few_shot_formatted.json"
with open(output_json_path, 'w') as json_file:
    json.dump(formatted_data, json_file, indent=4)

print(f"Formatted dataset saved to {output_json_path}")

Formatted dataset saved to ../data/train/sft_data/Chest-X-ray_few_shot_formatted.json


In [1]:

import torch

# 加载权重文件
file_path = '/home/lby/llava_med/LLaVA-Med/llava/run/checkpoints/llava-lora-new-clip-v1/non_lora_trainables.bin'
non_lora_trainables = torch.load(file_path, map_location='cpu')

# 打印所有权重的键名（模块名称）
print("Keys in non_lora_trainables:")
for key in non_lora_trainables.keys():
    print(key)

# 如果想统计权重的形状
print("\nWeight shapes:")
for key, value in non_lora_trainables.items():
    print(f"{key}: {value.shape}")

Keys in non_lora_trainables:
base_model.model.model.mm_projector.0.weight
base_model.model.model.mm_projector.0.bias
base_model.model.model.mm_projector.2.weight
base_model.model.model.mm_projector.2.bias
base_model.model.mis_mlp.out_mlp.0.weight
base_model.model.mis_mlp.out_mlp.0.bias
base_model.model.mis_mlp.out_mlp.1.weight
base_model.model.mis_mlp.out_mlp.1.bias
base_model.model.mis_mlp.out_mlp.3.weight
base_model.model.mis_mlp.out_mlp.3.bias

Weight shapes:
base_model.model.model.mm_projector.0.weight: torch.Size([4096, 1024])
base_model.model.model.mm_projector.0.bias: torch.Size([4096])
base_model.model.model.mm_projector.2.weight: torch.Size([4096, 4096])
base_model.model.model.mm_projector.2.bias: torch.Size([4096])
base_model.model.mis_mlp.out_mlp.0.weight: torch.Size([4096])
base_model.model.mis_mlp.out_mlp.0.bias: torch.Size([4096])
base_model.model.mis_mlp.out_mlp.1.weight: torch.Size([1024, 4096])
base_model.model.mis_mlp.out_mlp.1.bias: torch.Size([1024])
base_model.mode