# 1.测试原始数据集的字段顺序

In [26]:
import pickle as cPickle
import collections
import numpy as np

# Desired field order: records are relabelled with these names, by position.
RNA_SS_fixed = collections.namedtuple(
    'RNA_SS_fixed',
    'data_fcn_2 seq_raw length name contact',
)

def load_pickle(path: str):
    """
    Load a pickle file and relabel namedtuple records as ``RNA_SS_fixed``.

    The file at ``path`` is unpickled. When the result is a non-empty list or
    tuple whose first element exposes ``_fields`` (i.e. looks like a
    namedtuple), every record is rebuilt as an ``RNA_SS_fixed`` by passing its
    values positionally, and a new list is returned. Values are NOT matched by
    field name: the i-th value of each record simply receives the i-th
    ``RNA_SS_fixed`` field name. Any other object is returned unchanged.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(path, 'rb') as handle:
        loaded = cPickle.load(handle)

    # Anything other than a non-empty list/tuple is handed back as-is.
    if not isinstance(loaded, (list, tuple)) or len(loaded) == 0:
        return loaded

    # Only the first element is inspected to decide whether the records are
    # namedtuples.
    if not hasattr(loaded[0], "_fields"):
        return loaded

    # Rebuild each record positionally under the new namedtuple type.
    return [RNA_SS_fixed(*record) for record in loaded]

def _print_value(v, indent="    ", show_all=False):
    """尽量把字段值原样打印"""
    if isinstance(v, np.ndarray):
        print(f"{indent}[ndarray] dtype={v.dtype}, shape={v.shape}, size={v.size}")
        with np.printoptions(threshold=np.inf if show_all else 1000, edgeitems=10, linewidth=180):
            print(indent + repr(v))
        return
    if isinstance(v, (list, tuple)):
        print(indent + repr(v))
        return
    if isinstance(v, dict):
        print(f"{indent}[dict] keys={len(v)}")
        for k, vv in v.items():
            print(f"{indent}  {k!r}:")
            _print_value(vv, indent + "    ", show_all)
        return
    print(indent + repr(v))

def brief_info(obj, max_show=3, show_all=False):
    """
    Print basic information about ``obj`` and the first ``max_show`` records.

    The type name of ``obj`` is always printed. For a list or tuple, the entry
    count and the ``_fields`` of the first element (``None`` if it has none)
    follow, then each of the first ``max_show`` records: records exposing
    ``_asdict`` are printed field by field, other records are printed as a
    single value. Any other object is printed directly as one value.
    ``show_all`` is forwarded to ``_print_value``.
    """
    print("== Pickle 对象类型:", type(obj).__name__)

    # Non-sequence objects are printed directly as one value.
    if not isinstance(obj, (list, tuple)):
        _print_value(obj, "  ", show_all)
        return

    print("条目数:", len(obj))
    if len(obj) == 0:
        return

    print("字段:", getattr(obj[0], "_fields", None))
    for idx, record in enumerate(obj[:max_show]):
        print(f"\n[Record {idx}] type={type(record).__name__}")
        if not hasattr(record, "_asdict"):
            _print_value(record, "    ", show_all)
            continue
        # namedtuple-like record: one "name:" line followed by its value.
        for field_name, field_value in record._asdict().items():
            print(f"{field_name}:")
            _print_value(field_value, "    ", show_all)

def inspect_pickle(path: str, max_show: int = 3, show_all: bool = False):
    """One-call convenience wrapper: load the pickle, print a summary, return it.

    Prints the path being loaded, loads it with ``load_pickle``, passes the
    result to ``brief_info`` together with ``max_show`` and ``show_all``, and
    returns the loaded object.
    """
    print(">>> 正在加载:", path)
    loaded = load_pickle(path)
    brief_info(loaded, max_show=max_show, show_all=show_all)
    return loaded


In [27]:
# Pickle file to inspect (absolute path specific to the author's machine).
path = '/storage/student2/xiao/lql/RNADiffFold/data/bpRNA/TR0/TR0.cPickle'
# Load the file and print a summary of its first records (max_show defaults to 3).
obj = inspect_pickle(path)
# NOTE(review): inspect_pickle already calls brief_info with max_show=3, so this
# call prints the same summary a second time.
brief_info(obj, max_show=3)

>>> 正在加载: /storage/student2/xiao/lql/RNADiffFold/data/bpRNA/TR0/TR0.cPickle
== Pickle 对象类型: list
条目数: 10794
字段: ('data_fcn_2', 'seq_raw', 'length', 'name', 'contact')

[Record 0] type=RNA_SS_fixed
data_fcn_2:
    [ndarray] dtype=int64, shape=(119, 4), size=476
    array([[0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 1, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [0, 0, 1, 0],
       [0, 0, 0, 1],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 0, 0, 1],
       [0, 1, 0, 0],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 1, 0, 0],
