In [15]:
import tensorflow as tf
import json

def _first_or_none(values):
    """Return the first element of a repeated protobuf field, or None if it is empty."""
    return values[0] if len(values) else None


def _context_value(feature):
    """Convert one context feature into a JSON-serializable value.

    Only the first value of the feature is kept. Bytes are decoded as UTF-8
    text; if they are not valid UTF-8 they are returned as a list of integer
    byte values instead. Empty or unset features yield None.
    """
    if feature.HasField("bytes_list"):
        raw = _first_or_none(feature.bytes_list.value)
        if raw is None:
            return None
        try:
            return raw.decode("utf-8")
        except UnicodeDecodeError:
            # Binary payload rather than text: keep it representable in JSON
            # the same way feature-list bytes are stored (one int per byte).
            return list(raw)
    if feature.HasField("float_list"):
        return _first_or_none(feature.float_list.value)
    if feature.HasField("int64_list"):
        return _first_or_none(feature.int64_list.value)
    return None


def _step_values(feature):
    """Convert one feature-list step into a list of numbers.

    Returns None when the feature has no kind set, so the caller can skip it.
    """
    if feature.HasField("bytes_list"):
        payloads = feature.bytes_list.value
        # Only the first bytes payload is used; it is expanded into its
        # individual byte values (integers 0-255).
        return list(payloads[0]) if len(payloads) else []
    if feature.HasField("float_list"):
        return list(feature.float_list.value)
    if feature.HasField("int64_list"):
        return list(feature.int64_list.value)
    return None


def parse_tfrecord_to_json(tfrecord_path, output_json_path):
    """Convert a TFRecord file of SequenceExample protos into a JSON file.

    Every record is parsed as a ``tf.train.SequenceExample`` and turned into a
    dict of the form ``{"context": {...}, "feature_lists": {...}}``. The list
    of all such dicts is written to ``output_json_path`` as UTF-8 JSON.

    Args:
        tfrecord_path: Path (or paths) accepted by ``tf.data.TFRecordDataset``.
        output_json_path: Destination path of the JSON file; overwritten if it
            already exists.

    Notes:
        * Context features keep only their first value; float, int64 and
          bytes features are all supported, and empty/unset features are
          stored as ``null`` instead of raising ``IndexError``.
        * Feature-list steps with no kind set are skipped.
        * All records are held in memory before being written.
    """
    # Read the raw serialized records.
    raw_dataset = tf.data.TFRecordDataset(tfrecord_path)

    parsed_records = []

    for raw_record in raw_dataset:
        # Decode the serialized bytes into a SequenceExample proto.
        sequence_example = tf.train.SequenceExample()
        sequence_example.ParseFromString(raw_record.numpy())

        # Per-sequence (context) features.
        context_features = {
            key: _context_value(feature)
            for key, feature in sequence_example.context.feature.items()
        }

        # Per-step (feature_lists) features.
        feature_lists = {}
        for key, feature_list in sequence_example.feature_lists.feature_list.items():
            converted_steps = (_step_values(step) for step in feature_list.feature)
            feature_lists[key] = [
                values for values in converted_steps if values is not None
            ]

        parsed_records.append(
            {
                "context": context_features,
                "feature_lists": feature_lists,
            }
        )

    # Write everything out as a single JSON document.
    with open(output_json_path, "w", encoding="utf-8") as json_file:
        json.dump(parsed_records, json_file, ensure_ascii=False, indent=4)

    print(f"TFRecord가 JSON으로 변환되어 저장되었습니다: {output_json_path}")

In [16]:
# Convert the training TFRecord into a JSON file written to the current working directory.
parse_tfrecord_to_json(
    tfrecord_path="/data/ephemeral/home/train0580frame.tfrecord",
    output_json_path="train0580frame.json",
)

2025-01-14 15:48:31.603747: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


TFRecord가 JSON으로 변환되어 저장되었습니다: train0580frame.json
