In [None]:
# Placeholder path for the input file; a later cell assigns the real JSON path to file_path.
file_path = './path/to/file'

## JSON 파일 구조 확인

In [16]:
import json
import pandas as pd

# Inspect the top-level structure of a JSON file
def inspect_json_structure(file_path):
    """Print a one-level summary of the JSON document stored at ``file_path``.

    For a top-level object, one line is printed per key together with the
    Python type of its value. For a top-level array, the item count is
    printed and, when the array is non-empty, the type of its first item.
    Any other top-level value prints nothing.

    Args:
        file_path: Path of the JSON file to read.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pass the encoding explicitly: without it, open() falls back to the
    # platform's locale encoding and non-ASCII JSON may be mis-decoded.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # The file handle is already closed; only the parsed value is needed below.
    if isinstance(data, dict):
        for key, value in data.items():
            print(f"Key: {key}, Type: {type(value)}")
    elif isinstance(data, list):
        print(f"List of {len(data)} items")
        if len(data) > 0:
            print(f"First item type: {type(data[0])}")

# Read a JSON file and convert it to a DataFrame
def json_to_dataframe(file_path):
    """Load the JSON document at ``file_path`` into a pandas DataFrame.

    A top-level array is passed to ``pd.DataFrame`` as-is (one row per item).
    A top-level object becomes a single-row frame with one column per key;
    nested values are stored in the cells unchanged.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        pandas.DataFrame built from the parsed document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
        TypeError: If the top-level JSON value is neither an array nor an
            object (e.g. a bare number or string).
    """
    # Pass the encoding explicitly so decoding does not depend on the
    # platform's locale default.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    if isinstance(data, list):
        return pd.DataFrame(data)
    if isinstance(data, dict):
        # Wrap in a list so the dict is one row rather than being expanded
        # column-wise from its values.
        return pd.DataFrame([data])

    # Previously this path fell through with no frame assigned and failed
    # with an unrelated-looking UnboundLocalError.
    raise TypeError(
        f"Top-level JSON value must be a list or dict, got {type(data).__name__}"
    )

# Path of the JSON file to analyse
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
file_path = '/home/jinhuijun/addb_iot/fast_charge/FastCharge_000041_CH10_structure.json'

# Print the top-level structure of the JSON file
inspect_json_structure(file_path)

# Convert the JSON data to a DataFrame and print it
df = json_to_dataframe(file_path)
print(df)


Key: @module, Type: <class 'str'>
Key: @class, Type: <class 'str'>
Key: barcode, Type: <class 'str'>
Key: protocol, Type: <class 'str'>
Key: channel_id, Type: <class 'int'>
Key: summary, Type: <class 'dict'>
Key: cycles_interpolated, Type: <class 'dict'>
Key: diagnostic_summary, Type: <class 'NoneType'>
Key: diagnostic_interpolated, Type: <class 'NoneType'>
Key: @version, Type: <class 'str'>
          @module              @class         barcode  \
0  beep.structure  ProcessedCyclerRun  el150800460605   

                                    protocol  channel_id  \
0  2017-06-30_tests\20170629-2C_10per_6C.sdu           9   

                                             summary  \
0  {'cycle_index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,...   

                                 cycles_interpolated diagnostic_summary  \
0  {'voltage': [2.799999952316284, 2.800700664520...               None   

  diagnostic_interpolated               @version  
0                    None  2020.10.19.20-8667111  


## DataFrame으로 변환

In [2]:
import json
import pandas as pd

# Path of the JSON file to load
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
file_path = '/home/jinhuijun/addb_iot/fast_charge/FastCharge_000041_CH10_structure.json'

# Open and parse the JSON file. The encoding is given explicitly so the read
# does not depend on the platform's locale default.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Flatten parsed JSON one level deep and wrap it in a single-row DataFrame
def json_to_dataframe(data):
    """Build a one-row DataFrame from a parsed JSON object.

    Each top-level entry becomes a column. When an entry's value is itself a
    dict, it is expanded one level: every inner key becomes its own column
    named ``<outer key>_<inner key>``. Deeper nesting is left untouched and
    stored in the cell as-is.

    Args:
        data: Mapping produced by parsing a JSON object.

    Returns:
        pandas.DataFrame with exactly one row.
    """
    flattened = {}
    for outer_key, outer_value in data.items():
        if not isinstance(outer_value, dict):
            # Scalars, lists and None are stored as a single cell
            flattened[outer_key] = outer_value
            continue
        # Promote every entry of the nested dict to a prefixed column
        for inner_key, inner_value in outer_value.items():
            flattened['_'.join((outer_key, inner_key))] = inner_value

    return pd.DataFrame([flattened])

# Convert the parsed JSON to a DataFrame and print it
df = json_to_dataframe(data)
print(df)


          @module              @class         barcode  \
0  beep.structure  ProcessedCyclerRun  el150800460605   

                                    protocol  channel_id  \
0  2017-06-30_tests\20170629-2C_10per_6C.sdu           9   

                                 summary_cycle_index  \
0  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...   

                          summary_discharge_capacity  \
0  [1.9543766, 1.06918, 1.0720705, 1.072906, 1.07...   

                             summary_charge_capacity  \
0  [1.4393839, 1.0686822, 1.0715642, 1.0721201, 1...   

                            summary_discharge_energy  \
0  [6.1843724, 3.2315285, 3.2203147, 3.2319348, 3...   

                               summary_charge_energy  ...  \
0  [4.7521009, 3.7519432999999998, 3.759044200000...  ...   

                         cycles_interpolated_current  \
0  [-0.10998934507369995, -0.10998789221048355, -...   

                 cycles_interpolated_charge_capacity  \
0  [1.0728078, 1.072

## JSON 파일 구조, NaN 값 및 길이 확인

In [3]:
import json
import pandas as pd

# Path of the JSON file to load
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
file_path = '/home/jinhuijun/addb_iot/fast_charge/FastCharge_000041_CH10_structure.json'

# Open and parse the JSON file. The encoding is given explicitly so the read
# does not depend on the platform's locale default.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Report the top-level layout of already-parsed JSON data
def inspect_json_structure(data):
    """Print a one-level summary of a parsed JSON value.

    For a dict, one line is printed per key with the Python type of its
    value. For a list, the item count is printed and, when the list is
    non-empty, the type of its first item. Any other value prints nothing.

    Args:
        data: Value obtained from parsing JSON.
    """
    if isinstance(data, list):
        print(f"List of {len(data)} items")
        if len(data) == 0:
            return
        print(f"First item type: {type(data[0])}")
        return

    if not isinstance(data, dict):
        # Scalars and None have no structure to report
        return

    for key in data:
        value = data[key]
        print(f"Key: {key}, Type: {type(value)}")

# Print the top-level structure of the loaded JSON document
inspect_json_structure(data)

# Analyse 'cycles_interpolated' only when the key is present
if 'cycles_interpolated' in data:
    # Expected to map series name -> sequence of values; a null value is
    # treated as "no series".
    cycles_interpolated = data['cycles_interpolated'] or {}

    # Wrap every raw sequence in its own Series first. Lengths and NaN counts
    # are taken per series *before* the frame is assembled: once the series
    # are columns of one DataFrame they all share the frame's row count, so a
    # length comparison made on the frame can never detect a mismatch.
    series_by_name = {
        name: pd.Series(values) for name, values in cycles_interpolated.items()
    }

    # Assemble the frame from Series so that series of unequal length are
    # padded with NaN instead of making the constructor raise.
    cycles_interpolated_df = pd.DataFrame(series_by_name)

    # Count NaN values per series (alignment padding is not included)
    nan_counts = pd.Series(
        {name: series.isna().sum() for name, series in series_by_name.items()},
        dtype='int64',
    )
    print("NaN Counts:")
    print(nan_counts)

    # Length of each raw series as it appears in the JSON document
    lengths = pd.Series(
        {name: len(series) for name, series in series_by_name.items()},
        dtype='int64',
    )
    print("\nLengths of each series:")
    print(lengths)

    # Check whether every series has the same number of samples
    if len(set(lengths)) == 1:
        print("\nAll series have the same length.")
    else:
        print("\nSeries have different lengths.")
else:
    print("'cycles_interpolated' 키가 JSON 데이터에 존재하지 않습니다.")


Key: @module, Type: <class 'str'>
Key: @class, Type: <class 'str'>
Key: barcode, Type: <class 'str'>
Key: protocol, Type: <class 'str'>
Key: channel_id, Type: <class 'int'>
Key: summary, Type: <class 'dict'>
Key: cycles_interpolated, Type: <class 'dict'>
Key: diagnostic_summary, Type: <class 'NoneType'>
Key: diagnostic_interpolated, Type: <class 'NoneType'>
Key: @version, Type: <class 'str'>
NaN Counts:
voltage                55079
current                55079
charge_capacity            0
internal_resistance    55079
temperature            55079
discharge_capacity     55079
cycle_index                0
step_type                  0
dtype: int64

Lengths of each series:
voltage                344000
current                344000
charge_capacity        344000
internal_resistance    344000
temperature            344000
discharge_capacity     344000
cycle_index            344000
step_type              344000
dtype: int64

All series have the same length.
