In [7]:
import pandas as pd

# --- Configuration -----------------------------------------------------------
fail_soh = 0.7        # SOH threshold; the first cycle strictly below it is treated as EOL
rated_capacity = 1.1  # divisor that converts measured capacity into SOH
dataset = 'CALCE'     # selects data/<dataset>_raw.csv as input

# Load only the columns needed downstream, derive SOH as capacity / rated
# capacity, and order the rows by battery and then by cycle.
df = (
    pd.read_csv(
        f'data/{dataset}_raw.csv',
        encoding='utf-8',
        usecols=['battery', 'cycle', 'capacity'],
    )
    .assign(soh=lambda frame: frame['capacity'] / rated_capacity)
    .sort_values(by=['battery', 'cycle'])
)

def _add_normalized_rul(group, battery, fail_soh, rated_capacity):
    """Return one battery's cycles with a normalized ``rul`` column added.

    End of life (EOL) is the smallest ``cycle`` whose ``soh`` is strictly
    below ``fail_soh``. If no cycle is below the threshold, a synthetic EOL
    row is appended at ``max(cycle) + 1`` with ``soh == fail_soh``.

    ``rul`` is ``eol_cycle - cycle`` divided by the largest such difference in
    the group, so the earliest cycle maps to 1.0 and the EOL cycle to 0.0.
    Cycles recorded after EOL are kept and therefore get negative values.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single battery with ``battery``, ``cycle``, ``capacity``
        and ``soh`` columns.
    battery : hashable
        Identifier of the battery, used for the synthetic EOL row and for
        error messages.
    fail_soh : float
        SOH threshold that defines failure.
    rated_capacity : float
        Capacity used to convert ``fail_soh`` back into a capacity value for
        the synthetic EOL row.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with the ``rul`` column.

    Raises
    ------
    ValueError
        If the largest RUL is not positive, i.e. the battery is already below
        ``fail_soh`` at its first recorded cycle, so normalization would
        divide by zero.
    """
    # Locate EOL; .min() of an empty selection is NaN, meaning "never failed".
    eol_cycle = group.loc[group['soh'] < fail_soh, 'cycle'].min()

    if pd.isna(eol_cycle):
        # No cycle fell below the threshold: assume the cycle right after the
        # last recorded one is EOL and append a synthetic row for it.
        eol_cycle = group['cycle'].max() + 1
        eol_row = pd.DataFrame({
            'battery': [battery],
            'cycle': [eol_cycle],
            'capacity': [fail_soh * rated_capacity],
            'soh': [fail_soh],
        })
        group = pd.concat([group, eol_row], ignore_index=True)

    # Remaining useful life in cycles; negative for cycles recorded after EOL.
    rul = eol_cycle - group['cycle']
    max_rul = rul.max()  # RUL at the earliest cycle is the normalization base
    if not max_rul > 0:
        # Previously this silently produced NaN / -inf via division by zero.
        raise ValueError(
            f"battery {battery!r}: cannot normalize RUL because the first "
            f"recorded cycle is already below fail_soh={fail_soh} "
            f"(max RUL is {max_rul})"
        )

    # .assign returns a new frame, so the slice handed out by groupby is
    # never written to.
    return group.assign(rul=rul / max_rul)


groups = [
    _add_normalized_rul(group, battery, fail_soh, rated_capacity)
    for battery, group in df.groupby('battery')
]
df = pd.concat(groups).reset_index(drop=True)
df.to_csv(f'data/{dataset}.csv', index=False)
df

Unnamed: 0,battery,cycle,capacity,soh,rul
0,CS2_35,1,1.126385,1.023986,1.000000
1,CS2_35,2,1.126160,1.023782,0.998437
2,CS2_35,3,1.125966,1.023605,0.996875
3,CS2_35,4,1.118508,1.016825,0.995313
4,CS2_35,5,1.117210,1.015646,0.993750
...,...,...,...,...,...
3781,CS2_38,992,0.366656,0.333324,-0.330201
3782,CS2_38,993,0.366665,0.333332,-0.331544
3783,CS2_38,994,0.357495,0.324996,-0.332886
3784,CS2_38,995,0.357480,0.324981,-0.334228
