In [None]:
# First, train the model using the complete discharge voltage curve

In [1]:
import pandas as pd
import os
import numpy as np
from scipy.interpolate import InterpolatedUnivariateSpline
import copy
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error
from glob import glob
from torch.optim.lr_scheduler import StepLR

In [2]:
def load_csvs(csv_paths):
    """Read battery CSV files and keep only the rows used for modelling.

    For each path the file is read with ``pd.read_csv``, the number of distinct
    ``Cycle_Index`` values in the *unfiltered* file is printed, and a row is
    kept only when all of these hold:

    * ``Step_Index == 10``
    * ``0 < Step_Time < 5000``   (outlier removal)
    * ``Cycle_Index > 0``        (outlier removal)
    * ``2 < Voltage < 3.21``     (data-reduction window; change these bounds to
      train on a different voltage segment)

    A ``SOH`` column equal to ``Charge_Capacity / 1.1`` is then appended
    (per the original author's note, 1.1 is the nominal capacity, so SOH is
    the capacity as a fraction of nominal).

    Parameters
    ----------
    csv_paths : iterable of str
        Paths of the CSV files to load. Each file must contain the columns
        ``Cycle_Index``, ``Step_Index``, ``Step_Time``, ``Voltage`` and
        ``Charge_Capacity``.

    Returns
    -------
    data : list of pandas.DataFrame
        One filtered frame per input path (original row index preserved).
    cycles : list of numpy.ndarray
        For each frame, the unique ``Cycle_Index`` values that survived filtering.

    Raises
    ------
    FileNotFoundError
        If one of the paths does not exist.
    """
    data = []
    cycles = []

    for path in csv_paths:
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if not os.path.exists(path):
            raise FileNotFoundError(f"CSV file not found: {path}")

        raw = pd.read_csv(path)
        print(raw.Cycle_Index.unique().shape[0])

        # One combined mask is equivalent to the successive re-filtering steps.
        keep = (
            (raw['Step_Index'] == 10)
            & (raw['Step_Time'] > 0)       # remove outliers
            & (raw['Step_Time'] < 5000)    # remove outliers
            & (raw["Cycle_Index"] > 0)     # remove outliers
            & (raw["Voltage"] < 3.21)      # upper bound of the voltage window
            & (raw["Voltage"] > 2)         # lower bound of the voltage window
        )

        # .assign() returns a new frame, so the SOH column is not written onto
        # a slice of `raw` (that is what raised SettingWithCopyWarning before).
        df = raw.loc[keep].assign(SOH=lambda frame: frame['Charge_Capacity'] / 1.1)

        data.append(df)
        cycles.append(df["Cycle_Index"].unique())  # cycle ids present in this file

        print (f"total points in {path}", df.shape)

    return data, cycles


In [3]:
def plot_cycles(data, cycles):
    """Overlay the voltage-vs-time curve of every cycle on the current figure.

    Parameters
    ----------
    data : iterable of pandas.DataFrame
        Frames with at least ``Cycle_Index``, ``Step_Time`` and ``Voltage`` columns.
    cycles : iterable of array-like
        For each frame, the cycle ids to draw.

    Each curve's time axis is shifted so that it starts at 0, which aligns all
    cycles on a common starting point. A cycle id with no rows in its frame is
    reported on stdout and skipped. The figure is displayed with ``plt.show()``;
    nothing is returned.
    """
    print("plotting...")
    for d, cycle in zip(data, cycles):
        for c in cycle:
            cycle_data = d[d.Cycle_Index == c]
            time, voltage = cycle_data['Step_Time'].values, cycle_data.Voltage
            # Explicit emptiness check instead of a bare `except:` around
            # time[0]: only the "no rows for this cycle" case is skipped,
            # every other error now surfaces.
            if len(time) == 0:
                print (time, voltage, "skipping")
                continue
            plt.plot(time - time[0], voltage)

    # Axis labels are the same for every curve, so set them once.
    plt.xlabel("time")
    plt.ylabel("voltage")
    plt.show()

In [4]:
def fit_spline(data, cycles, length, no_points=500, t_max=700, grid_points=20000,
               v_start=3.2, start_window=1000):
    """Resample one voltage segment of every cycle onto a fixed number of points.

    For each cycle the time axis is shifted to start at 0, a linear (k=1)
    interpolating spline of voltage over time is built, and the spline is
    evaluated on a dense uniform grid. The segment runs from the grid point
    whose voltage is closest to ``v_start`` (searched only in the first
    ``start_window`` grid points) to the grid point whose voltage is closest to
    ``v_start - length`` (searched over the whole grid). ``no_points`` grid
    indices evenly spaced between those two are then selected, so every cycle
    ends up with the same number of samples.

    Parameters
    ----------
    data : list of pandas.DataFrame
        One frame per cell with columns ``Step_Time``, ``Voltage``, ``SOH`` and
        ``Cycle_Index``. **The list is modified in place**: each element is
        replaced by the resampled frame (the input frames themselves are not
        altered).
    cycles : iterable of array-like
        For each frame, the cycle ids to process.
    length : float
        Voltage span of the segment; the segment ends at ``v_start - length``.
    no_points : int, default 500
        Number of samples kept per cycle.
    t_max : float, default 700
        Upper end of the dense time grid (the grid starts at 0).
    grid_points : int, default 20000
        Number of points in the dense time grid.
    v_start : float, default 3.2
        Voltage at which the segment starts.
    start_window : int, default 1000
        Number of leading grid points searched for ``v_start``.

    Returns
    -------
    list of pandas.DataFrame
        The same list object as ``data``; each frame has the columns
        ``['Step_Time', 'Voltage', 'SOH', 'Cycle_Index']``, where ``SOH`` is the
        per-cycle maximum SOH and ``Cycle_Index`` the cycle id, both repeated
        for every sample. A cell with no cycles yields an empty frame with
        those columns.

    Notes
    -----
    ``InterpolatedUnivariateSpline`` needs strictly increasing time values
    within a cycle; ``find_error`` can be used beforehand to locate violations.
    NOTE(review): the dense grid may extend past the last measured sample, in
    which case the spline is evaluated outside the data range - confirm that
    the end-of-segment search cannot land in that region for your data.
    """
    columns = ['Step_Time', 'Voltage', 'SOH', 'Cycle_Index']
    # The dense grid is identical for every cycle, so build it once.
    grid = np.linspace(0, t_max, grid_points)
    v_end = v_start - length

    for cell_idx, (cycle, d) in enumerate(zip(cycles, data)):
        cell_np = []
        for c in cycle:
            cycle_data = d[d["Cycle_Index"] == c]

            time = cycle_data.Step_Time.values
            time = time - time[0]  # every cycle starts at t = 0

            # Linear spline so that all cycles can be sampled on the same grid.
            ius = InterpolatedUnivariateSpline(time, cycle_data.Voltage, k=1)
            yi_test = ius(grid)

            # Grid indices where the voltage is closest to the segment bounds.
            index_1 = np.argmin(np.abs(yi_test[:start_window] - v_start))
            index_2 = np.argmin(np.abs(yi_test - v_end))

            # no_points evenly spaced grid indices across the segment.
            indices = np.linspace(index_1, index_2, no_points).astype(int)
            xi = grid[indices]
            yi = ius(xi)

            # Broadcast the per-cycle scalars to one value per sample.
            cycle_id = np.full_like(xi, cycle_data.Cycle_Index.values[0])
            soh = np.full_like(xi, np.max(cycle_data.SOH))
            cell_np.append(np.array([xi, yi, soh, cycle_id]))

        if cell_np:
            # (4, n_samples) blocks -> one (total_samples, 4) table
            table = np.transpose(np.concatenate(cell_np, axis=1), (1, 0))
            data[cell_idx] = pd.DataFrame(table, columns=columns)
        else:
            # np.concatenate rejects an empty list; return an empty frame instead.
            data[cell_idx] = pd.DataFrame(columns=columns)

    return data

In [5]:
def find_error(data, cycles):
    """Print every position where ``Step_Time`` fails to increase within a cycle.

    Intended as a diagnostic for inputs on which ``fit_spline`` fails: for each
    frame in ``data`` and each cycle id in the matching entry of ``cycles``, the
    ``Step_Time`` values of that cycle are scanned and one line is printed for
    every sample whose successor is not strictly later.

    Parameters
    ----------
    data : iterable of pandas.DataFrame
        Frames with at least ``Cycle_Index`` and ``Step_Time`` columns.
    cycles : iterable of array-like
        For each frame, the cycle ids to check.

    Returns
    -------
    None
        Findings are reported on stdout only; no output means none were found.
    """
    for d, cycle in zip(data, cycles):
        for c in cycle:
            time = d.loc[d.Cycle_Index == c, 'Step_Time'].values
            # np.diff(time)[k] == time[k + 1] - time[k]; a value <= 0 means the
            # timestamp repeated or went backwards at position k.
            for k in np.flatnonzero(np.diff(time) <= 0):
                print (f"Error is in {c} cycle at time {time[k]}")

In [42]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Preprocessing

In [43]:
# mit_csvs holds the paths of all CSV files in the selected folder.
# (Original note: the raw MIT set also covers the charging process, 10 cells in total - TODO confirm against the folder actually globbed below.)
# mit_csvs = glob("/content/drive/MyDrive//battery capcity estimation//Battery SOH//DATA//MIT discharge data//*csv") # discharge curves
mit_csvs = glob("/content/drive/MyDrive//battery capcity estimation//data//*csv") # discharge curves
# mit_csvs = glob("/content/drive/MyDrive//battery capcity estimation//data//CSV from pkl//*csv") # use when plotting the full charge/discharge curves

#glob("./Capacity_data/MIT/*")
for csv in mit_csvs:
    print (csv)
# Drop every path containing "b1c21" (only that cell is excluded, not all b1 files).
# NOTE: glob() returns paths in arbitrary order, so positions in mit_csvs are not stable across environments.
mit_csvs = [i for i in mit_csvs if "b1c21" not in i]


/content/drive/MyDrive//battery capcity estimation//data/2017-05-12_6C-40per_3C_CH26_b1c25_discharge.csv
/content/drive/MyDrive//battery capcity estimation//data/2017-05-12_5_4C-80per_5_4C_CH12_b1c20_discharge.csv
/content/drive/MyDrive//battery capcity estimation//data/2017-05-12_6C-40per_3C_CH25_b1c24_discharge.csv
/content/drive/MyDrive//battery capcity estimation//data/2017-05-12_5_4C-80per_5_4C_CH11_b1c21_discharge.csv
/content/drive/MyDrive//battery capcity estimation//data/2017-05-12_5_4C-60per_3C_CH16_b1c15_discharge.csv
/content/drive/MyDrive//battery capcity estimation//data/2017-05-12_6C-40per_3_6C_CH34_b1c27_discharge.csv
/content/drive/MyDrive//battery capcity estimation//data/2017-05-12_6C-50per_3_6C_CH36_b1c31_discharge.csv
/content/drive/MyDrive//battery capcity estimation//data/2017-05-12_8C-25per_3_6C_CH45_b1c42_discharge_corrected.csv
/content/drive/MyDrive//battery capcity estimation//data/2017-06-30_2C-10per_6C_CH10_b2c1_discharge.csv
/content/drive/MyDrive//batter

In [56]:
# Pick the cell to inspect by file name instead of by position: glob() gives no
# ordering guarantee, so mit_csvs[14] could be a different file on another run.
check_csvs = [p for p in mit_csvs if os.path.basename(p) == "b3c45.csv"]
assert check_csvs, "b3c45.csv was not found in mit_csvs"
check_csvs

['/content/drive/MyDrive//battery capcity estimation//data/b3c45.csv']

In [57]:
# Load the selected file; load_csvs keeps only the reduced voltage window (2 < Voltage < 3.21) of each cycle
check_data, check_cycles = load_csvs(check_csvs)
# plot_cycles(mit_data, mit_cycles)

1800
total points in /content/drive/MyDrive//battery capcity estimation//data/b3c45.csv (596021, 8)


In [58]:
# Print every place where Step_Time fails to increase within a cycle (no output means none were found)
find_error(check_data,check_cycles)

In [59]:
# Check that fit_spline runs cleanly on this cell. The list is shallow-copied because
# fit_spline overwrites the elements of the list it receives; check_data itself stays unchanged.
check_d = fit_spline(check_data.copy(),check_cycles,0.2,no_points=500)