# Features used by the three regression model implementations:

1. delta_Q<sub>100-10</sub>(V)
    - Minimum
    - Variance
    - Skewness
    - Kurtosis
    
    
2. Discharge capacity fade curve features
    - Slope of the linear fit to the capacity fade curve, cycles 2 to 100
    - Intercept of the linear fit to the capacity fade curve, cycles 2 to 100
    - Discharge capacity, cycle 2
    - Difference between max discharge capacity and cycle 2
    
    
3. Other features
    - Average charge time, first 5 cycles
    - Integral of temperature over time, cycles 2 to 100
    - Minimum internal resistance, cycles 2 to 100
    - Internal resistance, difference between cycle 100 and cycle 2
    

In [None]:
# Replicating Load Data logic

import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
import matplotlib.pyplot as plt
import pickle
from pathlib import Path
from sklearn.linear_model import LinearRegression

In [None]:
# Load the pickled batch-1 dictionary (cell key -> cell data).
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
path1 = Path("Data/batch1.pkl")
with path1.open('rb') as batch_file:  # context manager guarantees the handle is closed
    batch1 = pickle.load(batch_file)

# Remove batteries that do not reach 80% capacity.
# A missing key still raises KeyError, exactly like the individual `del` statements did.
for excluded_key in ('b1c8', 'b1c10', 'b1c12', 'b1c13', 'b1c22'):
    del batch1[excluded_key]

# Number of cells left in batch 1 after the exclusions (displayed as the cell output).
numBat1 = len(batch1)
numBat1

In [None]:
# Start the feature table with a single column holding the key of every
# remaining cell; feature columns are appended to it cell by cell below.
cell_keys = np.array(list(batch1))
features_df = pd.DataFrame({'cell_key': cell_keys})
features_df.head()

### 1. delta_Q<sub>100-10</sub>(V)

In [None]:
# One slot per cell for each summary statistic of the delta-Q curve.
n_cells = len(batch1)
minimum_dQ_100_10 = np.zeros(n_cells)
variance_dQ_100_10 = np.zeros(n_cells)
skewness_dQ_100_10 = np.zeros(n_cells)
kurtosis_dQ_100_10 = np.zeros(n_cells)

for i, cell in enumerate(batch1.values()):
    cycles = cell['cycles']
    # Element-wise difference of the 'Qdlin' arrays stored under cycle keys '100' and '10'.
    dQ_100_10 = cycles['100']['Qdlin'] - cycles['10']['Qdlin']

    # Every statistic is stored as the natural log of its magnitude
    # (the variance is non-negative, so it needs no absolute value).
    minimum_dQ_100_10[i] = np.log(np.abs(dQ_100_10.min()))
    variance_dQ_100_10[i] = np.log(dQ_100_10.var())
    skewness_dQ_100_10[i] = np.log(np.abs(skew(dQ_100_10)))
    kurtosis_dQ_100_10[i] = np.log(np.abs(kurtosis(dQ_100_10)))

# Append the four statistics to the feature table, in this column order.
dQ_columns = {
    "minimum_dQ_100_10": minimum_dQ_100_10,
    "variance_dQ_100_10": variance_dQ_100_10,
    "skewness_dQ_100_10": skewness_dQ_100_10,
    "kurtosis_dQ_100_10": kurtosis_dQ_100_10,
}
for column_name, column_values in dQ_columns.items():
    features_df[column_name] = column_values

features_df.head()

### 2. Discharge capacity fade curve features

In [None]:
# Per-cell features derived from the discharge capacity fade curve.
n_cells = len(batch1)
slope_lin_fit_2_100 = np.zeros(n_cells)
intercept_lin_fit_2_100 = np.zeros(n_cells)
discharge_capacity_2 = np.zeros(n_cells)
diff_discharge_capacity_max_2 = np.zeros(n_cells)

for i, cell in enumerate(batch1.values()):
    summary = cell['summary']

    # Compute linear fit for cycles 2 to 100 (summary indices 1..99):
    q = summary['QD'][1:100].reshape(-1, 1)  # discharge capacities; q.shape = (99, 1)
    cycle_numbers = summary['cycle'][1:100].reshape(-1, 1)  # cycle index from 2 to 100; shape = (99, 1)

    linear_regressor_2_100 = LinearRegression()
    linear_regressor_2_100.fit(cycle_numbers, q)

    # With a 2-D target, coef_ has shape (1, 1) and intercept_ has shape (1,).
    # Extract the scalars explicitly: storing a 1-element array into a scalar
    # slot relies on an implicit conversion that NumPy has deprecated.
    slope_lin_fit_2_100[i] = linear_regressor_2_100.coef_[0, 0]
    intercept_lin_fit_2_100[i] = linear_regressor_2_100.intercept_[0]

    # First entry of the slice is the cycle-2 discharge capacity.
    discharge_capacity_2[i] = q[0, 0]
    # How far the maximum capacity within cycles 2..100 lies above the cycle-2 value.
    diff_discharge_capacity_max_2[i] = np.max(q) - q[0, 0]

features_df["slope_lin_fit_2_100"] = slope_lin_fit_2_100
features_df["intercept_lin_fit_2_100"] = intercept_lin_fit_2_100
features_df["discharge_capacity_2"] = discharge_capacity_2
features_df["diff_discharge_capacity_max_2"] = diff_discharge_capacity_max_2

features_df.head()

### 3. Other features

In [None]:
# Per-cell features taken from the charge-time and internal-resistance summaries.
n_cells = len(batch1)
mean_charge_time = np.zeros(n_cells)
minimun_IR = np.zeros(n_cells)  # (sic) spelling kept: the name doubles as the column name below
diff_IR_100_2 = np.zeros(n_cells)

for i, cell in enumerate(batch1.values()):
    summary = cell['summary']
    internal_resistance = summary['IR']

    # Mean over summary indices 1..5 (five entries; index 0 is skipped).
    mean_charge_time[i] = np.mean(summary['chargetime'][1:6])

    # Minimum internal resistance over cycles 2 to 100 (summary indices 1..99).
    minimun_IR[i] = np.min(internal_resistance[1:100])

    # This notebook slices the summary arrays with [1:100] for "cycles 2 to 100",
    # i.e. index 1 is cycle 2 and index 99 is cycle 100. Index 100 would be
    # cycle 101, so the cycle-100 value is read from index 99.
    diff_IR_100_2[i] = internal_resistance[99] - internal_resistance[1]

features_df["mean_charge_time"] = mean_charge_time
features_df["minimun_IR"] = minimun_IR
features_df["diff_IR_100_2"] = diff_IR_100_2

features_df.head()

In [None]:
# There are some outliers, which might have to be taken care of.
# NOTE(review): head() only previews the first rows of the table, so outliers
# further down are not visible here - inspect the full frame before deciding.
features_df.head()