# Main notebook for battery state estimation

In [1]:
import numpy as np
import pandas as pd
import scipy.io
import math
import os
import ntpath
import sys
import logging
import time
import sys

from importlib import reload
import plotly.graph_objects as go

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from keras.layers import LSTM, Embedding, RepeatVector, TimeDistributed, Masking
from keras.callbacks import EarlyStopping, ModelCheckpoint, LambdaCallback


# Set to True when the notebook runs on Google Colab with the project stored
# on Google Drive; leave False for a local checkout.
IS_COLAB = False

if not IS_COLAB:
    # Local run: the project root is one level above this notebook.
    data_path = "../"
else:
    # Colab run: mount Google Drive and point at the project folder on it.
    from google.colab import drive
    drive.mount('/content/drive')
    data_path = "/content/drive/My Drive/battery-state-estimation/battery-state-estimation/"

# Make the project root importable so the `data_processing` package resolves.
sys.path.append(data_path)
from data_processing.unibo_powertools_data import UniboPowertoolsData, CycleCols
from data_processing.model_data_handler import ModelDataHandler

2025-02-11 17:08:45.050090: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-02-11 17:08:45.248123: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-11 17:08:45.278845: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-02-11 17:08:45.278864: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not ha

### Configure logging

In [2]:
# Reload the logging module before configuring it; presumably this clears any
# previously installed root handlers so that basicConfig takes effect when the
# cell is re-run in a live kernel.
reload(logging)

LOG_FORMAT = '%(asctime)s [%(levelname)s]: %(message)s'
LOG_DATE_FORMAT = '%Y/%m/%d %H:%M:%S'

# DEBUG level: show the progress messages emitted by the data-processing code.
logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT)

# Load Data

### Initialize the data object

Load the cycle and capacity data into memory, reading the raw files in chunks of the specified size.

In [3]:
# Options for loading the Unibo Powertools data from `data_path`.
# NOTE(review): the meaning of lines 37/40 (passed both as `lines` and as the
# charge/discharge line) is defined by UniboPowertoolsData — confirm there.
loader_options = dict(
    test_types=['S'],
    chunk_size=1000000,
    lines=[37, 40],
    charge_line=37,
    discharge_line=40,
    base_path=data_path,
)
dataset = UniboPowertoolsData(**loader_options)
# NOTE(review): the original note pointed at line 117 of unibo_powertools_data.py
# — confirm what it refers to.

2025/02/11 17:08:47 [DEBUG]: Start loading data with lines: [37, 40], types: ['S'] and chunksize: 1000000...
2025/02/11 17:09:11 [DEBUG]: Finish loading data.
2025/02/11 17:09:11 [INFO]: Loaded raw Unibo Powertools data with cycle row count: 8214789 and capacity row count: 21876
2025/02/11 17:09:11 [DEBUG]: Start cleaning cycle raw data...
2025/02/11 17:09:15 [DEBUG]: Finish cleaning cycle raw data.
2025/02/11 17:09:15 [INFO]: Removed 5 rows of abnormal cycle raw data.
2025/02/11 17:09:15 [DEBUG]: Start cleaning capacity raw data...
2025/02/11 17:09:15 [DEBUG]: Finish cleaning capacity raw data.
2025/02/11 17:09:15 [INFO]: Removed 1 rows of abnormal capacity raw data.
2025/02/11 17:09:15 [DEBUG]: Start assigning charging raw data...
2025/02/11 17:09:15 [DEBUG]: Finish assigning charging raw data.
2025/02/11 17:09:15 [INFO]: [Charging] cycle raw count: 6355867, capacity raw count: 10942
2025/02/11 17:09:15 [DEBUG]: Start assigning discharging raw data...
2025/02/11 17:09:15 [DEBUG]: Fin

### Select the training and testing test names

Prepare the training and testing data so that the model data handler can load the model input and output data.

In [4]:
# Test names whose cycles are used for training.
train_data_test_names = [
    '000-DM-3.0-4019-S', '001-DM-3.0-4019-S', '002-DM-3.0-4019-S',
    '006-EE-2.85-0820-S', '007-EE-2.85-0820-S',
    '018-DP-2.00-1320-S', '019-DP-2.00-1320-S',
    '036-DP-2.00-1720-S', '037-DP-2.00-1720-S',
    '038-DP-2.00-2420-S',
    '040-DM-4.00-2320-S',
    '042-EE-2.85-0820-S',
    '045-BE-2.75-2019-S',
]

# Test names held out for the final evaluation.
test_data_test_names = [
    '003-DM-3.0-4019-S',
    '008-EE-2.85-0820-S',
    '039-DP-2.00-2420-S',
    '041-DM-4.00-2320-S',
]

# Build the training/testing charge and discharge data for the names above.
dataset.prepare_data(train_data_test_names, test_data_test_names)

2025/02/11 17:09:15 [DEBUG]: Start preparing data for training: ['000-DM-3.0-4019-S', '001-DM-3.0-4019-S', '002-DM-3.0-4019-S', '006-EE-2.85-0820-S', '007-EE-2.85-0820-S', '018-DP-2.00-1320-S', '019-DP-2.00-1320-S', '036-DP-2.00-1720-S', '037-DP-2.00-1720-S', '038-DP-2.00-2420-S', '040-DM-4.00-2320-S', '042-EE-2.85-0820-S', '045-BE-2.75-2019-S'] and testing: ['003-DM-3.0-4019-S', '008-EE-2.85-0820-S', '039-DP-2.00-2420-S', '041-DM-4.00-2320-S']...
2025/02/11 17:09:24 [DEBUG]: Finish getting training and testing charge data.
2025/02/11 17:09:29 [DEBUG]: Finish getting training and testing discharge data.
2025/02/11 17:09:29 [DEBUG]: Finish cleaning training and testing charge data.
2025/02/11 17:09:29 [DEBUG]: Finish cleaning training and testing discharge data.
2025/02/11 17:09:29 [DEBUG]: Finish adding training and testing discharge SOC parameters.
2025/02/11 17:09:31 [DEBUG]: Finish adding training and testing discharge SOH parameters.
2025/02/11 17:09:31 [DEBUG]: Finish preparing da

### Initialize the model data handler

The model data handler is used to get the model input and output data for training.

In [5]:
# Cycle columns passed to the handler as model inputs.
model_input_columns = [
    CycleCols.VOLTAGE,
    CycleCols.CURRENT,
    CycleCols.TEMPERATURE,
]

mdh = ModelDataHandler(dataset, model_input_columns)

# Model training

In [6]:
# Fetch whole discharge cycles as model inputs/targets. In the recorded run the
# handler logged x as (7697, 287, 3) / (2025, 287, 3) and y with two channels.
train_x, train_y, test_x, test_y = mdh.get_discharge_whole_cycle(
    soh=False,
    output_capacity=False,
)


2025/02/11 17:09:35 [INFO]: Train x: (7697, 287, 3), train y: (7697, 287, 2) | Test x: (2025, 287, 3), test y: (2025, 287, 2)


In [None]:
# TODO: check that the loaded data are correct

In [7]:
# Reduce the targets to a single channel (the recorded run logs the new y shape
# as (7697, 287) and (2025, 287)).
# NOTE(review): this overwrites train_y/test_y with their own reduction, so the
# cell is not safe to re-run on its own — re-run the previous cell first.
train_y, test_y = (
    mdh.keep_only_capacity(targets, is_multiple_output=True)
    for targets in (train_y, test_y)
)

2025/02/11 17:09:38 [INFO]: New y: (7697, 287)
2025/02/11 17:09:38 [INFO]: New y: (2025, 287)


In [11]:
# Sanity check: print the target sequence of the first training cycle.
first_cycle_targets = train_y[0]
print(first_cycle_targets)

[1.         0.99999624 0.99525774 0.990519   0.9857793  0.98104143
 0.97630054 0.97155994 0.96681845 0.9620781  0.95733654 0.95259774
 0.9478571  0.94311637 0.93837684 0.9336353  0.92889416 0.92415273
 0.91941196 0.91467184 0.9099484  0.9052082  0.900468   0.8957284
 0.89098686 0.88624734 0.8815066  0.8767671  0.8720271  0.8672878
 0.86254853 0.8578098  0.8530745  0.8483338  0.843594   0.83885336
 0.8341138  0.82937425 0.8246343  0.8198925  0.8151517  0.8104104
 0.8056703  0.80092925 0.7961904  0.79144937 0.7867084  0.78196824
 0.77722865 0.77248967 0.7677508  0.7630121  0.7582734  0.75353336
 0.74879354 0.7440528  0.7393135  0.7345719  0.7298309  0.7250894
 0.72034824 0.7156093  0.7108677  0.7061289  0.70138973 0.6966502
 0.6919244  0.6871838  0.68244314 0.677704   0.67296934 0.6682284
 0.66348827 0.6587492  0.6540084  0.6492678  0.64452654 0.6397867
 0.63504577 0.6303056  0.62556624 0.62082714 0.61609495 0.6113561
 0.60661507 0.60187674 0.597136   0.59239674 0.58765626 0.5829176
 0.5

In [8]:
EXPERIMENT = "lstm_soc_percentage"

# Timestamped run name, used in the file names of the saved model and history.
experiment_name = time.strftime("%Y-%m-%d-%H-%M-%S") + '_' + EXPERIMENT
print(experiment_name)

# Model definition

# Use `learning_rate`: the `lr` alias is deprecated (it triggers a warning on
# older Keras releases and is rejected by newer ones).
opt = tf.keras.optimizers.Adam(learning_rate=0.00003)

# Stacked LSTMs that return the full sequence, followed by per-time-step dense
# layers, so the network emits one value for every step of the input cycle.
model = Sequential()
model.add(LSTM(256, activation='selu',
                return_sequences=True,
                input_shape=(train_x.shape[1], train_x.shape[2])))
model.add(LSTM(256, activation='selu', return_sequences=True))
model.add(LSTM(128, activation='selu', return_sequences=True))
model.add(Dense(64, activation='selu'))
model.add(Dense(1, activation='linear'))
model.summary()

model.compile(optimizer=opt, loss='huber', metrics=['mse', 'mae', 'mape', tf.keras.metrics.RootMeanSquaredError(name='rmse')])

# NOTE(review): patience=50 is larger than the 30 epochs requested in the fit
# cell, so early stopping cannot trigger with the current settings.
es = EarlyStopping(monitor='val_loss', patience=50)
# Keep only the weights of the epoch with the lowest validation loss.
mc = ModelCheckpoint(data_path + 'results/trained_model/%s_best.h5' % experiment_name, 
                             save_best_only=True, 
                             monitor='val_loss')

2025-02-11-17-09-41_lstm_soc_percentage


  super().__init__(name, **kwargs)
2025-02-11 17:09:43.495030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-11 17:09:43.497095: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-02-11 17:09:43.497205: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2025-02-11 17:09:43.497255: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2025-02-11 17:09:43.497560: W tensorflow/stream_execut

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 287, 256)          266240    
                                                                 
 lstm_1 (LSTM)               (None, 287, 256)          525312    
                                                                 
 lstm_2 (LSTM)               (None, 287, 128)          197120    
                                                                 
 dense (Dense)               (None, 287, 64)           8256      
                                                                 
 dense_1 (Dense)             (None, 287, 1)            65        
                                                                 
Total params: 996,993
Trainable params: 996,993
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Training settings: 20% of the training samples are held out for validation,
# and the early-stopping / checkpoint callbacks monitor that validation loss.
fit_options = dict(
    epochs=30,
    batch_size=32,
    verbose=1,
    validation_split=0.2,
    callbacks=[es, mc],
)

history = model.fit(train_x, train_y, **fit_options)

In [None]:
# Save the model as it is after the last epoch (the best-validation weights are
# written separately by the ModelCheckpoint callback).
model.save(data_path + 'results/trained_model/%s.h5' % experiment_name)

# Persist the per-epoch loss/metric history next to the model.
hist_df = pd.DataFrame(history.history)
hist_csv_file = data_path + 'results/trained_model/%s_history.csv' % experiment_name
# Pass the path straight to pandas: a text handle opened without newline=''
# makes to_csv emit doubled line endings on Windows.
hist_df.to_csv(hist_csv_file)

### Testing

In [None]:
# Evaluate on the held-out test cycles; the printed list holds the loss followed
# by the metrics in the order given to model.compile.
results = model.evaluate(x=test_x, y=test_y)
print(results)

# Data Visualization

In [None]:
# (history key, legend label) for each curve of the loss plot.
loss_curves = (
    ('loss', 'train'),
    ('val_loss', 'validation'),
)

fig = go.Figure()
for history_key, trace_label in loss_curves:
    fig.add_trace(go.Scatter(y=history.history[history_key],
                             mode='lines', name=trace_label))

fig.update_layout(
    title='Loss trend',
    xaxis_title='epoch',
    yaxis_title='loss',
    width=1400,
    height=600,
)
fig.show()

In [None]:
# Predictions on the training inputs, used by the training plot below.
train_predictions = model.predict(x=train_x)

In [None]:
# Select one window of `steps_num` consecutive points from the flattened series.
# The arrays are flattened so that consecutive cycles are laid end to end; with
# 287 steps per cycle in the recorded run, a window therefore spans many cycles.
cycle_num = 0
steps_num = 8000
window = slice(cycle_num*steps_num, (cycle_num+1)*steps_num)
step_index = np.arange(window.start, window.stop)

fig = go.Figure()
fig.add_trace(go.Scatter(x=step_index, y=train_predictions.flatten()[window],
                    mode='lines', name='SoC predicted'))
fig.add_trace(go.Scatter(x=step_index, y=train_y.flatten()[window],
                    mode='lines', name='SoC actual'))
# The x axis is the flattened time-step index, not a cycle number.
fig.update_layout(title='Results on training',
                  xaxis_title='Step',
                  yaxis_title='SoC percentage',
                  width=1400,
                  height=600)
fig.show()

In [None]:
# Predictions on the held-out test inputs, used by the testing plot below.
test_predictions = model.predict(x=test_x)

In [None]:
# Select one window of `steps_num` consecutive points from the flattened series.
# The arrays are flattened so that consecutive cycles are laid end to end; with
# 287 steps per cycle in the recorded run, a window spans several cycles.
cycle_num = 0
steps_num = 1000
window = slice(cycle_num*steps_num, (cycle_num+1)*steps_num)
step_index = np.arange(window.start, window.stop)

fig = go.Figure()
fig.add_trace(go.Scatter(x=step_index, y=test_predictions.flatten()[window],
                    mode='lines', name='SoC predicted'))
fig.add_trace(go.Scatter(x=step_index, y=test_y.flatten()[window],
                    mode='lines', name='SoC actual'))
# The x axis is the flattened time-step index, not a cycle number.
fig.update_layout(title='Results on testing',
                  xaxis_title='Step',
                  yaxis_title='SoC percentage',
                  width=1400,
                  height=600)
fig.show()