In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
df = pd.read_csv("battery_dataset.csv")

In [3]:
print(df.head())
print(df.info())


       Time   Current   Voltage  Temperature
0  0.000000  2.360728  2.999311    27.449894
1  1.000008  2.303036  2.999813    27.724861
2  2.000008  2.284371  2.999863    27.545357
3  3.000008  2.221588  2.999863    27.457523
4  4.000008  2.158805  2.999813    27.484249
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 307513 entries, 0 to 307512
Data columns (total 4 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   Time         307513 non-null  float64
 1   Current      307513 non-null  float64
 2   Voltage      307513 non-null  float64
 3   Temperature  307513 non-null  float64
dtypes: float64(4)
memory usage: 9.4 MB
None


In [30]:
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

In [4]:
C_nominal = 2.0 

In [32]:
df['time'] = pd.to_datetime(df['time'], errors='coerce')
df['time_diff'] = df['time'].diff().dt.total_seconds().fillna(0)

In [34]:
features = ['current', 'time_diff', 'voltage', 'temperature']

In [36]:
df['delta_capacity'] = (df['current'] * df['time_diff']) / 3600  # Ah
df['SoC'] = 100 - (df['delta_capacity'].cumsum() / C_nominal * 100)
df['SoC'] = df['SoC'].clip(lower=0, upper=100)

In [37]:
df['SoH'] = 100 * (df['voltage'] / df['voltage'].max())


In [38]:
X = df[['current', 'voltage', 'temperature', 'time_diff']]
y_soc = df['SoC']
y_soh = df['SoH']

In [39]:
X_train, X_test, y_soc_train, y_soc_test = train_test_split(X, y_soc, test_size=0.2, random_state=42)
_, _, y_soh_train, y_soh_test = train_test_split(X, y_soh, test_size=0.2, random_state=42)

In [40]:
soc_model = RandomForestRegressor(n_estimators=100, random_state=42)
soc_model.fit(X_train, y_soc_train)


0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [41]:
soh_model = RandomForestRegressor(n_estimators=100, random_state=42)
soh_model.fit(X_train, y_soh_train)


0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [42]:
y_soc_pred = soc_model.predict(X_test)
y_soh_pred = soh_model.predict(X_test)

In [43]:
print("Random Forest Results:")
print("SoC RMSE:", np.sqrt(mean_squared_error(y_soc_test, y_soc_pred)))
print("SoC R²:", r2_score(y_soc_test, y_soc_pred))
print("SoH RMSE:", np.sqrt(mean_squared_error(y_soh_test, y_soh_pred)))
print("SoH R²:", r2_score(y_soh_test, y_soh_pred))

Random Forest Results:
SoC RMSE: 2.345390420415953e-09
SoC R²: 0.260696074167885
SoH RMSE: 0.0003386729574328363
SoH R²: 0.9999999952237217


In [44]:
def create_sequences(data, labels, time_steps=10):
    Xs, ys = [], []
    for i in range(len(data) - time_steps):
        Xs.append(data[i:(i+time_steps)])
        ys.append(labels[i+time_steps])
    return np.array(Xs), np.array(ys)

In [45]:
features = df[['current', 'voltage', 'temperature']].values  

# Labels
labels_soc = df['soc'].values
labels_soh = df['soh'].values

In [46]:
time_steps = 20
X_soc, y_soc_seq = create_sequences(features, labels_soc, time_steps)
X_soh, y_soh_seq = create_sequences(features, labels_soh, time_steps)

In [47]:
split = int(0.8 * len(X_soc))
X_soc_train, X_soc_test = X_soc[:split], X_soc[split:]
y_soc_train, y_soc_test = y_soc_seq[:split], y_soc_seq[split:]

X_soh_train, X_soh_test = X_soh[:split], X_soh[split:]
y_soh_train, y_soh_test = y_soh_seq[:split], y_soh_seq[split:]


In [48]:
def build_lstm():
    model = Sequential([
        LSTM(64, activation='relu', input_shape=(time_steps, features.shape[1])),
        Dense(32, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

In [49]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


In [50]:
soc_lstm = build_lstm()
soc_lstm.fit(X_soc_train, y_soc_train, epochs=5, batch_size=32, validation_split=0.2, verbose=1)


  super().__init__(**kwargs)


Epoch 1/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6ms/step - loss: 82.7192 - val_loss: 12.8781
Epoch 2/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 6ms/step - loss: 0.6335 - val_loss: 2.8410
Epoch 3/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6ms/step - loss: 0.2079 - val_loss: 1.6912
Epoch 4/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 6ms/step - loss: 0.1431 - val_loss: 1.9807
Epoch 5/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 6ms/step - loss: 0.0870 - val_loss: 1.6636


<keras.src.callbacks.history.History at 0x2942f741360>

In [51]:
soh_lstm = build_lstm()
soh_lstm.fit(X_soh_train, y_soh_train, epochs=5, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6ms/step - loss: 35.4515 - val_loss: 8.2412
Epoch 2/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6ms/step - loss: 0.4211 - val_loss: 1.4610
Epoch 3/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6ms/step - loss: 0.2553 - val_loss: 0.5012
Epoch 4/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6ms/step - loss: 0.1703 - val_loss: 0.2393
Epoch 5/5
[1m6150/6150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 6ms/step - loss: 0.1386 - val_loss: 0.2638


<keras.src.callbacks.history.History at 0x294aef97340>

In [53]:
y_soc_lstm_pred = soc_lstm.predict(X_soc_test)
y_soh_lstm_pred = soh_lstm.predict(X_soh_test)

[1m1922/1922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step
[1m1922/1922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step


In [54]:
print("LSTM Results:")
print("SoC RMSE:", np.sqrt(mean_squared_error(y_soc_test, y_soc_lstm_pred)))
print("SoH RMSE:", np.sqrt(mean_squared_error(y_soh_test, y_soh_lstm_pred)))

LSTM Results:
SoC RMSE: 1.9506416060145673
SoH RMSE: 0.8652798267704509


In [57]:
import joblib
features = ['current', 'time_diff', 'voltage', 'temperature']


X = df[features].values
y_soc = df['soc'].values
y_soh = df['soh'].values




# Save feature names
joblib.dump(features, "features.pkl")
joblib.dump(soc_model, "soc_rf.pkl")
joblib.dump(soh_model, "soh_rf.pkl")
soc_lstm.save("soc_lstm.h5")
soh_lstm.save("soh_lstm.h5")

