Install pandas an sklearn

In [1]:
!pip install pandas scikit-learn tensorflow


Collecting numpy>=1.21.0 (from pandas)
  Downloading numpy-1.26.4-cp311-cp311-win_amd64.whl.metadata (61 kB)
Downloading numpy-1.26.4-cp311-cp311-win_amd64.whl (15.8 MB)
   ---------------------------------------- 0.0/15.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.8 MB ? eta -:--:--
    --------------------------------------- 0.3/15.8 MB ? eta -:--:--
    --------------------------------------- 0.3/15.8 MB ? eta -:--:--
   - -------------------------------------- 0.5/15.8 MB 479.2 kB/s eta 0:00:32
   - -------------------------------------- 0.5/15.8 MB 479.2 kB/s eta 0:00:32
   - -------------------------------------- 0.5/15.8 MB 479.2 kB/s eta 0:00:32
   - -------------------------------------- 0.5/15.8 MB 479.2 kB/s eta 0:00:32
   - -------------------------------------- 0.5/15.8 MB 479.2 kB/s eta 0:00:32
   - -------------------------------------- 0.5/15.8 MB 479.2 kB/s eta 0:00:32
   - -------------------------------------- 0.8/15.8 MB 299.6 kB/s eta 0:00:

In [2]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

Load data set to the dataframe

In [3]:
import pandas as pd
dataSetLocation = "./batch_battery_simulation.csv"
dataFrame = pd.read_csv(dataSetLocation)

remove unused column (timeDiff and variant)

In [4]:
dataFrame = dataFrame.drop('TimeDiff', axis=1)
dataFrame = dataFrame.drop('Time [s]', axis=1)
dataFrame = dataFrame.drop('Variant', axis=1)

In [5]:
print(dataFrame.head())

   Current [A]  Voltage [V]  X-averaged cell temperature [K]  BatteryCapacity   
0     0.833333     4.144409                           298.15              5.0  \
1     0.833333     4.143481                           298.15              5.0   
2     0.833333     4.142612                           298.15              5.0   
3     0.833333     4.141797                           298.15              5.0   
4     0.833333     4.141031                           298.15              5.0   

   CapacityDiff       SOC  
0      0.000000  1.000000  
1     -0.000231  0.999954  
2     -0.000231  0.999907  
3     -0.000231  0.999861  
4     -0.000231  0.999815  


Check Data Dimension

In [6]:
print(dataFrame.shape)

(1112679, 6)


Check Mean,Median,Mode, Quartile, Standard Deviation

In [7]:
numeric_cols = dataFrame.select_dtypes(include=['number']).columns

stats_list = []
for col in numeric_cols:
    stats = {
        'Column': col,
        'Mean': dataFrame[col].mean(),
        'Median': dataFrame[col].median(),
        'Mode': dataFrame[col].mode().values[0] if not dataFrame[col].mode().empty else None,
        'Q1': dataFrame[col].quantile(0.25),
        'Q3': dataFrame[col].quantile(0.75),
        'Std Dev': dataFrame[col].std()
    }
    stats_list.append(stats)

descriptiveStatisticDataFrame = pd.DataFrame(stats_list)
print(descriptiveStatisticDataFrame)

                            Column        Mean      Median        Mode   
0                      Current [A]    0.656742    0.650000    0.508333  \
1                      Voltage [V]    3.682930    3.709571    2.500010   
2  X-averaged cell temperature [K]  298.150000  298.150000  298.150000   
3                  BatteryCapacity    3.940450    3.900000    3.050000   
4                     CapacityDiff   -0.000182   -0.000181   -0.000141   
5                              SOC    0.388270    0.356111    0.000000   

           Q1          Q3       Std Dev  
0    0.575000    0.741667  9.598274e-02  
1    3.494654    3.948876  3.184094e-01  
2  298.150000  298.150000  7.958082e-13  
3    3.450000    4.450000  5.758965e-01  
4   -0.000206   -0.000160  2.669068e-05  
5    0.034167    0.678056  3.287844e-01  


remove constant feature (X-averaged cell temperature [K])

In [8]:
dataFrame = dataFrame.drop('X-averaged cell temperature [K]', axis=1)
print(dataFrame.head())

   Current [A]  Voltage [V]  BatteryCapacity  CapacityDiff       SOC
0     0.833333     4.144409              5.0      0.000000  1.000000
1     0.833333     4.143481              5.0     -0.000231  0.999954
2     0.833333     4.142612              5.0     -0.000231  0.999907
3     0.833333     4.141797              5.0     -0.000231  0.999861
4     0.833333     4.141031              5.0     -0.000231  0.999815


Split X (Feature: Current, Voltage, CapacityDiff, BatteryCapacity) and Y (target: SOC)

In [9]:
Y = dataFrame['SOC']
X = dataFrame.drop('SOC', axis=1)
print("feature: ")
print(X)
print("target: ")
print(Y)

feature: 
         Current [A]  Voltage [V]  BatteryCapacity  CapacityDiff
0           0.833333     4.144409             5.00      0.000000
1           0.833333     4.143481             5.00     -0.000231
2           0.833333     4.142612             5.00     -0.000231
3           0.833333     4.141797             5.00     -0.000231
4           0.833333     4.141031             5.00     -0.000231
...              ...          ...              ...           ...
1112674     0.508333     2.502233             3.05     -0.000141
1112675     0.508333     2.501581             3.05     -0.000141
1112676     0.508333     2.500929             3.05     -0.000141
1112677     0.508333     2.500276             3.05     -0.000141
1112678     0.508333     2.500010             3.05     -0.000058

[1112679 rows x 4 columns]
target: 
0          1.000000
1          0.999954
2          0.999907
3          0.999861
4          0.999815
             ...   
1112674    0.000000
1112675    0.000000
1112676    0.

Split 0.2 for test, 0.8 for training

In [10]:
# Split into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

Normalize

In [11]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Train & Evaluate using TensorFlow

In [12]:
# Neural network Model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, input_dim=X_train.shape[1], activation='relu'),  
    tf.keras.layers.Dense(32, activation='relu'),                            
    tf.keras.layers.Dense(1, activation='linear')                           
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model
model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# Predict on the test set
Y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

# Output the results
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

import matplotlib.pyplot as plt
plt.scatter(Y_test, Y_pred)
plt.xlabel("Actual SOC")
plt.ylabel("Predicted SOC")
plt.title("Actual vs Predicted SOC (Neural Network)")
plt.show()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m22254/22254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 4ms/step - loss: 0.0026 - mae: 0.0150 - val_loss: 8.2337e-06 - val_mae: 0.0021
Epoch 2/100
[1m22254/22254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 5ms/step - loss: 1.6591e-05 - mae: 0.0028 - val_loss: 6.4662e-06 - val_mae: 0.0018
Epoch 3/100
[1m22254/22254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 5ms/step - loss: 1.0841e-05 - mae: 0.0022 - val_loss: 5.4943e-06 - val_mae: 0.0018
Epoch 4/100
[1m22254/22254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 4ms/step - loss: 8.5165e-06 - mae: 0.0019 - val_loss: 5.0738e-06 - val_mae: 0.0017
Epoch 5/100
[1m 2747/22254[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1:46[0m 5ms/step - loss: 6.0155e-06 - mae: 0.0016

KeyboardInterrupt: 