In [None]:
pip install pandas scikit-learn joblib boto3 influxdb-client

In [1]:
import os

import boto3
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [2]:
# Hand-written sample readings, kept for reference only: the triple-quoted
# string below is never assigned or used, so it has no effect at runtime.
'''
data = [
    {"batteryId": 1, "stateOfCharge": 0.4384, "stateOfHealth": 98.00, "batteryCurrent": 349.57, "batteryVoltage": 397.63,
     "kmh": 234.00, "distance": 79.35, "batteryTemp": 0.00, "ambientTemp": 20.40},
    {"batteryId": 2, "stateOfCharge": 0.1234, "stateOfHealth": 95.00, "batteryCurrent": 500.00, "batteryVoltage": 350.00,
     "kmh": 200.00, "distance": 100.50, "batteryTemp": 60.00, "ambientTemp": 25.40},
    {"batteryId": 3, "stateOfCharge": 0.7684, "stateOfHealth": 99.00, "batteryCurrent": 200.00, "batteryVoltage": 400.00,
     "kmh": 180.00, "distance": 120.00, "batteryTemp": 30.00, "ambientTemp": 20.00},
]
'''
# To work with the sample instead of the CSV, turn the string above back into
# code and build the frame from it:
#df = pd.DataFrame(data)

# Load the battery data from the CSV file (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer="./data/battery_data.csv")

print(df)

                            timestamp  batteryId  ambientTemp  batteryCurrent  \
0    2025-02-12 14:26:34.190000+00:00          1        18.65           78.06   
1    2025-02-12 14:26:37.025000+00:00          1        18.36           81.42   
2    2025-02-12 14:26:40.020000+00:00          1        18.17           75.76   
3    2025-02-12 14:26:43.024000+00:00          1        18.50           72.11   
4    2025-02-12 14:26:46.023000+00:00          1        18.12           73.72   
..                                ...        ...          ...             ...   
501  2025-02-12 14:51:53.470000+00:00          1        19.29           55.85   
502  2025-02-12 14:51:56.469000+00:00          1        19.43           64.87   
503  2025-02-12 14:51:59.469000+00:00          1        19.73           55.13   
504  2025-02-12 14:52:02.469000+00:00          1        19.86           44.47   
505  2025-02-12 14:52:05.475000+00:00          1        20.05           42.55   

     batteryTemp  batteryVo

In [3]:
# Define stress condition (1 = Stress, 0 = Normal)
def detect_stress(row, *, max_current=400, max_temp=50, min_soc=0.05, min_voltage=320):
    """Label a single battery reading as stressed (1) or normal (0).

    A reading counts as stressed when at least one of these holds:

    * ``row["batteryCurrent"]`` is above ``max_current``
    * ``row["batteryTemp"]`` is above ``max_temp``
    * ``row["stateOfCharge"]`` is below ``min_soc``
    * ``row["batteryVoltage"]`` is below ``min_voltage``

    All comparisons are strict, so a value sitting exactly on a limit is
    treated as normal. A NaN value never satisfies a comparison, so a missing
    reading does not by itself mark the row as stressed.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (e.g. a ``pandas.Series`` handed in
        by ``DataFrame.apply(..., axis=1)`` or a plain ``dict``).
    max_current, max_temp : number, keyword-only
        Upper limits for current and battery temperature.
    min_soc, min_voltage : number, keyword-only
        Lower limits for state of charge and battery voltage.

    Returns
    -------
    int
        ``1`` for a stress condition, ``0`` otherwise.

    Raises
    ------
    KeyError
        If a column that needs to be inspected is missing from ``row``.
    """
    # Checks run in a fixed order and stop at the first limit that is violated.
    if row["batteryCurrent"] > max_current:
        return 1  # Stress condition: current too high
    if row["batteryTemp"] > max_temp:
        return 1  # Stress condition: battery too hot
    if row["stateOfCharge"] < min_soc:
        return 1  # Stress condition: charge too low
    if row["batteryVoltage"] < min_voltage:
        return 1  # Stress condition: voltage too low
    return 0  # Normal condition

# Run detect_stress on every row and store the 0/1 result in a new "stressIndicator" column
df["stressIndicator"] = df.apply(detect_stress, axis="columns")

print(df)

                            timestamp  batteryId  ambientTemp  batteryCurrent  \
0    2025-02-12 14:26:34.190000+00:00          1        18.65           78.06   
1    2025-02-12 14:26:37.025000+00:00          1        18.36           81.42   
2    2025-02-12 14:26:40.020000+00:00          1        18.17           75.76   
3    2025-02-12 14:26:43.024000+00:00          1        18.50           72.11   
4    2025-02-12 14:26:46.023000+00:00          1        18.12           73.72   
..                                ...        ...          ...             ...   
501  2025-02-12 14:51:53.470000+00:00          1        19.29           55.85   
502  2025-02-12 14:51:56.469000+00:00          1        19.43           64.87   
503  2025-02-12 14:51:59.469000+00:00          1        19.73           55.13   
504  2025-02-12 14:52:02.469000+00:00          1        19.86           44.47   
505  2025-02-12 14:52:05.475000+00:00          1        20.05           42.55   

     batteryTemp  batteryVo

In [4]:
# stressIndicator holds 1 for a stressed row and 0 otherwise, so the column total
# is the number of stress events
stress_column = df["stressIndicator"]
stress_count = stress_column.sum()

print(f"Total stress events detected: {stress_count}")

Total stress events detected: 195


In [5]:
# Model inputs (feature columns) and the label to predict.
# NOTE(review): stressIndicator is computed by detect_stress from batteryCurrent,
# batteryTemp, stateOfCharge and batteryVoltage, and all four are also listed as
# features below. The classifier can therefore re-learn the threshold rule, so a
# perfect test score here is expected and does not show generalisation.
features = [
    "stateOfCharge",
    "stateOfHealth",
    "batteryCurrent",
    "batteryVoltage",
    "kmh",
    "distance",
    "batteryTemp",
    "ambientTemp",
]
y = df["stressIndicator"]
X = df[features]

In [6]:
# Hold out 20% of the rows for testing; the fixed seed makes the shuffle repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Fit a 100-tree random forest on the training split.
# random_state fixes the seed; verbose=1 makes scikit-learn print progress logs.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    verbose=1,
)
model.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


In [8]:
# Collect the node count and maximum depth of every fitted tree in the forest,
# then report the averages (truncated to whole numbers).
n_nodes = [fitted_tree.tree_.node_count for fitted_tree in model.estimators_]
max_depths = [fitted_tree.tree_.max_depth for fitted_tree in model.estimators_]

print(f'Average number of nodes {int(np.mean(n_nodes))}')
print(f'Average maximum depth {int(np.mean(max_depths))}')

Average number of nodes 9
Average maximum depth 3


In [9]:
# Pair each feature name with the importance score the fitted forest assigned to it
# and list them from most to least important (top 10 shown).
fi_model = (
    pd.DataFrame({'feature': features, 'importance': model.feature_importances_})
    .sort_values('importance', ascending=False)
)
fi_model.head(10)

Unnamed: 0,feature,importance
2,batteryCurrent,0.43985
4,kmh,0.189535
6,batteryTemp,0.107606
1,stateOfHealth,0.098203
5,distance,0.057324
3,batteryVoltage,0.057202
0,stateOfCharge,0.045008
7,ambientTemp,0.005271


In [10]:
# Score the model on the held-out split: overall accuracy first, then the
# per-class precision / recall / f1 report.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(
    f"Model Accuracy: {accuracy:.2f}",
    classification_report(y_test, y_pred),
    sep="\n",
)

Model Accuracy: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        54
           1       1.00      1.00      1.00        48

    accuracy                           1.00       102
   macro avg       1.00      1.00      1.00       102
weighted avg       1.00      1.00      1.00       102



[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


In [11]:
# Save Model
# joblib.dump does not create missing folders, so make sure the target directory
# exists first; exist_ok=True keeps this cell safe to re-run.
model_path = "./models/battery_stress_model.pkl"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)
print("Model saved as battery_stress_model.pkl")

Model saved as battery_stress_model.pkl
