In [None]:
pip install pandas scikit-learn joblib boto3 influxdb-client

In [76]:
import pandas as pd
import numpy as np
import joblib
import boto3
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [77]:
# Simulated battery data
'''
data = [
    {"batteryId": 1, "stateOfCharge": 0.4384, "stateOfHealth": 98.00, "batteryCurrent": 349.57, "batteryVoltage": 397.63,
     "kmh": 234.00, "distance": 79.35, "batteryTemp": 0.00, "ambientTemp": 20.40},
    {"batteryId": 2, "stateOfCharge": 0.1234, "stateOfHealth": 95.00, "batteryCurrent": 500.00, "batteryVoltage": 350.00,
     "kmh": 200.00, "distance": 100.50, "batteryTemp": 60.00, "ambientTemp": 25.40},
    {"batteryId": 3, "stateOfCharge": 0.7684, "stateOfHealth": 99.00, "batteryCurrent": 200.00, "batteryVoltage": 400.00,
     "kmh": 180.00, "distance": 120.00, "batteryTemp": 30.00, "ambientTemp": 20.00},
]
'''
# Convert to DataFrame
#df = pd.DataFrame(data)

# Read the data from file
df = pd.read_csv("./data/battery_data.csv")

print(df)

                            timestamp  batteryId  ambientTemp  batteryCurrent  \
0    2025-02-06 16:07:33.238000+00:00          1         20.4           75.72   
1    2025-02-06 16:07:35.825000+00:00          1         20.4           73.56   
2    2025-02-06 16:07:38.980000+00:00          1         20.4           85.90   
3    2025-02-06 16:07:41.790000+00:00          1         20.4           78.22   
4    2025-02-06 16:07:44.777000+00:00          1         20.4           72.94   
..                                ...        ...          ...             ...   
287  2025-02-06 16:21:53.768000+00:00          1         20.4          250.01   
288  2025-02-06 16:21:56.767000+00:00          1         20.4          275.01   
289  2025-02-06 16:21:59.770000+00:00          1         20.4          280.10   
290  2025-02-06 16:22:02.777000+00:00          1         20.4          287.28   
291  2025-02-06 16:22:05.768000+00:00          1         20.4          295.84   

     batteryTemp  batteryVo

In [78]:
# Define stress condition (1 = Stress, 0 = Normal)
def detect_stress(row):
    if row["batteryCurrent"] > 250 or row["batteryTemp"] > 50 or row["stateOfCharge"] < 0.01 or row["batteryVoltage"] < 390:
        return 1  # Stress condition
    return 0  # Normal condition

# Apply stress detection
df["stressIndicator"] = df.apply(detect_stress, axis=1)

print(df)

                            timestamp  batteryId  ambientTemp  batteryCurrent  \
0    2025-02-06 16:07:33.238000+00:00          1         20.4           75.72   
1    2025-02-06 16:07:35.825000+00:00          1         20.4           73.56   
2    2025-02-06 16:07:38.980000+00:00          1         20.4           85.90   
3    2025-02-06 16:07:41.790000+00:00          1         20.4           78.22   
4    2025-02-06 16:07:44.777000+00:00          1         20.4           72.94   
..                                ...        ...          ...             ...   
287  2025-02-06 16:21:53.768000+00:00          1         20.4          250.01   
288  2025-02-06 16:21:56.767000+00:00          1         20.4          275.01   
289  2025-02-06 16:21:59.770000+00:00          1         20.4          280.10   
290  2025-02-06 16:22:02.777000+00:00          1         20.4          287.28   
291  2025-02-06 16:22:05.768000+00:00          1         20.4          295.84   

     batteryTemp  batteryVo

In [79]:
# Count the number of stress events
stress_count = df["stressIndicator"].sum()

print(f"Total stress events detected: {stress_count}")

Total stress events detected: 65


In [80]:
# Define Features and Target
features = ["stateOfCharge", "stateOfHealth", "batteryCurrent", "batteryVoltage", "kmh", "distance", "batteryTemp", "ambientTemp"]
X = df[features]
y = df["stressIndicator"]

In [81]:
# Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [82]:
# Train Model
model = RandomForestClassifier(n_estimators=100, random_state=42, verbose = 1)
model.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


In [83]:
# extracts and prints the average number of nodes and average maximum depth across all decision trees in the random forest.
n_nodes = []
max_depths = []

for ind_tree in model.estimators_:
    n_nodes.append(ind_tree.tree_.node_count)
    max_depths.append(ind_tree.tree_.max_depth)
    
print(f'Average number of nodes {int(np.mean(n_nodes))}')
print(f'Average maximum depth {int(np.mean(max_depths))}')

Average number of nodes 6
Average maximum depth 2


In [84]:
# Feature importance indicates how much each feature contributes to the model's predictions.
fi_model = pd.DataFrame({'feature': features,
                   'importance': model.feature_importances_}).\
                    sort_values('importance', ascending = False)
fi_model.head(10)

Unnamed: 0,feature,importance
2,batteryCurrent,0.43449
4,kmh,0.347537
5,distance,0.09553
0,stateOfCharge,0.082876
3,batteryVoltage,0.039568
1,stateOfHealth,0.0
6,batteryTemp,0.0
7,ambientTemp,0.0


In [85]:
# Evaluate Model, generates a summary of key classification metrics:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

Model Accuracy: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        49
           1       1.00      1.00      1.00        10

    accuracy                           1.00        59
   macro avg       1.00      1.00      1.00        59
weighted avg       1.00      1.00      1.00        59



[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


In [86]:
# Save Model
joblib.dump(model, "./models/battery_stress_model.pkl")
print("Model saved as battery_stress_model.pkl")

Model saved as battery_stress_model.pkl
