# **AI Models for Battery Data 📊**

### Battery Preprocessing


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import SMOTE

# Load the raw dataset
file_path = "data in row.csv"
df = pd.read_csv(file_path)

# Drop the 'N#' and 'CarID' columns so they are not used as features
df = df.drop(columns=['N#', 'CarID'])

# Encode the categorical target as integer class ids
label_encoder = LabelEncoder()
df['Label'] = label_encoder.fit_transform(df['Label'])

# Select the feature columns by name rather than by position, so the target
# is excluded (and never scaled) even if 'Label' is not the last CSV column
feature_columns = df.columns.drop('Label')

# Standardize numerical features
scaler = StandardScaler()
df[feature_columns] = scaler.fit_transform(df[feature_columns])

# Apply SMOTE to balance the dataset
# NOTE(review): the scaler and SMOTE are both fitted on the full dataset here,
# before any train/validation/test split. Synthetic rows interpolated from
# samples that later land in the test split can leak into training and make
# reported scores optimistic -- consider splitting first and resampling only
# the training portion.
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(df[feature_columns], df['Label'])

# Reassemble features + target into one frame with 'Label' as the last column
df_resampled = pd.DataFrame(X_resampled, columns=feature_columns)
df_resampled['Label'] = y_resampled

# Save the preprocessed dataset
df_resampled.to_csv("preprocessed_battery_data.csv", index=False)

print("Preprocessing complete with SMOTE applied. Preprocessed dataset saved as 'preprocessed_battery_data.csv'.")

import pandas as pd
import random

def shuffle_csv(input_file, output_path, random_state=None):
    """Shuffle the rows of a CSV file and write them to a new CSV file.

    Args:
        input_file: Path of the CSV file to read.
        output_path: Path the shuffled rows are written to (index omitted).
        random_state: Optional seed forwarded to ``DataFrame.sample``. When
            ``None`` (the default) a seed is drawn with ``random.randint``,
            so every call produces a different order; pass an int to make
            the shuffle reproducible.
    """
    # Read the CSV file
    df = pd.read_csv(input_file)

    # Fall back to a freshly drawn seed only when the caller did not pin one
    seed = random.randint(1, 10000) if random_state is None else random_state

    # frac=1 samples every row exactly once, i.e. a full permutation
    df = df.sample(frac=1, random_state=seed).reset_index(drop=True)

    # Save the shuffled data to a new CSV file
    df.to_csv(output_path, index=False)

    print(f"Shuffled data saved to: {output_path}")

# Source (preprocessed) and destination (shuffled) CSV locations
input_file = "/content/preprocessed_battery_data.csv"
output_file = "/content/shuffled_battery_data.csv"

# Write a row-shuffled copy of the preprocessed data
shuffle_csv(input_file=input_file, output_path=output_file)

### XGBOOST Model - Battery Data

In [None]:
# Install necessary packages
!pip install xgboost scikit-learn pandas matplotlib seaborn

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

# Read the shuffled dataset
file_path = "shuffled_battery_data.csv"
df = pd.read_csv(file_path)

# Quick inspection: first rows, per-column null counts, column index
print("Dataset preview:")
display(df.head())
missing_per_column = df.isnull().sum()
print("\nMissing values in dataset:\n", missing_per_column)
print("Column names in dataset:", df.columns)

# Name of the column holding the class to predict
target_column = "Label"

# Target vector and feature matrix
y = df[target_column]
X = df.drop(columns=[target_column])

# 70/15/15 split: hold out 30 % first, then cut that hold-out in half.
# Both steps stratify on the labels to keep class proportions aligned.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Report how many rows ended up in each split
n_train, n_val, n_test = (len(part) for part in (X_train, X_val, X_test))
print(f"Train size: {n_train}, Validation size: {n_val}, Test size: {n_test}")

# Convert each split into XGBoost's DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)
dtest = xgb.DMatrix(X_test, label=y_test)

# Define XGBoost parameters for classification.
# 'n_estimators' is intentionally absent: it is a scikit-learn-wrapper
# argument that the native xgb.train API ignores (with an "unused parameter"
# warning). The number of trees is set by num_boost_round below.
params = {
    'objective': 'multi:softmax',  # Use 'binary:logistic' for binary classification
    'num_class': len(np.unique(y)),  # Number of classes
    'eval_metric': 'mlogloss',
    'learning_rate': 0.1,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'random_state': 42
}

# Train for up to 100 rounds, stopping early once the validation mlogloss
# has not improved for 10 consecutive rounds; log every 10th round
model = xgb.train(
    params,
    dtrain,
    num_boost_round=100,
    evals=[(dval, "Validation")],
    early_stopping_rounds=10,
    verbose_eval=10,
)

# Predictions on validation and test sets
y_val_pred = model.predict(dval)
y_test_pred = model.predict(dtest)

# Cast the predicted class ids to integers so they compare cleanly
# against the integer labels
y_val_pred = y_val_pred.astype(int)
y_test_pred = y_test_pred.astype(int)

# Evaluate performance
def evaluate_classification(y_true, y_pred, dataset_name):
    """Print headline metrics and the per-class report for one data split.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        dataset_name: Human-readable split name used in the printed header.
    """
    acc = accuracy_score(y_true, y_pred)

    # Precision, recall and F1 all use the same support-weighted averaging
    weighted_scorers = (precision_score, recall_score, f1_score)
    precision, recall, f1 = (
        scorer(y_true, y_pred, average='weighted') for scorer in weighted_scorers
    )

    print(f"\nPerformance on {dataset_name}:")
    print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}")
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

# Report metrics for the validation split first, then the test split
for split_name, truth, preds in (
    ("Validation Set", y_val, y_val_pred),
    ("Test Set", y_test, y_test_pred),
):
    evaluate_classification(truth, preds, split_name)

# Confusion matrix visualization
def plot_confusion_matrix(y_true, y_pred, dataset_name):
    """Draw an annotated confusion-matrix heatmap for one data split.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        dataset_name: Human-readable split name appended to the plot title.
    """
    # Build the axis labels from every class seen in either vector and pass
    # them to confusion_matrix explicitly, so the tick labels always match
    # the matrix rows/columns (even when a class is predicted but never
    # occurs in y_true)
    class_labels = np.unique(np.concatenate([np.asarray(y_true), np.asarray(y_pred)]))
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)

    plt.figure(figsize=(6, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(f"Confusion Matrix - {dataset_name}")
    plt.show()

# One confusion-matrix figure per evaluated split
for split_name, truth, preds in (
    ("Validation Set", y_val, y_val_pred),
    ("Test Set", y_test, y_test_pred),
):
    plot_confusion_matrix(truth, preds, split_name)

# Per-feature importance chart for the trained booster
xgb.plot_importance(model)
plt.show()



### ANN Model - Battery Data

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Load the dataset
file_path = "/content/shuffled_battery_data.csv"
df = pd.read_csv(file_path)

# Show the first few rows to inspect the structure. A bare `df.head()` is
# only rendered when it is the last expression of a notebook cell, so print
# it explicitly.
print(df.head())

#------------------------------------------------------

# Separate features and target
X = df.drop(columns=['Label'])  # Feature columns
y = df['Label']  # Target column

# Split data into train (70%), validation (15%), and test (15%),
# stratifying both steps on the labels
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# Report dataset sizes (printed for the same reason as the preview above)
print(X_train.shape, X_val.shape, X_test.shape)

#-----------------------------------------------------

# Define the ANN model.
# The input width is taken from the training data instead of being
# hard-coded, so the network still builds if the number of feature
# columns in the CSV changes.
n_features = X_train.shape[1]
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(n_features,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(len(y.unique()), activation='softmax')  # One output unit per class
])

# Compile the model; the sparse loss takes integer class ids directly
# (no one-hot encoding of y needed)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model, tracking loss/accuracy on the validation split each epoch
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=25,
    batch_size=32,
    verbose=1
)

# Evaluate the model on the held-out test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test Accuracy: {test_accuracy:.4f}')

# Side-by-side training curves: accuracy on the left, loss on the right
plt.figure(figsize=(12, 5))

for position, (history_key, display_name) in enumerate(
    (('accuracy', 'Accuracy'), ('loss', 'Loss')), start=1
):
    plt.subplot(1, 2, position)
    # Keras stores the validation series under the same key with a 'val_' prefix
    plt.plot(history.history[history_key], label=f'Train {display_name}')
    plt.plot(history.history[f'val_{history_key}'], label=f'Validation {display_name}')
    plt.title(f'Model {display_name}')
    plt.xlabel('Epochs')
    plt.ylabel(display_name)
    plt.legend()

plt.show()

# Predicted class = index of the largest softmax output for each test row
class_probabilities = model.predict(X_test)
y_pred = np.argmax(class_probabilities, axis=1)

# Per-class precision / recall / F1
print("Classification Report:")
ann_report = classification_report(y_test, y_pred)
print(ann_report)

# Confusion matrix heatmap, axes labelled with the classes present in y
conf_matrix = confusion_matrix(y_test, y_pred)
class_ticks = np.unique(y)
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_ticks,
    yticklabels=class_ticks,
)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

# Layer-by-layer architecture overview
print("Model Summary:")
model.summary()



### Random Forest Model - Battery Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib  # Library for saving and loading the model
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the shuffled dataset
file_path = "/content/shuffled_battery_data.csv"
df = pd.read_csv(file_path)

# Preview the first 5 rows
print(df.head())

# Column dtypes / non-null counts, followed by per-column null totals
print(df.info())
print(df.isnull().sum())

# Target vector and feature matrix ('Label' is the target column)
y = df['Label']
X = df.drop(columns=['Label'])

# Split the data into training (80%) and testing (20%) sets.
# Stratify on the labels, as the XGBoost and ANN sections do, so both
# parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Create a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Save the model
model_filename = "random_forest_model.pkl"
joblib.dump(model, model_filename)
print(f"Model saved as {model_filename}")

# Load the model back for testing, so the evaluation below exercises the
# persisted file rather than the in-memory estimator.
# NOTE: joblib files are pickles -- only load files you created yourself.
loaded_model = joblib.load(model_filename)

# Make predictions on the test set using the loaded model
y_pred = loaded_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)

# Render the confusion matrix as a colour-mapped image with a scale bar
cm_figure, cm_axes = plt.subplots(figsize=(6, 4))
cm_image = cm_axes.imshow(conf_matrix, cmap='Blues', interpolation='nearest')
cm_figure.colorbar(cm_image, ax=cm_axes)
cm_axes.set_xlabel("Predicted Labels")
cm_axes.set_ylabel("True Labels")
cm_axes.set_title("Confusion Matrix")
plt.show()

# Compute the learning curve. The plotted arrays were previously never
# defined, so this section raised NameError.
from sklearn.model_selection import learning_curve

# Cross-validate a fresh forest (same hyper-parameters as the trained model)
# on growing subsets of the training split; the held-out test set is not
# touched here.
train_sizes, train_scores, test_scores = learning_curve(
    RandomForestClassifier(n_estimators=100, random_state=42),
    X_train,
    y_train,
    cv=5,
    scoring='accuracy',
    train_sizes=np.linspace(0.1, 1.0, 5),
)

# Average the per-fold scores for every training-set size
train_mean = train_scores.mean(axis=1)
test_mean = test_scores.mean(axis=1)

plt.figure(figsize=(8, 6))
plt.plot(train_sizes, train_mean, 'o-', color='blue', label='Training Score')
plt.plot(train_sizes, test_mean, 'o-', color='green', label='Validation Score')

plt.xlabel('Training Examples')
plt.ylabel('Accuracy')
plt.title('Learning Curve')

# Adjust the Y-axis scale to make the differences more visible
plt.ylim([0.90, 1.001])  # Set the Y-axis range to be closer

plt.legend(loc='best')
plt.grid()
plt.show()


from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

# Use the fitted model's own class order so that column i of the binarized
# labels and column i of predict_proba always refer to the same class
class_labels = loaded_model.classes_

# Convert y_test to binarized format (One-vs-Rest)
y_test_binarized = label_binarize(y_test, classes=class_labels)

# label_binarize returns a single column for a two-class problem; expand it
# to one indicator column per class so the per-class loop below can index it
if y_test_binarized.shape[1] == 1:
    y_test_binarized = np.hstack([1 - y_test_binarized, y_test_binarized])

y_prob = loaded_model.predict_proba(X_test)  # Classification probabilities for each class

plt.figure(figsize=(8, 6))

# Plot ROC Curve for each class individually
for i, class_label in enumerate(class_labels):
    fpr, tpr, _ = roc_curve(y_test_binarized[:, i], y_prob[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'Class {class_label} (AUC = {roc_auc:.2f})')

# Plot the random guessing line (baseline)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.grid()
plt.show()
