In [None]:
# Import Python Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score, roc_curve # Importing necessary functions
from google.colab import drive
# Mount Google Drive at /content/drive so files under MyDrive can be read by path.
# `drive` comes from google.colab, so this cell needs a Colab runtime.
drive.mount('/content/drive')

# Data Loading:


In [None]:
# Load the training and testing datasets from the mounted Drive.
# Train.csv feeds the training frame and Test.csv the held-out test frame, so the
# model is fitted on the training file and every metric is computed on the test file.
train_data_EV = pd.read_csv('/content/drive/MyDrive/Train.csv')
test_data_EV = pd.read_csv('/content/drive/MyDrive/Test.csv')

# Inspect the training data (rich display; pandas truncates long frames)
display(train_data_EV)

# Get a Statistical Summary

In [None]:
# Summary statistics of the training frame; as the cell's last expression,
# the result is rendered as the cell output.
train_data_EV.describe()

# Separating our Feature and Target

In [None]:
# Features: the single `VehicleSpeed_km_h_` column, kept two-dimensional
# (a one-column DataFrame) because the list selector preserves the frame shape.
X_train = train_data_EV.loc[:, ['VehicleSpeed_km_h_']]
X_test = test_data_EV.loc[:, ['VehicleSpeed_km_h_']]

# Targets: the `Battery_Status` column as a Series.
y_train = train_data_EV.loc[:, 'Battery_Status']
y_test = test_data_EV.loc[:, 'Battery_Status']

# Understand the Class Distribution

In [None]:
# Number of training rows per `Battery_Status` value
# (value_counts orders the result from most to least frequent).
class_counts = train_data_EV.loc[:, 'Battery_Status'].value_counts()
print(class_counts)

The classes are imbalanced: class 0 has 78773 rows and class 1 has 28312. This imbalance will affect our model's predictions.

Plot the class distribution to visualize the imbalance.

In [None]:
# Bar chart of the per-class row counts computed in `class_counts`
fig, ax = plt.subplots()
ax.bar(class_counts.index, class_counts.values, color=['blue', 'orange'])
ax.set_xlabel('Battery Status')
ax.set_ylabel('Number of Instances')
ax.set_title('Class Distribution of Battery Status')
# Replace the numeric ticks 0 and 1 with readable class names
ax.set_xticks([0, 1])
ax.set_xticklabels(['Normal', 'Low'])
plt.show()

# Try to Balance the Data

In [None]:
# Oversample the training set with SMOTE; the test set is left untouched.
# A fixed random_state makes the synthetic samples, and therefore every metric
# computed afterwards, identical on each Restart & Run All.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Train the Model

In [None]:
# Fit a 3-nearest-neighbours classifier on the resampled training data
# (fit returns the estimator itself, so the calls can be chained).
model_knn = KNeighborsClassifier(n_neighbors=3).fit(X_resampled, y_resampled)

# Class labels predicted for the test features
y_pred = model_knn.predict(X_test)

# Evaluate the Model Using Metrics

In [None]:
# Compute the test-set metrics first, then report them.
accuracy = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
# sep="\n" puts each heading on its own line above the value it introduces
print("Confusion Matrix:", conf_mat, sep="\n")
print("\nClassification Report:", report, sep="\n")

In [None]:
# Column 1 of predict_proba holds the probability of model_knn.classes_[1],
# which is used here as the score for class 1.
y_prob = model_knn.predict_proba(X_test)[:, 1]

auc = roc_auc_score(y_test, y_prob)
print(f"Area Under the Curve (AUC): {auc:.4f}")

# Points of the ROC curve for the same scores
fpr, tpr, thresholds = roc_curve(y_test, y_prob)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='blue', label=f'ROC curve (area = {auc:.4f})')
# Dashed diagonal drawn from (0, 0) to (1, 1) for visual comparison
ax.plot([0, 1], [0, 1], color='red', linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc="lower right")
plt.show()

# Plot Predictions

In [None]:
# Overlay three layers against the feature: training labels, true test labels,
# and the labels the model predicted for the test rows.
fig, ax = plt.subplots()
ax.scatter(X_train, y_train, color='blue', label='Training Data')
ax.scatter(X_test, y_test, color='green', label='True Test Data')
ax.scatter(X_test, y_pred, color='red', marker='x', label='Predicted Test Data')
ax.set_xlabel('Feature')
ax.set_ylabel('Target')
ax.legend()
plt.show()