In [1]:
import numpy as np
import pandas as pd
# Load the battery dataset from its CSV file into a DataFrame.
csv_path = "/content/smart_battery_data.csv"
data = pd.read_csv(csv_path)

In [2]:
# Count the missing entries in each column.
data.isna().sum()

Unnamed: 0,0
battery_id,0
cycle_count,23
voltage,34
current,21
temperature,20
internal_resistance,34
capacity,23
health_status,0


In [3]:
# Separate frame holding only the numeric columns; later cells iterate over
# its column labels to decide which columns of `data` to process.
numeric_df = data.select_dtypes(include="number")

In [4]:
# Replace each missing numeric value with the mean of its own column,
# then re-count the gaps to confirm none remain.
numeric_columns = list(numeric_df.columns)
column_means = data[numeric_columns].mean()
data[numeric_columns] = data[numeric_columns].fillna(column_means)
data.isna().sum()

Unnamed: 0,0
battery_id,0
cycle_count,0
voltage,0
current,0
temperature,0
internal_resistance,0
capacity,0
health_status,0


In [5]:
# Pull out the object-dtype (text) columns and preview the first five rows.
categorical = data.select_dtypes(include="object")
categorical.head(5)

Unnamed: 0,battery_id,health_status
0,B001,Good
1,B002,Good
2,B003,Good
3,B004,Good
4,B005,Faulty


In [6]:
# Encoding
# Label encoding: replace every health_status category with an integer code.
from sklearn.preprocessing import LabelEncoder

lb = LabelEncoder()
lb.fit(data['health_status'])
data['health_status'] = lb.transform(data['health_status'])
data.head()

Unnamed: 0,battery_id,cycle_count,voltage,current,temperature,internal_resistance,capacity,health_status
0,B001,102.0,4.1,2.72,28.0,68.3,1.12,1
1,B002,435.0,2.81,2.22,52.6,54.6,2.37,1
2,B003,860.0,3.01,3.49,36.3,104.0,1.7,1
3,B004,270.0,3.71,2.76,46.236249,84.9,1.21,1
4,B005,106.0,3.24,2.62,41.7,54.6,1.51,0


In [7]:
# Standardize every numeric feature to zero mean and unit variance.
from sklearn.preprocessing import StandardScaler

numeric_columns = list(numeric_df.columns)

# Fit the scaler once on all numeric columns. StandardScaler standardizes each
# column independently, so the scaled values match a per-column fit, but `sc`
# now retains the mean/scale of every column (not just the last one) and can
# be reused to transform new rows or to invert the scaling.
# NOTE(review): the scaler is fit on the full frame before the train/test
# split, so test rows influence the statistics — consider fitting on the
# training partition only.
sc = StandardScaler()
data[numeric_columns] = sc.fit_transform(data[numeric_columns])
data.head()

Unnamed: 0,battery_id,cycle_count,voltage,current,temperature,internal_resistance,capacity,health_status
0,B001,-1.405321,1.596147,0.798189,-1.210253,-0.431564,-1.56558,1
1,B002,-0.22726,-1.530406,0.216128,0.422332,-0.739198,1.417823,1
2,B003,1.276272,-1.045669,1.694563,-0.659422,0.37008,-0.181281,1
3,B004,-0.810984,0.65091,0.844754,0.0,-0.058811,-1.350775,1
4,B005,-1.391171,-0.488221,0.681777,-0.301049,-0.739198,-0.634758,0


In [9]:
# Removing outliers with the 1.5 * IQR rule, one numeric column at a time.
# Each pass filters `data` itself, so the quartiles of later columns are
# computed on the rows that survived the earlier columns.
for feature in numeric_df.columns:
    first_quartile, third_quartile = data[feature].quantile([0.25, 0.75])
    spread = third_quartile - first_quartile
    low_fence = first_quartile - 1.5 * spread
    high_fence = third_quartile + 1.5 * spread
    # Strict bounds: values exactly on a fence are dropped as well.
    inside_fences = data[feature].between(low_fence, high_fence, inclusive="neither")
    data = data[inside_fences]
data.head()

Unnamed: 0,battery_id,cycle_count,voltage,current,temperature,internal_resistance,capacity,health_status
0,B001,-1.405321,1.596147,0.798189,-1.210253,-0.431564,-1.56558,1
1,B002,-0.22726,-1.530406,0.216128,0.422332,-0.739198,1.417823,1
2,B003,1.276272,-1.045669,1.694563,-0.659422,0.37008,-0.181281,1
3,B004,-0.810984,0.65091,0.844754,0.0,-0.058811,-1.350775,1
4,B005,-1.391171,-0.488221,0.681777,-0.301049,-0.739198,-0.634758,0


In [10]:
# Splitting the dataset into train and test partitions.
from sklearn.model_selection import train_test_split

# Show the class balance of the target. It is printed explicitly because a
# bare expression in the middle of a cell is evaluated but never displayed.
print(data['health_status'].value_counts())

# Features are every column except the target and the battery identifier.
X = data.drop(columns=['health_status', 'battery_id'])
Y = data['health_status']

# 80/20 split with a fixed seed so the partitions are reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [11]:
# Logistic regression with class weights balanced against class frequency.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score , confusion_matrix , classification_report , f1_score , recall_score , precision_score

lr = LogisticRegression(class_weight='balanced')
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

# Print the evaluation metrics on the held-out split, one per line, in order:
# accuracy, confusion matrix, recall, precision, F1, full report.
report_order = (
    accuracy_score,
    confusion_matrix,
    recall_score,
    precision_score,
    f1_score,
    classification_report,
)
for metric in report_order:
    print(metric(y_test, y_pred))

0.5257731958762887
[[15 20]
 [26 36]]
0.5806451612903226
0.6428571428571429
0.6101694915254238
              precision    recall  f1-score   support

           0       0.37      0.43      0.39        35
           1       0.64      0.58      0.61        62

    accuracy                           0.53        97
   macro avg       0.50      0.50      0.50        97
weighted avg       0.54      0.53      0.53        97



In [13]:
# SVM: RBF-kernel support vector classifier with balanced class weights.
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score , precision_score , recall_score , f1_score , confusion_matrix , classification_report

sv = SVC(kernel='rbf', class_weight='balanced')
sv.fit(x_train, y_train)
y_pred = sv.predict(x_test)

# Print the evaluation metrics on the held-out split, one per line, in order:
# accuracy, confusion matrix, recall, precision, F1, full report.
report_order = (
    accuracy_score,
    confusion_matrix,
    recall_score,
    precision_score,
    f1_score,
    classification_report,
)
for metric in report_order:
    print(metric(y_test, y_pred))

0.5979381443298969
[[17 18]
 [21 41]]
0.6612903225806451
0.6949152542372882
0.6776859504132231
              precision    recall  f1-score   support

           0       0.45      0.49      0.47        35
           1       0.69      0.66      0.68        62

    accuracy                           0.60        97
   macro avg       0.57      0.57      0.57        97
weighted avg       0.61      0.60      0.60        97



In [19]:
# K-nearest-neighbours classifier (27 neighbours, distance-weighted votes).
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=27, weights='distance')

# Train and evaluate on the partitions produced by the split cell. The
# features were standardized before splitting, so x_train / x_test are already
# scaled. The previous X_train_scaled / X_test_scaled names are not defined
# anywhere in this notebook and raise NameError on a fresh kernel.
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.6391752577319587
