In [10]:
!pip install shap

Collecting shap
  Downloading shap-0.46.0-cp312-cp312-win_amd64.whl.metadata (25 kB)
Collecting slicer==0.0.8 (from shap)
  Downloading slicer-0.0.8-py3-none-any.whl.metadata (4.0 kB)
Downloading shap-0.46.0-cp312-cp312-win_amd64.whl (456 kB)
   ---------------------------------------- 0.0/456.2 kB ? eta -:--:--
   --------------------------------------- 456.2/456.2 kB 14.4 MB/s eta 0:00:00
Downloading slicer-0.0.8-py3-none-any.whl (15 kB)
Installing collected packages: slicer, shap
Successfully installed shap-0.46.0 slicer-0.0.8


In [1]:
import numpy as np
import pandas as pd 
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from imblearn.over_sampling import SMOTE 
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef

In [2]:
dataset = pd.read_csv(r"enter_path_here")
dataset['Last_Service_Date'] = pd.to_datetime(dataset['Last_Service_Date'], format = "mixed")
dataset['Warranty_Expiry_Date'] = pd.to_datetime(dataset['Warranty_Expiry_Date'], format = "mixed")
dataset['service_year'] = dataset['Last_Service_Date'].dt.year
dataset['service_month'] = dataset['Last_Service_Date'].dt.month
dataset['service_day'] = dataset['Last_Service_Date'].dt.day
dataset['expiry_year'] = dataset['Warranty_Expiry_Date'].dt.year
dataset['expiry_month'] = dataset['Warranty_Expiry_Date'].dt.month
dataset['expiry_day'] = dataset['Warranty_Expiry_Date'].dt.day
dataset = dataset.drop(columns=['Last_Service_Date', 'Warranty_Expiry_Date'])
dataset = dataset[['Vehicle_Model', 'Mileage', 'Reported_Issues', 'Vehicle_Age', 'Fuel_Type', 'Transmission_Type', 'Engine_Size', 'Odometer_Reading', 'Owner_Type', 'Insurance_Premium', 'Service_History', 'Accident_History',
                   'Fuel_Efficiency', 'Tire_Condition', 'Brake_Condition', 'Battery_Status', 'service_year', 'service_month', 'service_day', 'expiry_year', 'expiry_month', 'expiry_day', 'Need_Maintenance']]
X = dataset.iloc[:,:-1]
y = dataset.iloc[:, -1]

In [3]:
columnTransformer = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [0, 4, 5, 8, 13, 14, 15])], remainder = "passthrough")
X = np.array(columnTransformer.fit_transform(X))

In [4]:
imputer = SimpleImputer(missing_values=np.nan, strategy = 'mean')
imputer.fit(X)
X = imputer.transform(X)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [11]:
sm = SMOTE(sampling_strategy = 'minority')
X_train, y_train = sm.fit_resample(X_train, y_train)

In [13]:
grid = { 
    'n_estimators': [25, 50, 100, 150], 
    'max_features': ['sqrt', 'log2', None], 
    'max_depth': [3, 6, 9], 
    'max_leaf_nodes': [3, 6, 9], 
} 
rs = RandomizedSearchCV(RandomForestClassifier(), grid)
rs.fit(X_train, y_train)
print(rs.best_estimator_)

RandomForestClassifier(max_depth=6, max_features=None, max_leaf_nodes=6)


In [15]:
classifier = RandomForestClassifier(max_depth=6, max_features=None, max_leaf_nodes=6)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [17]:
f1_weighted = f1_score(y_test, y_pred, average='weighted')
print(f1_weighted)

0.9593478896361892


In [19]:
acc_score = accuracy_score(y_test, y_pred)
print(acc_score)

0.9594
