In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the dataset
df = pd.read_excel("C:/Users/i.hanis/PycharmProjects/VehicleMaintenance/data/Cleaned_ServiceRequest.xlsx")

In [3]:
# Drop irrelevant columns
columns_to_drop = ['ID', 'SR', 'timereceived', 'Description', 'Response', 'DateClose']
df.drop(columns=columns_to_drop, axis=1, inplace=True)

# Convert and extract datetime features
df['Datereceived'] = pd.to_datetime(df['Datereceived'], errors='coerce')
df['responseDate'] = pd.to_datetime(df['responseDate'], errors='coerce')

df['received_day'] = df['Datereceived'].dt.day
df['received_month'] = df['Datereceived'].dt.month
df['received_weekday'] = df['Datereceived'].dt.weekday

df['response_day'] = df['responseDate'].dt.day
df['response_month'] = df['responseDate'].dt.month
df['response_weekday'] = df['responseDate'].dt.weekday

# Drop datetime columns
df.drop(['Datereceived', 'responseDate'], axis=1, inplace=True)

# Fill missing values
df.fillna(0, inplace=True)

In [4]:
# Label encode categorical columns
cat_cols = df.select_dtypes(include='object').columns
le = LabelEncoder()
for col in cat_cols:
    df[col] = le.fit_transform(df[col].astype(str))

In [5]:
# Define features and target
X = df.drop('Priority', axis=1)
y = df['Priority']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Train Gradient Boosting model
model = GradientBoostingClassifier(random_state=42)
model.fit(X_train, y_train)

In [7]:
# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9723050161733233
Confusion Matrix:
 [[    2     0     0     0]
 [    0 96479    64   190]
 [    0  2120   215    89]
 [    0   371     0  2799]]
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00         2
         1.0       0.97      1.00      0.99     96733
         2.0       0.77      0.09      0.16      2424
         3.0       0.91      0.88      0.90      3170

    accuracy                           0.97    102329
   macro avg       0.91      0.74      0.76    102329
weighted avg       0.97      0.97      0.96    102329



In [8]:
import joblib

# Save model
joblib.dump(model, "gradient_boosting_model.pkl")

['gradient_boosting_model.pkl']