### **Import Required Libraries**

In [75]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder,OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.metrics import classification_report,accuracy_score
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [76]:
train_data=pd.read_csv("../dataset/train_data.csv")
test_data=pd.read_csv("../dataset/test_data.csv")

In [77]:
print(f"Shape of the Training data:{train_data.shape}")
print(f"Shape of the Testing data:{test_data.shape}")

Shape of the Training data:(1550, 13)
Shape of the Testing data:(388, 13)


In [78]:
train_data=train_data.drop(["Unnamed: 0"],axis=1)
test_data=test_data.drop(["Unnamed: 0"],axis=1)

In [79]:
##split train data into the X_train and Y_train
X_train=train_data.drop(["delivery_status"],axis=1)
Y_train=train_data["delivery_status"]

In [80]:
##Adjust the datatype of  assembly_service_requested column
X_train["assembly_service_requested"]=X_train["assembly_service_requested"].astype("object")

In [81]:
# ### Numbercal columns 
numerical_features=X_train.select_dtypes(include="number").columns.to_list()
categorical_features=X_train.select_dtypes(include="object").columns.to_list()

# ##Nominal variables
categorical_features.remove("brand")
##Ordinal variables
ordinal_features=["brand"]

In [82]:
### Define pipelines
nominal_pipeline=Pipeline(steps=[
    ("One-Hot-Encoder",OneHotEncoder(sparse_output=False,handle_unknown="ignore"))
])
numerical_pipeline=Pipeline(steps=[
    ("Scaler",StandardScaler())
])

ordinal_pipeline=Pipeline(steps=[
    ("Ordinal-Encoder",OrdinalEncoder())
])


transfomer=ColumnTransformer(transformers=[
    ("Numerical Pipeline",numerical_pipeline,numerical_features),
    ("Nominal Pipeline",nominal_pipeline,categorical_features),
    ("Ordinal Pipeline",ordinal_pipeline,ordinal_features)
])

final_pipeline=Pipeline(steps=[
    ("Transfomer",transfomer),
    ("Resampling SMOTE",SMOTE(random_state=42)),
    ("svc",SVC(random_state=42))
])

## Execute Pipeline
final_pipeline.fit(X_train,Y_train)

0,1,2
,steps,"[('Transfomer', ...), ('Resampling SMOTE', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('Numerical Pipeline', ...), ('Nominal Pipeline', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,categories,'auto'
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,unknown_value,
,encoded_missing_value,
,min_frequency,
,max_categories,

0,1,2
,sampling_strategy,'auto'
,random_state,42
,k_neighbors,5

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [83]:
##split test data into the X_test and Y_test
X_test=test_data.drop(["delivery_status"],axis=1)
Y_test=test_data["delivery_status"]

##Adjust the datatype of  assembly_service_requested column
X_test["assembly_service_requested"]=X_test["assembly_service_requested"].astype("object")

In [84]:
y_pred=final_pipeline.predict(X_test) ##get the predicted values

In [85]:
##print the classification report
print(classification_report(Y_test,y_pred))

                 precision    recall  f1-score   support

      Delivered       0.19      0.32      0.24        69
Failed Delivery       0.49      0.40      0.44       192
       On Going       0.31      0.29      0.30       127

       accuracy                           0.35       388
      macro avg       0.33      0.34      0.33       388
   weighted avg       0.38      0.35      0.36       388



In [86]:
y_pred_train=final_pipeline.predict(X_train)

In [87]:
##print the classification report
print(classification_report(Y_train,y_pred_train))

                 precision    recall  f1-score   support

      Delivered       0.46      0.64      0.54       275
Failed Delivery       0.73      0.63      0.67       767
       On Going       0.61      0.61      0.61       508

       accuracy                           0.62      1550
      macro avg       0.60      0.63      0.61      1550
   weighted avg       0.64      0.62      0.63      1550



### **Hyperparamter Tune**

In [88]:
# Define parameter grid
param_grid = {
    'svc__C': [0.1, 1, 10, 100],                
    'svc__kernel': ['linear', 'rbf', 'poly'],     
    'svc__gamma': ['scale', 'auto']                                 
}

In [89]:
# Grid search with cross-validation
grid_search = GridSearchCV(
    estimator=final_pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,                
    n_jobs=-1,           
)

# Fit the model
grid_search.fit(X_train, Y_train)

0,1,2
,estimator,Pipeline(step...m_state=42))])
,param_grid,"{'svc__C': [0.1, 1, ...], 'svc__gamma': ['scale', 'auto'], 'svc__kernel': ['linear', 'rbf', ...]}"
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,transformers,"[('Numerical Pipeline', ...), ('Nominal Pipeline', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,categories,'auto'
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,unknown_value,
,encoded_missing_value,
,min_frequency,
,max_categories,

0,1,2
,sampling_strategy,'auto'
,random_state,42
,k_neighbors,5

0,1,2
,C,100
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [90]:
# Best model and parameters
print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'svc__C': 100, 'svc__gamma': 'scale', 'svc__kernel': 'rbf'}


In [91]:
# Evaluate best model
best_svc =grid_search.best_estimator_
y_pred_cv = best_svc.predict(X_test)

In [92]:
print("\nTest Accuracy:", accuracy_score(Y_test, y_pred_cv))
print("\nClassification Report:\n", classification_report(Y_test, y_pred_cv))


Test Accuracy: 0.35309278350515466

Classification Report:
                  precision    recall  f1-score   support

      Delivered       0.12      0.16      0.14        69
Failed Delivery       0.49      0.46      0.48       192
       On Going       0.31      0.30      0.31       127

       accuracy                           0.35       388
      macro avg       0.31      0.31      0.31       388
   weighted avg       0.37      0.35      0.36       388



In [93]:
y_pred_train_cv=best_svc.predict(X_train)

In [94]:
##print the classification report
print("\nTrain Accuracy:", accuracy_score(Y_train, y_pred_train_cv))
print(classification_report(Y_train,y_pred_train_cv))


Train Accuracy: 0.9883870967741936
                 precision    recall  f1-score   support

      Delivered       0.99      1.00      0.99       275
Failed Delivery       0.99      0.99      0.99       767
       On Going       0.98      0.99      0.99       508

       accuracy                           0.99      1550
      macro avg       0.99      0.99      0.99      1550
   weighted avg       0.99      0.99      0.99      1550

