### **Import Required Libraries**

In [1]:
import pandas as pd
import numpy as np
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder,OneHotEncoder,StandardScaler
from sklearn.metrics import accuracy_score,precision_score,recall_score,classification_report,f1_score
import tensorflow as tf
from scikeras.wrappers import KerasClassifier

### **Import Datasets**

In [2]:
train_data=pd.read_csv("../dataset/train_data.csv")
test_data=pd.read_csv("../dataset/test_data.csv")

In [3]:
print(f"Shape of the Training data:{train_data.shape}")
print(f"Shape of the Testing data:{test_data.shape}")

Shape of the Training data:(1550, 13)
Shape of the Testing data:(388, 13)


### **Drop Unecessary Variables**

In [4]:
train_data=train_data=train_data.drop(["Unnamed: 0"],axis=1)
test_data=test_data.drop(["Unnamed: 0"],axis=1)

In [5]:
##split train data into the X_train and Y_train
X_train=train_data.drop(["delivery_status"],axis=1)
Y_train=train_data["delivery_status"]

##split test data into the X_test and Y_test
X_test=test_data.drop(["delivery_status"],axis=1)
Y_test=test_data["delivery_status"]

##Adjust the datatype of  assembly_service_requested column
X_test["assembly_service_requested"]=X_test["assembly_service_requested"].astype("object")
##Adjust the datatype of  assembly_service_requested column
X_train["assembly_service_requested"]=X_train["assembly_service_requested"].astype("object")

### **Model Building**

In [6]:
# ### Numbercal columns 
numerical_features=X_train.select_dtypes(include="number").columns.to_list()
categorical_features=X_train.select_dtypes(include="object").columns.to_list()

###Nominal variables
categorical_features.remove("brand")
##Ordinal variables
ordinal_features=["brand"]

In [7]:
##mapping y variable with the numeric values
y_map={"Failed Delivery":0,"On Going":1,"Delivered":2}
Y_train=Y_train.map(y_map)
Y_test=Y_test.map(y_map)

In [8]:
### Define pipelines
nominal_pipeline=Pipeline(steps=[
    ("One-Hot-Encoder",OneHotEncoder(sparse_output=False,handle_unknown="ignore"))
])
numerical_pipeline=Pipeline(steps=[
    ("Standard Scaler",StandardScaler())
])

ordinal_pipeline=Pipeline(steps=[
    ("Ordinal-Encoder",OrdinalEncoder())
])

transfomer=ColumnTransformer(transformers=[
    ("Numerical Pipeline",numerical_pipeline,numerical_features),
    ("Nominal Pipeline",nominal_pipeline,categorical_features),
    ("Ordinal Pipeline",ordinal_pipeline,ordinal_features),
  
])

final_pipeline=Pipeline(steps=[
    ("Transfomer",transfomer),
    ("Resampling SMOTE",SMOTE(random_state=42)),
])

In [9]:
X_train_resample,Y_train_resample= final_pipeline.fit_resample(X_train,Y_train)

# Get the ColumnTransformer from the pipeline
preprocessor = final_pipeline.named_steps["Transfomer"]

# Get feature names from the ColumnTransformer
feature_names = preprocessor.get_feature_names_out()
X_train_resample=pd.DataFrame(X_train_resample,columns=feature_names)

In [10]:
# Transform test set using the preprocessor only
X_test_preprocessed = final_pipeline.named_steps["Transfomer"].transform(X_test)

feature_names = final_pipeline.named_steps["Transfomer"].get_feature_names_out()
X_test_preprocessed = pd.DataFrame(X_test_preprocessed, columns=feature_names)

In [11]:
##implementing ANN using tensorflow
model=tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_resample.shape[1],)), ##Input Layer
    tf.keras.layers.Dense(10,activation="relu"), ## First Hidden Layer
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(6,activation="relu"), ##Second Hidden Layer
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(8,activation="relu"), ##Third Hidden Layer
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(3,activation="softmax") ##Outpur Layer
])

model.compile(optimizer="adam",loss="sparse_categorical_crossentropy",metrics=["accuracy"])
model.fit(X_train_resample,Y_train_resample,validation_data=(X_test_preprocessed,Y_test),epochs=200,batch_size=20,verbose=1)

Epoch 1/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.3399 - loss: 1.4014 - val_accuracy: 0.3144 - val_loss: 1.1247
Epoch 2/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3281 - loss: 1.2784 - val_accuracy: 0.3376 - val_loss: 1.1111
Epoch 3/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3433 - loss: 1.2201 - val_accuracy: 0.3660 - val_loss: 1.1034
Epoch 4/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3255 - loss: 1.1867 - val_accuracy: 0.3505 - val_loss: 1.1039
Epoch 5/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3251 - loss: 1.1539 - val_accuracy: 0.3840 - val_loss: 1.0994
Epoch 6/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3342 - loss: 1.1304 - val_accuracy: 0.4072 - val_loss: 1.0978
Epoch 7/200
[1m116/11

<keras.src.callbacks.history.History at 0x1d68564e920>

### **Model Evaluation**

In [12]:
y_pred=model.predict(X_test_preprocessed)
y_pred

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


array([[0.30407152, 0.30652857, 0.38939992],
       [0.30406973, 0.30652714, 0.38940316],
       [0.3480261 , 0.34863323, 0.30334067],
       ...,
       [0.30964372, 0.31095842, 0.37939787],
       [0.31329837, 0.31489405, 0.37180758],
       [0.30894428, 0.31040484, 0.38065088]],
      shape=(388, 3), dtype=float32)

In [13]:
y_pred= np.argmax(y_pred, axis=1)

##accuracy,precision,recall,f1 score for test data
accuracy_test = accuracy_score(y_pred,Y_test)
precision_test = precision_score(y_pred,Y_test,average="weighted")
recall_test = recall_score(y_pred,Y_test,average="weighted")
f1_test = f1_score(y_pred,Y_test,average="weighted")

##print accuracy,precision,recall,f1 score
print(f"Accuracy of the ANN model:{accuracy_test}")
print(f"Precision of the ANN model:{precision_test}")
print(f"Recall of the ANN model:{recall_test}")
print(f"F1 Score of the ANN model:{f1_test}")

Accuracy of the ANN model:0.26804123711340205
Precision of the ANN model:0.4508177964020465
Recall of the ANN model:0.26804123711340205
F1 Score of the ANN model:0.28324778704423625


In [14]:
y_pred_train=model.predict(X_train_resample)
y_pred_train= np.argmax(y_pred_train, axis=1)

##accuracy,precision,recall,f1 score for training data
accuracy_train = accuracy_score(y_pred_train,Y_train_resample)
precision_train = precision_score(y_pred_train,Y_train_resample,average="weighted")
recall_train = recall_score(y_pred_train,Y_train_resample,average="weighted")
f1_train = f1_score(y_pred_train,Y_train_resample,average="weighted")

print(f"Accuracy of the ANN model:{accuracy_train}")
print(f"Precision of the ANN model:{precision_train}")
print(f"Recall of the ANN model:{recall_train}")
print(f"F1 Score of the ANN model:{f1_train}")

[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy of the ANN model:0.4228596262494568
Precision of the ANN model:0.5633438667049698
Recall of the ANN model:0.4228596262494568
F1 Score of the ANN model:0.4568485500742887


In [15]:
##print classification report
print("Classification Report:")
print(classification_report(Y_test,y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.41      0.14      0.20       192
           1       0.34      0.28      0.30       127
           2       0.19      0.62      0.30        69

    accuracy                           0.27       388
   macro avg       0.31      0.34      0.27       388
weighted avg       0.35      0.27      0.25       388

