In [181]:
import pandas as pd
import numpy as np
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder,OneHotEncoder,StandardScaler
from sklearn.metrics import accuracy_score,precision_score,recall_score,classification_report
import tensorflow as tf
from scikeras.wrappers import KerasClassifier

In [182]:
train_data=pd.read_csv("../dataset/train_data.csv")
test_data=pd.read_csv("../dataset/test_data.csv")

In [183]:
print(f"Shape of the Training data:{train_data.shape}")
print(f"Shape of the Testing data:{test_data.shape}")

Shape of the Training data:(1550, 13)
Shape of the Testing data:(388, 13)


In [184]:
train_data=train_data=train_data.drop(["Unnamed: 0"],axis=1)
test_data=test_data.drop(["Unnamed: 0"],axis=1)

In [185]:
##split train data into the X_train and Y_train
X_train=train_data.drop(["delivery_status"],axis=1)
Y_train=train_data["delivery_status"]

##split test data into the X_test and Y_test
X_test=test_data.drop(["delivery_status"],axis=1)
Y_test=test_data["delivery_status"]

##Adjust the datatype of  assembly_service_requested column
X_test["assembly_service_requested"]=X_test["assembly_service_requested"].astype("object")
##Adjust the datatype of  assembly_service_requested column
X_train["assembly_service_requested"]=X_train["assembly_service_requested"].astype("object")

In [186]:
# ### Numbercal columns 
numerical_features=X_train.select_dtypes(include="number").columns.to_list()
categorical_features=X_train.select_dtypes(include="object").columns.to_list()

###Nominal variables
categorical_features.remove("brand")
##Ordinal variables
ordinal_features=["brand"]

In [187]:
##mapping y variable with the numeric values
y_map={"Failed Delivery":0,"On Going":1,"Delivered":2}
Y_train=Y_train.map(y_map)
Y_test=Y_test.map(y_map)

In [188]:
### Define pipelines
nominal_pipeline=Pipeline(steps=[
    ("One-Hot-Encoder",OneHotEncoder(sparse_output=False,handle_unknown="ignore"))
])
numerical_pipeline=Pipeline(steps=[
    ("Standard Scaler",StandardScaler())
])

ordinal_pipeline=Pipeline(steps=[
    ("Ordinal-Encoder",OrdinalEncoder())
])

transfomer=ColumnTransformer(transformers=[
    ("Numerical Pipeline",numerical_pipeline,numerical_features),
    ("Nominal Pipeline",nominal_pipeline,categorical_features),
    ("Ordinal Pipeline",ordinal_pipeline,ordinal_features),
  
])

final_pipeline=Pipeline(steps=[
    ("Transfomer",transfomer),
    ("Resampling SMOTE",SMOTE(random_state=42)),
])

In [189]:
X_train_resample,Y_train_resample= final_pipeline.fit_resample(X_train,Y_train)

# Get the ColumnTransformer from the pipeline
preprocessor = final_pipeline.named_steps["Transfomer"]

# Get feature names from the ColumnTransformer
feature_names = preprocessor.get_feature_names_out()
X_train_resample=pd.DataFrame(X_train_resample,columns=feature_names)

In [190]:
# Transform test set using the preprocessor only
X_test_preprocessed = final_pipeline.named_steps["Transfomer"].transform(X_test)

feature_names = final_pipeline.named_steps["Transfomer"].get_feature_names_out()
X_test_preprocessed = pd.DataFrame(X_test_preprocessed, columns=feature_names)

In [191]:
##implementing ANN using tensorflow
model=tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_resample.shape[1],)), ##Input Layer
    tf.keras.layers.Dense(10,activation="relu"), ## First Hidden Layer
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(6,activation="relu"), ##Second Hidden Layer
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(8,activation="relu"), ##Third Hidden Layer
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(3,activation="softmax") ##Outpur Layer
])

model.compile(optimizer="adam",loss="sparse_categorical_crossentropy",metrics=["accuracy"])
model.fit(X_train_resample,Y_train_resample,validation_data=(X_test_preprocessed,Y_test),epochs=200,batch_size=20,verbose=1)

Epoch 1/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.3181 - loss: 1.4969 - val_accuracy: 0.2191 - val_loss: 1.1707
Epoch 2/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3342 - loss: 1.3448 - val_accuracy: 0.2320 - val_loss: 1.1652
Epoch 3/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3372 - loss: 1.2517 - val_accuracy: 0.2216 - val_loss: 1.1536
Epoch 4/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3520 - loss: 1.1872 - val_accuracy: 0.2242 - val_loss: 1.1420
Epoch 5/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3303 - loss: 1.1799 - val_accuracy: 0.2191 - val_loss: 1.1296
Epoch 6/200
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3446 - loss: 1.1409 - val_accuracy: 0.2320 - val_loss: 1.1234
Epoch 7/200
[1m116/11

<keras.src.callbacks.history.History at 0x2c2ffe15e70>

In [192]:
y_pred=model.predict(X_test_preprocessed)
y_pred

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


array([[0.30350518, 0.29028884, 0.406206  ],
       [0.3210326 , 0.3083283 , 0.3706391 ],
       [0.3496184 , 0.3665534 , 0.28382826],
       ...,
       [0.33335912, 0.32174334, 0.3448976 ],
       [0.3271211 , 0.3149622 , 0.35791668],
       [0.34359974, 0.3462984 , 0.31010184]],
      shape=(388, 3), dtype=float32)

In [193]:
y_pred= np.argmax(y_pred, axis=1)
accuracy_score(y_pred,Y_test)

0.2912371134020619

In [194]:
y_pred_train=model.predict(X_train_resample)
y_pred_train= np.argmax(y_pred_train, axis=1)
accuracy_score(y_pred_train,Y_train_resample)

[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


0.41938287700999566