#### Read Data

In [32]:
import pandas as pd
import numpy as np
data = pd.read_csv('../dataset/dataset_clean.csv')
del data['Index']

In [33]:
print(data.shape)
data.head()

(44657, 18)


Unnamed: 0,Delivery_person_Age,Delivery_person_ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_order,Time_order_picked,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,Multiple_deliveries,Festival,City,Distance,Time_taken_(min)
0,36.0,4.5,23.351058,85.325731,23.371058,85.345731,2022-03-24 11:25:00,2022-03-24 11:35:00,Cloudy,High,1,Drinks,motorcycle,1.0,No,Metropolitian,3.018911,25.0
1,37.0,4.7,12.934365,77.616155,13.024365,77.706155,2022-03-25 17:40:00,2022-03-25 17:50:00,Fog,Medium,1,Meal,scooter,1.0,No,Metropolitian,13.973178,40.0
2,25.0,4.9,22.31279,73.170283,22.35279,73.210283,2022-03-19 13:40:00,2022-03-19 13:55:00,Cloudy,High,1,Drinks,scooter,1.0,No,Urban,6.058825,30.0
3,28.0,4.5,17.433809,78.386744,17.563809,78.516744,2022-03-10 23:35:00,2022-03-10 23:45:00,Sandstorms,Low,1,Drinks,motorcycle,1.0,No,Metropolitian,19.97552,11.0
4,22.0,4.5,12.310972,76.659264,12.390972,76.739264,2022-03-10 20:40:00,2022-03-10 20:55:00,Fog,Jam,0,Snack,motorcycle,1.0,No,Metropolitian,12.43554,47.0


In [34]:
data.dtypes

Delivery_person_Age            float64
Delivery_person_ratings        float64
Restaurant_latitude            float64
Restaurant_longitude           float64
Delivery_location_latitude     float64
Delivery_location_longitude    float64
Time_order                      object
Time_order_picked               object
Weather_conditions              object
Road_traffic_density            object
Vehicle_condition                int64
Type_of_order                   object
Type_of_vehicle                 object
Multiple_deliveries            float64
Festival                        object
City                            object
Distance                       float64
Time_taken_(min)               float64
dtype: object

#### Encoder object features

In [35]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
for col in data.columns:
    if data[col].dtype == 'object':
        data[col] = le.fit_transform(data[col])
        
data.dtypes

Delivery_person_Age            float64
Delivery_person_ratings        float64
Restaurant_latitude            float64
Restaurant_longitude           float64
Delivery_location_latitude     float64
Delivery_location_longitude    float64
Time_order                       int32
Time_order_picked                int32
Weather_conditions               int32
Road_traffic_density             int32
Vehicle_condition                int64
Type_of_order                    int32
Type_of_vehicle                  int32
Multiple_deliveries            float64
Festival                         int32
City                             int32
Distance                       float64
Time_taken_(min)               float64
dtype: object

In [36]:
# feature
feature = data.drop(labels= 'Time_taken_(min)',axis = 1)
feature
# target
target = data['Time_taken_(min)']

In [37]:
feature.dtypes

Delivery_person_Age            float64
Delivery_person_ratings        float64
Restaurant_latitude            float64
Restaurant_longitude           float64
Delivery_location_latitude     float64
Delivery_location_longitude    float64
Time_order                       int32
Time_order_picked                int32
Weather_conditions               int32
Road_traffic_density             int32
Vehicle_condition                int64
Type_of_order                    int32
Type_of_vehicle                  int32
Multiple_deliveries            float64
Festival                         int32
City                             int32
Distance                       float64
dtype: object

In [38]:
feature.shape,target.shape

((44657, 17), (44657,))

#### Determine number of features 

In [39]:
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.linear_model import LinearRegression as LR

In [40]:
feature_names = tuple(feature.columns)
feature_names

('Delivery_person_Age',
 'Delivery_person_ratings',
 'Restaurant_latitude',
 'Restaurant_longitude',
 'Delivery_location_latitude',
 'Delivery_location_longitude',
 'Time_order',
 'Time_order_picked',
 'Weather_conditions',
 'Road_traffic_density',
 'Vehicle_condition',
 'Type_of_order',
 'Type_of_vehicle',
 'Multiple_deliveries',
 'Festival',
 'City',
 'Distance')

In [41]:
X = feature
y = target

In [42]:
sfs1 = SFS(
        LR(), 
        k_features = 7, # select k best feature [1 - X.shape[1]]
        forward=True, 
        floating=False, 
        verbose=2,
        scoring='r2',
        cv=10) # cross-valdation

sfs1 = sfs1.fit(X, y)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed:    1.1s finished

[2022-11-19 15:38:12] Features: 1/7 -- score: 0.14468258934064826[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  16 out of  16 | elapsed:    1.2s finished

[2022-11-19 15:38:13] Features: 2/7 -- score: 0.2319968862523401[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    1.1s finished

[2022-11-19 15:38:15] Features: 3/7 -- score: 0.294239496343048[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 

In [43]:
# result
sfs1.k_feature_names_

('Delivery_person_Age',
 'Delivery_person_ratings',
 'Weather_conditions',
 'Road_traffic_density',
 'Vehicle_condition',
 'Multiple_deliveries',
 'Distance')

In [44]:
# save 
def data_selection(*feature):
    df = pd.DataFrame()
    for i in np.array(feature):
        df[i] = data[i]
    return df
#data_selection = data_selection(sfs1.k_feature_names_)
#data_selection['Time_taken_(min)'] = target
#data_selection.to_csv('../dataset/dataset_selection.csv')

In [45]:
X = data_selection(sfs1.k_feature_names_)
y = target

In [46]:
X.shape,y.shape

((44657, 7), (44657,))

#### Split data

In [47]:
from sklearn.model_selection import train_test_split
X_train,X_test, y_train,y_test = train_test_split(X,y,test_size = 0.2, random_state = 43)

In [48]:
X_train.shape,y_train.shape

((35725, 7), (35725,))

In [49]:
X_test.shape,y_test.shape

((8932, 7), (8932,))

#### Poly Regression

In [50]:
from sklearn.preprocessing import PolynomialFeatures

d = PolynomialFeatures(degree = 5)
X_poly = d.fit_transform(X_train)

model_poly = LR()
model_poly.fit(X_poly, y_train)



LinearRegression()

In [51]:
yHat_poly = model_poly.predict(d.fit_transform(X_test))



In [52]:
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2
print('MAE: ', mae(yHat_poly,y_test))
print('MSE: ', mse(yHat_poly,y_test))
print('R2: ', r2(yHat_poly,y_test))

MAE:  3.960072784169457
MSE:  142.53316228051082
R2:  0.22025764230477796


In [53]:
#from sklearn.preprocessing import PolynomialFeatures

#d = PolynomialFeatures(degree = 5)
#X_poly = d.fit_transform(X)

#model_poly = LR()
#model_poly.fit(X_poly, y)

#yHat_poly = model_poly.predict(d.fit_transform(X))



In [54]:
#from sklearn.metrics import mean_absolute_error as mae
#from sklearn.metrics import mean_squared_error as mse
#from sklearn.metrics import r2_score as r2
#print('MAE: ', mae(yHat_poly,y))
#print('MSE: ', mse(yHat_poly,y))
#print('R2: ', r2(yHat_poly,y))

MAE:  3.7182078864425656
MSE:  21.76375622668586
R2:  0.67130049964764


### Keras

In [24]:
X_train.shape,y_train.shape

((35725, 7), (35725,))

In [25]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [26]:
model = Sequential()
model.add(Dense(500, input_dim=X_train.shape[1], activation= "tanh"))
model.add(Dense(100, activation= "tanh"))
model.add(Dense(50, activation= "tanh"))
model.add(Dense(1))
model.summary()
#model.save('model.h5')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 500)               4000      
                                                                 
 dense_1 (Dense)             (None, 100)               50100     
                                                                 
 dense_2 (Dense)             (None, 50)                5050      
                                                                 
 dense_3 (Dense)             (None, 1)                 51        
                                                                 
Total params: 59,201
Trainable params: 59,201
Non-trainable params: 0
_________________________________________________________________
INFO:tensorflow:Assets written to: model.\assets


In [27]:
model.compile(loss= "mean_absolute_error" , optimizer="adam", metrics=["mean_absolute_error"])
model.fit(X_train, y_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1fbc4b92350>

In [28]:
yHat_keras = model.predict(X_test)

In [31]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score as r2
print('MAE: ', mae(yHat_keras,y_test))
print('MSE: ',mse(yHat_keras,y_test))
print('R2: ', r2(yHat_keras,y_test))

MAE:  3.573914236195996
MSE:  20.374117887354437
R2:  0.7109512630242896


In [55]:
#model.compile(loss= "mean_absolute_error" , optimizer="adam", metrics=["mean_absolute_error"])
#model.fit(X, y, epochs=20)
#yHat_keras = model.predict(X)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [57]:
#from sklearn.metrics import mean_squared_error as mse
#from sklearn.metrics import mean_absolute_error as mae
#from sklearn.metrics import r2_score as r2
#print('MAE: ', mae(yHat_keras,y))
#print('MSE: ',mse(yHat_keras,y))
#print('R2: ', r2(yHat_keras,y))

MAE:  3.6150417681101477
MSE:  21.731943012075728
R2:  0.6516780054285047
