In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load your dataset
data = pd.read_csv('./Dataset/dataset1.csv')

# Preprocessing
# Assuming your dataset has columns: 'Delivery_person_ID', 'Delivery_person_Age', 
# 'Delivery_person_Ratings', 'Type_of_order', 'Region', and 'Time_taken(min)'
data['Region'], data['Delivery_Person_Unique_ID'] = data['Delivery_person_ID'].str.extract(r'([A-Z]+)([A-Z0-9]+)')
data['Region'] = data['Region'].astype(str).str.replace('RES', '')

X = data[['Delivery_person_Age', 'Delivery_person_Ratings', 'Type_of_order', 'Region']]
y = data['Time_taken(min)']

categorical_features = ['Type_of_order', 'Region']
numerical_features = ['Delivery_person_Age', 'Delivery_person_Ratings']

one_hot = OneHotEncoder()
scaler = StandardScaler()
transformer = ColumnTransformer([
    ("one_hot", OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ("scaler", scaler, numerical_features)
], remainder='passthrough')

# Creating and fitting the model
rf_model = Pipeline([
    ('transformer', transformer),
    ('random_forest', RandomForestRegressor(n_estimators=50, random_state=42))
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
rf_model.fit(X_train, y_train)




Predicted Delivery Times: [26.45083526 20.94900596]


In [33]:
import pandas as pd
import random

# Generating random test data
num_samples = 200  # Anyone viewing this notebook can adjust the sample size if you have a test data you are free to use that as well


test_data = pd.DataFrame({
    'Delivery_person_Age': [random.choice(age_range) for _ in range(num_samples)],
    'Delivery_person_Ratings': [random.choice(ratings) for _ in range(num_samples)],
    'Type_of_order': [random.choice(order_types) for _ in range(num_samples)],
    'Region': [random.choice(regions) for _ in range(num_samples)]
})




In [34]:
# Predicting delivery times
predicted_delivery_times = rf_model.predict(test_data)

# Adding predictions to the test data DataFrame
test_data['Predicted_Delivery_Time'] = predicted_delivery_times

# Displaying the test data with predictions
print(test_data)


     Delivery_person_Age  Delivery_person_Ratings Type_of_order  Region  \
0                     56                      4.6         Snack  RANCHI   
1                     53                      3.6        Buffet    PUNE   
2                     31                      3.1          Meal     HYD   
3                     21                      4.4         Snack     HYD   
4                     46                      3.5        Buffet    CHEN   
..                   ...                      ...           ...     ...   
195                   53                      3.4          Meal    CHEN   
196                   50                      3.2          Meal     MYS   
197                   23                      3.2         Snack     MYS   
198                   55                      3.7         Snack   COIMB   
199                   42                      4.9        Drinks     KOC   

     Predicted_Delivery_Time  
0                  26.799651  
1                  36.593519  
2     