In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
# Load the dataset
data = pd.read_csv("final_data2.csv")
# Define features (X) and target variable (y)
X = data[['Days for shipping (real)', 'Days for shipment (scheduled)', 'Late_delivery_risk', 
          'Latitude', 'Longitude','order city latitude','order city longitude','Shipping Mode','processing_time','distance','multiplier','approx_time','transit_time']]
y = data['effective_delivery_time']  # Target variable

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess categorical variables using one-hot encoding
categorical_features = ['Shipping Mode']
preprocessor = ColumnTransformer(transformers=[('cat', OneHotEncoder(), categorical_features)], remainder='passthrough')
X_train_encoded = preprocessor.fit_transform(X_train)
X_test_encoded = preprocessor.transform(X_test)

# Training a random forest regression model
model = RandomForestRegressor()
model.fit(X_train_encoded,y_train)

In [9]:
import numpy as np
# Prepare the input data
new_data = pd.DataFrame({
    'Days for shipping (real)': [6],
    'Days for shipment (scheduled)': [2],
    'Late_delivery_risk': [1],
    'Latitude': [18.27199936],
    'Longitude': [-66.03704834],
    'order city latitude': [17.9689],
    'order city longitude': [79.5941],
    'Shipping mode':['First Class']
})
#calculating the other essential features based on which we have trained the model
processing_time = new_data['Days for shipping (real)'].iloc[0]   #PROCESSING TIME
print("processing time:",processing_time)
#calculating the distance
import math

def haversine(lat1, lon1, lat2, lon2):
    # Convert latitude and longitude from degrees to radians
    lat1_rad = math.radians(lat1)
    lon1_rad = math.radians(lon1)
    lat2_rad = math.radians(lat2)
    lon2_rad = math.radians(lon2)
    
    # Haversine formula
    dlon = lon2_rad - lon1_rad
    dlat = lat2_rad - lat1_rad
    a = math.sin(dlat / 2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    distance = 6371 * c  # Radius of the Earth in kilometers
    return distance

# Extract latitude and longitude from new_data DataFrame
lat1 = new_data['Latitude'].iloc[0]
lon1 = new_data['Longitude'].iloc[0]
lat2 = new_data['order city latitude'].iloc[0]
lon2 = new_data['order city longitude'].iloc[0]
# Calculate distance using the haversine formula
distance = haversine(lat1, lon1, lat2, lon2)
print("Distance:", distance)

#determining the multiplier value
# Calculate multiplier based on Shipping mode
shipping_mode_multipliers = {
    'Same Day': 0,
    'First Class': 0.5,
    'Second Class': 0.8,
    'Standard Class': 1
}
def calculate_multiplier(shipping_mode):
    return shipping_mode_multipliers.get(shipping_mode, 1)
multiplier = calculate_multiplier(new_data['Shipping mode'].iloc[0])
print("multiplier:",multiplier)
#calculating the approx_time
approx_time = round(distance * multiplier, 4)
print("approx time:",approx_time)

#calculating the transit time
from sklearn.preprocessing import MinMaxScaler
# Define the columns to normalize
# Initialize the MinMaxScaler
scaler = MinMaxScaler(feature_range=(1, 4))
transit_time = scaler.fit_transform(np.array(approx_time).reshape(-1, 1))
# Round the normalized values to 4 decimal places
#transit_time=np.round(transit_time).astype(int)
transit_time = int(round(transit_time[0][0]))
print("transit time:",transit_time)


# Use the trained model to make predictions on the new input data
#new_predictions = np.round(model1.predict(new_data)).astype(int)

#print("Predicted delivery time:", new_predictions)


processing time: 6
Distance: 14505.907604593218
multiplier: 0.5
approx time: 7252.9538
transit time: 1


In [17]:
# Include the calculated transit time in the new_data DataFrame
new_data['processing_time'] = processing_time
new_data['distance'] = distance
new_data['multiplier'] = multiplier
new_data['approx_time'] = approx_time
new_data['transit_time'] = transit_time

# Add the 'Shipping Mode' column with a default value
new_data['Shipping Mode'] = 'Standard Class'  # or any default value that fits your context

# Ensure correct column order
new_data = new_data[['Days for shipping (real)', 'Days for shipment (scheduled)', 'Late_delivery_risk',
                     'Latitude', 'Longitude', 'order city latitude', 'order city longitude',
                     'Shipping Mode', 'processing_time', 'distance', 'multiplier', 'approx_time', 'transit_time']]

# Apply the preprocessor to new_data
new_data_encoded = preprocessor.transform(new_data)

# Using the trained model to predict the estimated delivery period
new_predictions = model.predict(new_data_encoded)

# Rounding the predicted value
new_predictions = int(new_predictions[0].round())
print("Predicted delivery time:", new_predictions)




Predicted delivery time: 9
