In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Location of the ride-pricing dataset (CSV on the mounted Google Drive).
DATA_PATH = '/content/drive/MyDrive/Datasets/dynamic_pricing.csv'

# Read the raw data and preview the first rows.
df = pd.read_csv(DATA_PATH)
print(df.head())

   Number_of_Riders  Number_of_Drivers Location_Category  \
0                90                 45             Urban   
1                58                 39          Suburban   
2                42                 31             Rural   
3                89                 28             Rural   
4                78                 22             Rural   

  Customer_Loyalty_Status  Number_of_Past_Rides  Average_Ratings  \
0                  Silver                    13             4.47   
1                  Silver                    72             4.06   
2                  Silver                     0             3.99   
3                 Regular                    67             4.31   
4                 Regular                    74             3.77   

  Time_of_Booking Vehicle_Type  Expected_Ride_Duration  \
0           Night      Premium                      90   
1         Evening      Economy                      43   
2       Afternoon      Premium                      76  

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the dataset.
summary_stats = df.describe()
print(summary_stats)

       Number_of_Riders  Number_of_Drivers  Number_of_Past_Rides  \
count       1000.000000        1000.000000           1000.000000   
mean          60.372000          27.076000             50.031000   
std           23.701506          19.068346             29.313774   
min           20.000000           5.000000              0.000000   
25%           40.000000          11.000000             25.000000   
50%           60.000000          22.000000             51.000000   
75%           81.000000          38.000000             75.000000   
max          100.000000          89.000000            100.000000   

       Average_Ratings  Expected_Ride_Duration  Historical_Cost_of_Ride  
count      1000.000000              1000.00000              1000.000000  
mean          4.257220                99.58800               372.502623  
std           0.435781                49.16545               187.158756  
min           3.500000                10.00000                25.993449  
25%           3.8

In [None]:
# Historical cost against expected ride duration, with an OLS trendline.
fig = px.scatter(
    data_frame=df,
    x='Expected_Ride_Duration',
    y='Historical_Cost_of_Ride',
    trendline='ols',
)
fig.show()

In [None]:
# Distribution of historical ride cost for each vehicle type.
fig = px.box(
    data_frame=df,
    x='Vehicle_Type',
    y='Historical_Cost_of_Ride',
)
fig.show()

In [None]:
# Display the column labels of the dataset.
df.columns

Index(['Number_of_Riders', 'Number_of_Drivers', 'Location_Category',
       'Customer_Loyalty_Status', 'Number_of_Past_Rides', 'Average_Ratings',
       'Time_of_Booking', 'Vehicle_Type', 'Expected_Ride_Duration',
       'Historical_Cost_of_Ride'],
      dtype='object')

In [None]:
# Correlation matrix of the remaining columns once the four text-valued
# columns have been dropped.
non_numeric_columns = [
    'Location_Category',
    'Customer_Loyalty_Status',
    'Time_of_Booking',
    'Vehicle_Type',
]
corr_matrix = df.drop(columns=non_numeric_columns).corr()

# Render the matrix as a heatmap; both axes carry the same column labels.
heatmap = go.Heatmap(
    z=corr_matrix.values,
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    colorscale='Viridis',
)
fig = go.Figure(data=heatmap)
fig.update_layout(title='Correlation Matrix')
fig.show()

The data provided by the company states that the company uses a pricing model that only takes the expected ride duration as a factor to determine the price for a ride.

We will implement a dynamic pricing strategy that adjusts ride costs based on the demand and supply levels observed in the data. High-demand periods and low-supply scenarios will lead to price increases, while low-demand periods and high-supply situations will lead to price reductions.

In [None]:
# Number of observations in the Number_of_Riders column.
df['Number_of_Riders'].shape[0]

1000

In [None]:
# Rider counts in ascending order, re-indexed 0..n-1 so positions equal ranks.
sorted_riders = df['Number_of_Riders'].sort_values(ignore_index=True)


In [None]:
# Look at the two sorted values at positions 749 and 750.
value_at_index = sorted_riders.iloc[749:751]
print(value_at_index)

749    81
750    81
Name: Number_of_Riders, dtype: int64


In [None]:
import numpy as np

In [None]:
# 75th percentile of the rider counts, as computed by NumPy.
riders_p75 = np.percentile(df['Number_of_Riders'], 75)
print(riders_p75)

81.0


In [None]:
# Demand multiplier: rides whose rider count exceeds the high percentile are
# divided by that percentile; every other ride is divided by the low percentile.
high_demand_percentile = 75
low_demand_percentile = 25

riders = df['Number_of_Riders']
riders_high_cutoff = np.percentile(riders, high_demand_percentile)
riders_low_cutoff = np.percentile(riders, low_demand_percentile)

is_high_demand = riders > riders_high_cutoff
df['demand_multiplier'] = np.where(is_high_demand,
                                   riders / riders_high_cutoff,
                                   riders / riders_low_cutoff)

In [None]:
# Supply multiplier: rides with more drivers than the low percentile get
# (high percentile / drivers); every other ride gets (low percentile / drivers).
high_supply_percentile = 75
low_supply_percentile = 25

drivers = df['Number_of_Drivers']
drivers_high_cutoff = np.percentile(drivers, high_supply_percentile)
drivers_low_cutoff = np.percentile(drivers, low_supply_percentile)

above_low_supply = drivers > drivers_low_cutoff
df['supply_multiplier'] = np.where(above_low_supply,
                                   drivers_high_cutoff / drivers,
                                   drivers_low_cutoff / drivers)

In [None]:
# Price adjustment thresholds for high and low demand/supply.
# Only the *_low values are used in the formula below; the *_high values are
# defined but not referenced by it.
demand_threshold_high = 1.2
demand_threshold_low = 0.8
supply_threshold_high = 0.8
supply_threshold_low = 1.2

# Dynamic price: floor each multiplier at its low threshold, combine the two
# floored multipliers, and scale the historical cost by the result.
floored_demand = np.maximum(df['demand_multiplier'], demand_threshold_low)
floored_supply = np.maximum(df['supply_multiplier'], supply_threshold_low)
df['adjusted_ride_cost'] = df['Historical_Cost_of_Ride'] * (floored_demand * floored_supply)

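The adjusted ride cost captures the combined effect of the demand and supply multipliers. The low thresholds (`demand_threshold_low` and `supply_threshold_low`) act as floors on the respective multipliers to control the price adjustments; the high thresholds are defined but are not applied in the formula.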

In [None]:
# Profit percentage of each ride: relative change from the historical cost to
# the adjusted (dynamic) cost, expressed in percent.
historical_cost = df['Historical_Cost_of_Ride']
df['profit_percentage'] = ((df['adjusted_ride_cost'] - historical_cost) / historical_cost) * 100

# Split rides by the sign of the profit percentage. Rides with exactly 0%
# fall into neither group.
profitable_rides = df.loc[df['profit_percentage'] > 0]
loss_rides = df.loc[df['profit_percentage'] < 0]


import plotly.graph_objects as go

# Size of each group, used as the pie slices.
profitable_count = profitable_rides.shape[0]
loss_count = loss_rides.shape[0]

labels = ['Profitable Rides', 'Loss Rides']
values = [profitable_count, loss_count]

# Donut chart (hole=0.4) comparing profitable and loss-making rides.
donut = go.Pie(labels=labels, values=values, hole=0.4)
fig = go.Figure(data=[donut])
fig.update_layout(title='Profitability of Rides (Dynamic Pricing vs. Historical Pricing)')
fig.show()

In [None]:
# Adjusted (dynamic) ride cost against expected ride duration, with an OLS trendline.
fig = px.scatter(
    data_frame=df,
    x='Expected_Ride_Duration',
    y='adjusted_ride_cost',
    trendline='ols',
    title='Expected Ride Duration vs. Cost of Ride',
)
fig.show()

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def data_preprocessing_pipeline(data):
    """Impute missing values and replace outliers, returning a cleaned copy.

    Steps, in order:
      1. Numeric columns (``float``/``int`` dtypes): missing values are filled
         with the column mean.
      2. Numeric columns: values outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` are
         replaced with the column mean (computed after step 1, outliers
         included).
      3. Categorical columns (``object`` dtype): missing values are filled
         with the column's most frequent value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to clean. It is left unmodified.

    Returns
    -------
    pandas.DataFrame
        A cleaned copy of ``data``.
    """
    # Work on a copy so the caller's frame is not mutated as a side effect.
    data = data.copy()

    # Identify numeric and categorical features
    numeric_features = data.select_dtypes(include=['float', 'int']).columns
    categorical_features = data.select_dtypes(include=['object']).columns

    # Handle missing values in numeric features (skipped when there are none)
    if len(numeric_features) > 0:
        data[numeric_features] = data[numeric_features].fillna(data[numeric_features].mean())

    # Detect and handle outliers in numeric features using IQR
    for feature in numeric_features:
        Q1 = data[feature].quantile(0.25)
        Q3 = data[feature].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - (1.5 * IQR)
        upper_bound = Q3 + (1.5 * IQR)
        data[feature] = np.where((data[feature] < lower_bound) | (data[feature] > upper_bound),
                                 data[feature].mean(), data[feature])

    # Handle missing values in categorical features. ``mode()`` is empty when
    # there are no categorical columns or when every one of them is entirely
    # missing; ``.iloc[0]`` would raise IndexError in that case, so guard it.
    if len(categorical_features) > 0:
        modes = data[categorical_features].mode()
        if not modes.empty:
            data[categorical_features] = data[categorical_features].fillna(modes.iloc[0])

    return data

In [None]:
# Encode Vehicle_Type as 1 (Premium) / 0 (Economy).
# The mapping also sends 1 -> 1 and 0 -> 0 so that re-running this cell on an
# already-encoded column leaves it unchanged instead of turning every value
# into NaN. Any other value still maps to NaN, as before.
df["Vehicle_Type"] = df["Vehicle_Type"].map({"Premium": 1,
                                           "Economy": 0,
                                           1: 1,
                                           0: 0})

In [None]:
# Splitting data: four input features, with the adjusted ride cost as target.
from sklearn.model_selection import train_test_split
x = np.array(df[["Number_of_Riders", "Number_of_Drivers", "Vehicle_Type", "Expected_Ride_Duration"]])
y = np.array(df[["adjusted_ride_cost"]])

# 80/20 train/test split with a fixed seed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# Reshape y to 1D array (y was built from a one-column frame)
y_train = y_train.ravel()
y_test = y_test.ravel()

# Training a random forest regression model.
# random_state is fixed so the fitted forest, and therefore every prediction
# made from it, is reproducible across kernel restarts.
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(random_state=42)
model.fit(x_train, y_train)

In [None]:
def get_vehicle_type_numeric(vehicle_type):
    """Return the numeric code for a vehicle type label.

    "Premium" maps to 1 and "Economy" maps to 0. Any other value yields
    ``None``.
    """
    codes = {"Premium": 1, "Economy": 0}
    return codes.get(vehicle_type)

# Predicting using user input values
def predict_price(number_of_riders, number_of_drivers, vehicle_type, Expected_Ride_Duration):
    """Predict the ride price for a single set of inputs.

    The vehicle type label is converted with ``get_vehicle_type_numeric`` and
    the four values are passed to ``model.predict`` as one row, in the order
    riders, drivers, vehicle type code, expected ride duration.

    Returns whatever ``model.predict`` returns for that single-row input.

    Raises:
        ValueError: if ``vehicle_type`` has no numeric code.
    """
    encoded_vehicle_type = get_vehicle_type_numeric(vehicle_type)
    if encoded_vehicle_type is None:
        raise ValueError("Invalid vehicle type")

    feature_row = [number_of_riders, number_of_drivers, encoded_vehicle_type, Expected_Ride_Duration]
    return model.predict(np.array([feature_row]))

# Example prediction using user input values
user_number_of_riders, user_number_of_drivers = 50, 25
user_vehicle_type = "Economy"
Expected_Ride_Duration = 30

predicted_price = predict_price(
    number_of_riders=user_number_of_riders,
    number_of_drivers=user_number_of_drivers,
    vehicle_type=user_vehicle_type,
    Expected_Ride_Duration=Expected_Ride_Duration,
)
print("Predicted price:", predicted_price)

Predicted price: [235.39859652]


In [None]:
import plotly.graph_objects as go

# Predict on the test set
y_pred = model.predict(x_test)

# Flattened actual values and their range, reused by both traces below.
actual_values = y_test.flatten()
actual_range = [min(actual_values), max(actual_values)]

# Scatter of actual vs predicted values
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=actual_values, y=y_pred, mode='markers', name='Actual vs Predicted')
)

# Dashed red diagonal marking the ideal case (predicted equals actual)
fig.add_trace(
    go.Scatter(
        x=actual_range,
        y=actual_range,
        mode='lines',
        name='Ideal',
        line=dict(color='red', dash='dash'),
    )
)

fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    showlegend=True,
)

fig.show()