In [150]:
#importing Python libraries and dataset
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Load the ride dataset and print its first rows as a quick sanity check.
data = pd.read_csv("data.csv")
print(data.head())

# Exploratory data analysis: summary statistics produced by DataFrame.describe().
print(data.describe())

# Scatter of expected ride duration (x) against historical ride cost (y),
# with an OLS trendline drawn by plotly express.
fig = px.scatter(
    data_frame=data,
    x='Expected_Ride_Duration',
    y='Historical_Cost_of_Ride',
    trendline='ols',
    title='Expected Ride Duration vs Historical Cost of Ride',
)
fig.show()


   Number_of_Riders  Number_of_Drivers Location_Category  \
0                90                 45             Urban   
1                58                 39          Suburban   
2                42                 31             Rural   
3                89                 28             Rural   
4                78                 22             Rural   

  Customer_Loyalty_Status  Number_of_Past_Rides  Average_Ratings  \
0                  Silver                    13             4.47   
1                  Silver                    72             4.06   
2                  Silver                     0             3.99   
3                 Regular                    67             4.31   
4                 Regular                    74             3.77   

  Time_of_Booking Vehicle_Type  Expected_Ride_Duration  \
0           Night      Premium                      90   
1         Evening      Economy                      43   
2       Afternoon      Premium                      76  

In [151]:
# Box plot: spread of historical ride cost within each vehicle type.
fig = px.box(
    data_frame=data,
    x='Vehicle_Type',
    y='Historical_Cost_of_Ride',
    title='Historical Cost of Ride Distribution by Vehicle Type',
)
fig.show()

In [152]:
# Box plot: spread of historical ride cost within each booking time slot.
fig = px.box(
    data_frame=data,
    x='Time_of_Booking',
    y='Historical_Cost_of_Ride',
    title='Historical Cost of Ride Distribution by Time of Booking',
)
fig.show()

In [153]:
import numpy as np

#demand side: percentile cut-offs on the rider counts
high_demand_percentile = 75
low_demand_percentile = 25

riders = data['Number_of_Riders']
riders_high_cutoff = np.percentile(riders, high_demand_percentile)
riders_low_cutoff = np.percentile(riders, low_demand_percentile)

#rows above the high cut-off are scaled by it; every other row is scaled by the low cut-off
data['demand_multiplier'] = np.where(riders > riders_high_cutoff,
                                     riders / riders_high_cutoff,
                                     riders / riders_low_cutoff)

#supply side: percentile cut-offs on the driver counts
high_supply_percentile = 75
low_supply_percentile = 25

drivers = data['Number_of_Drivers']
drivers_high_cutoff = np.percentile(drivers, high_supply_percentile)
drivers_low_cutoff = np.percentile(drivers, low_supply_percentile)

#the ratio is inverted relative to demand (cut-off / drivers);
#note that the branch condition compares against the LOW cut-off
data['supply_multiplier'] = np.where(drivers > drivers_low_cutoff,
                                     drivers_high_cutoff / drivers,
                                     drivers_low_cutoff / drivers)

#price adjustment thresholds for high and low demand/supply
#(only demand_threshold_low and supply_threshold_high are used in this cell)
demand_threshold_high = 1.2 #higher demand threshold
demand_threshold_low = 0.8 #lower demand threshold
supply_threshold_high = 0.8 #higher supply threshold
supply_threshold_low = 1.2 #lower supply threshold

#each multiplier is floored at its threshold, then both scale the historical cost
demand_factor = np.maximum(data['demand_multiplier'], demand_threshold_low)
supply_factor = np.maximum(data['supply_multiplier'], supply_threshold_high)
data['adjusted_ride_cost'] = data['Historical_Cost_of_Ride'] * (demand_factor * supply_factor)

In [154]:
#profit percentage of the dynamically priced ride relative to its historical cost
historical_cost = data['Historical_Cost_of_Ride']
data['profit_percentage'] = ((data['adjusted_ride_cost'] - historical_cost) / historical_cost) * 100

#split rides by the sign of the profit percentage
#(rides at exactly 0 fall into neither group)
is_profitable = data['profit_percentage'] > 0
is_loss = data['profit_percentage'] < 0
profitable_rides = data[is_profitable]
loss_rides = data[is_loss]

import plotly.graph_objects as go

#number of rides in each group
profitable_count = len(profitable_rides)
loss_count = len(loss_rides)

#donut chart of profitable vs loss rides under the dynamic pricing strategy
labels = ['Profitable Rides', 'Loss Rides']
values = [profitable_count, loss_count]

donut = go.Pie(labels=labels, values=values, hole=0.4)
fig = go.Figure(data=[donut])
fig.update_layout(title='Profitability of Rides (Dynamic Pricing vs Historical Pricing)')
fig.show()

In [155]:
# Scatter of expected ride duration (x) against the dynamically adjusted ride cost (y),
# with an OLS trendline.
fig = px.scatter(
    data_frame=data,
    x='Expected_Ride_Duration',
    y='adjusted_ride_cost',
    trendline='ols',
    title='Expected Ride Duration vs Cost of Ride',
)
fig.show()

In [156]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def data_preprocessing_pipeline(data):
  """Impute missing values and damp numeric outliers in ``data``, in place.

  Steps, in order:
    1. Numeric columns (``float``/``int`` dtypes): NaNs are replaced with the
       column mean.
    2. Numeric columns: values outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` are
       replaced with the column mean. The mean is taken before the
       replacement, so it still includes the outliers themselves.
    3. Categorical columns (``object`` dtype): NaNs are replaced with the
       column's first mode.

  Args:
    data: pandas DataFrame to clean. It is modified in place.

  Returns:
    The same DataFrame object, after cleaning.
  """
  #ID numeric and categorical features
  numeric_features = data.select_dtypes(include=['float', 'int']).columns
  categorical_features = data.select_dtypes(include=['object']).columns

  #Handling missing values in numeric features
  if len(numeric_features) > 0:
    data[numeric_features] = data[numeric_features].fillna(data[numeric_features].mean())

  #Detecting and handling outliers in numeric features using IQR
  for feature in numeric_features:
    q1 = data[feature].quantile(0.25)
    q3 = data[feature].quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - (1.5 * iqr)
    upper_bound = q3 + (1.5 * iqr)
    is_outlier = (data[feature] < lower_bound) | (data[feature] > upper_bound)
    data[feature] = np.where(is_outlier, data[feature].mean(), data[feature])

  #Handling missing values in categorical features. This runs once, after the
  #outlier loop, so every numeric column is processed before returning.
  if len(categorical_features) > 0:
    modes = data[categorical_features].mode()
    #mode() is empty when every categorical value is missing; nothing to fill with
    if not modes.empty:
      data[categorical_features] = data[categorical_features].fillna(modes.iloc[0])

  return data

In [157]:
# Encode the vehicle type as an integer code. Series.map turns any value that is
# not a key of the dict into NaN, so re-running this cell on already-encoded data
# wipes the column.
vehicle_type_codes = {"Economy": 0, "Premium": 1}
data["Vehicle_Type"] = data["Vehicle_Type"].map(vehicle_type_codes)

In [158]:
# Encode the booking time slot as an integer code. Series.map turns any value that
# is not a key of the dict into NaN, so re-running this cell on already-encoded
# data wipes the column.
time_of_booking_codes = {
    "Afternoon": 0,
    "Evening": 1,
    "Morning": 2,
    "Night": 3,
}
data["Time_of_Booking"] = data["Time_of_Booking"].map(time_of_booking_codes)

In [159]:
from sklearn.model_selection import train_test_split
# Feature matrix (one row per ride) and target (the dynamically adjusted cost).
feature_columns = ["Number_of_Riders", "Number_of_Drivers", "Vehicle_Type", "Time_of_Booking", "Expected_Ride_Duration"]
x = np.array(data[feature_columns])
y = np.array(data[["adjusted_ride_cost"]])

# 80/20 train/test split. The seed is fixed (same value as the surge-model cell)
# so that a Restart & Run All reproduces the same split.
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

#Reshape y to 1D array
y_train = y_train.ravel()
y_test = y_test.ravel()

#Training a random forest regression model; seeded so the fitted (and later
#pickled) model is the same on every run
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(random_state=42)
model.fit(x_train, y_train)

import pickle
from google.colab import files

# Serialise the fitted ride-cost regressor (`model`) to a pickle file on disk.
model_path = 'trained_model.pkl'
with open(model_path, 'wb') as model_file:
    pickle.dump(model, model_file)

# Hand the pickle file to the Colab `files.download` helper.
files.download(model_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [161]:
#testing the ML model using some input values
def get_vehicle_type_numeric(vehicle_type):
  """Return the integer code for a vehicle type label.

  "Economy" maps to 0 and "Premium" maps to 1. Any other value (the lookup is
  case-sensitive) yields None.
  """
  codes = {"Economy": 0, "Premium": 1}
  return codes.get(vehicle_type)

def get_time_of_booking_numeric(time_of_booking):
  """Return the integer code for a booking time slot label.

  "Afternoon" -> 0, "Evening" -> 1, "Morning" -> 2, "Night" -> 3. Any other
  value (the lookup is case-sensitive) yields None.
  """
  codes = {"Afternoon": 0, "Evening": 1, "Morning": 2, "Night": 3}
  return codes.get(time_of_booking)

#making predictions using user input values
def predict_price(number_of_riders, number_of_drivers, vehicle_type, time_of_booking, Expected_Ride_Duration):
  """Predict the ride cost for one ride with the module-level `model`.

  Args:
    number_of_riders: rider count for the ride request.
    number_of_drivers: driver count for the ride request.
    vehicle_type: label understood by get_vehicle_type_numeric.
    time_of_booking: label understood by get_time_of_booking_numeric.
    Expected_Ride_Duration: expected duration of the ride.

  Returns:
    Whatever `model.predict` returns for the single-row input.

  Raises:
    ValueError: if the vehicle type or the booking time is not recognised.
      The vehicle type is checked first.
  """
  vehicle_code = get_vehicle_type_numeric(vehicle_type)
  if vehicle_code is None:
    raise ValueError("Invalid vehicle type")

  booking_code = get_time_of_booking_numeric(time_of_booking)
  if booking_code is None:
    raise ValueError("Invalid time of booking")

  # Column order matches the feature order the model was fitted on.
  features = [number_of_riders, number_of_drivers, vehicle_code, booking_code, Expected_Ride_Duration]
  return model.predict(np.array([features]))

#User input example
user_number_of_riders = int(input("Enter the number of riders"))
user_number_of_drivers = int(input("Enter the number of drivers"))
user_vehicle_type = input("Enter Vehicle type")
user_time_of_booking = input("Enter booking time")
Expected_Ride_Duration = float(input("Enter Expected Ride duration"))
from sklearn.preprocessing import MinMaxScaler

#historical weather readings used to fit one scaler per weather feature
data1 = pd.read_csv("weather.csv")
temparray = data1['temp'].values
rainarray = data1['rain'].values
cloudsarray = data1['clouds'].values
pressurearray = data1['pressure'].values
humidityarray = data1['humidity'].values
windarray = data1['wind'].values


def _fit_scaler_and_normalize(history, prompt):
  """Fit a MinMaxScaler on `history` and scale one user-entered reading.

  Args:
    history: 1-D array of historical readings for a single weather feature.
    prompt: text shown by input() when asking for the current reading.

  Returns:
    (scaler, normalized_value): the fitted MinMaxScaler and the entered
    reading transformed by it, rounded to 4 decimals.

  Raises:
    ValueError: if the entered text cannot be converted to float.
  """
  scaler = MinMaxScaler()
  scaler.fit(history.reshape(-1, 1))
  reading = float(input(prompt))
  return scaler, scaler.transform([[reading]])[0, 0].round(4)


#one scaler + normalized reading per weather feature; the prompts appear in this order
tempscaler, tempnormalized_value = _fit_scaler_and_normalize(temparray, "Enter Temperature:")
rainscaler, rainnormalized_value = _fit_scaler_and_normalize(rainarray, "Enter Rain Value:")
cloudsscaler, cloudsnormalized_value = _fit_scaler_and_normalize(cloudsarray, "Enter Clouds Value:")
humidityscaler, humiditynormalized_value = _fit_scaler_and_normalize(humidityarray, "Enter Humidity Value:")
pressurescaler, pressurenormalized_value = _fit_scaler_and_normalize(pressurearray, "Enter Pressure Value:")
windscaler, windnormalized_value = _fit_scaler_and_normalize(windarray, "Enter Wind Value:")

#base ride price from the ride-cost model, using the ride inputs gathered above
predicted_price = predict_price(user_number_of_riders, user_number_of_drivers, user_vehicle_type, user_time_of_booking, Expected_Ride_Duration)

KeyboardInterrupt: Interrupted by user

In [None]:
# Print the first rows of `data` as plain text to inspect its current columns.
print(data.head())

In [162]:
from sklearn.model_selection import train_test_split
# Feature matrix (weather readings) and target (surge factor).
# NOTE(review): the preview of `data` printed earlier in this notebook does not show
# these columns — confirm `data` has been re-pointed at the weather/surge dataset
# before this cell runs.
# This cell also overwrites x, y and the train/test arrays of the ride-cost model.
weather_columns = ["temp", "clouds", "pressure", "rain", "wind","humidity"]
x = np.array(data[weather_columns])
y = np.array(data[["surge_factor"]])

# 80/20 train/test split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

#Reshape y to 1D array
y_train = y_train.ravel()
y_test = y_test.ravel()

#Training a random forest regression model; seeded like the split so the fitted
#(and later pickled) model is the same on every run
from sklearn.ensemble import RandomForestRegressor
model1 = RandomForestRegressor(random_state=42)
model1.fit(x_train, y_train)

In [163]:
def predict_surge_factor(temp, clouds, pressure, rain, wind, humidity):
  """Predict the surge factor for one set of weather readings with `model1`.

  The six readings are packed into a single-row array in the order
  temp, clouds, pressure, rain, wind, humidity and passed to `model1.predict`.

  Returns:
    Whatever `model1.predict` returns for the single-row input.
  """
  weather_features = [temp, clouds, pressure, rain, wind, humidity]
  return model1.predict(np.array([weather_features]))
# Surge prediction for the normalized weather readings entered by the user.
predicted_surge = predict_surge_factor(
    temp=tempnormalized_value,
    clouds=cloudsnormalized_value,
    pressure=pressurenormalized_value,
    rain=rainnormalized_value,
    wind=windnormalized_value,
    humidity=humiditynormalized_value,
)




In [None]:
# Final price = predicted surge factor x predicted base ride price.
# Both operands are the raw return values of the two models' predict() calls
# (presumably one-element arrays — confirm), so the printed value keeps that form.
final_amout_predicted=predicted_surge*predicted_price
print("The Final amount predicted is:{}".format(final_amout_predicted))

In [None]:
# `pickle` is part of the Python standard library and is imported directly with
# `import pickle`; nothing needs to be installed for it.

In [166]:
import pickle
from google.colab import files

# Serialise the fitted surge-factor regressor (`model1`) to a pickle file on disk.
surge_model_path = 'trained_model1.pkl'
with open(surge_model_path, 'wb') as model_file:
    pickle.dump(model1, model_file)

# Hand the pickle file to the Colab `files.download` helper.
files.download(surge_model_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Show the metadata pip has for the installed scikit-learn package.
# NOTE(review): presumably recorded so the pickled models can be loaded with a
# matching scikit-learn version — confirm.
!pip show scikit-learn