# Project: AI for Dynamic Pricing in Toll Roads and Parking

# STEP 1: DATA COLLECTION

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

In [2]:
# Load and prepare dataset

df = pd.read_csv("D:\\dynamic_pricing_data.csv")
df

Unnamed: 0,timestamp,location,vehicle_type,base_price,traffic_level,occupancy_rate,weather,day_of_week,is_holiday,demand_level
0,28-02-2024 04:50,Zone A,Car,79.32,Low,0.77,Rain,Wednesday,0,61.08
1,08-03-2024 00:50,Zone A,Car,67.24,Low,0.68,Clear,Friday,0,45.72
2,20-01-2024 21:37,Zone A,Car,64.90,High,0.80,Storm,Saturday,0,77.88
3,10-02-2024 21:18,Zone C,Bike,84.75,Low,0.93,Rain,Saturday,1,78.82
4,31-01-2024 23:17,Zone B,Truck,37.23,Medium,0.66,Clear,Wednesday,0,29.49
...,...,...,...,...,...,...,...,...,...,...
4995,20-02-2024 12:21,Zone C,Car,87.67,High,0.69,Storm,Tuesday,0,90.74
4996,21-02-2024 22:45,Zone B,Car,97.35,Medium,0.81,Clear,Wednesday,1,94.62
4997,15-01-2024 20:41,Zone A,Car,31.28,Low,1.00,Rain,Monday,1,31.28
4998,01-01-2024 12:51,Zone C,Car,52.40,Medium,0.82,Clear,Monday,0,51.56


# STEP 2: DATA PREPROCESSING and FEATURE ENCODING

In [3]:
df['timestamp'] = pd.to_datetime(df['timestamp'])

  df['timestamp'] = pd.to_datetime(df['timestamp'])


In [4]:
df

Unnamed: 0,timestamp,location,vehicle_type,base_price,traffic_level,occupancy_rate,weather,day_of_week,is_holiday,demand_level
0,2024-02-28 04:50:00,Zone A,Car,79.32,Low,0.77,Rain,Wednesday,0,61.08
1,2024-03-08 00:50:00,Zone A,Car,67.24,Low,0.68,Clear,Friday,0,45.72
2,2024-01-20 21:37:00,Zone A,Car,64.90,High,0.80,Storm,Saturday,0,77.88
3,2024-02-10 21:18:00,Zone C,Bike,84.75,Low,0.93,Rain,Saturday,1,78.82
4,2024-01-31 23:17:00,Zone B,Truck,37.23,Medium,0.66,Clear,Wednesday,0,29.49
...,...,...,...,...,...,...,...,...,...,...
4995,2024-02-20 12:21:00,Zone C,Car,87.67,High,0.69,Storm,Tuesday,0,90.74
4996,2024-02-21 22:45:00,Zone B,Car,97.35,Medium,0.81,Clear,Wednesday,1,94.62
4997,2024-01-15 20:41:00,Zone A,Car,31.28,Low,1.00,Rain,Monday,1,31.28
4998,2024-01-01 12:51:00,Zone C,Car,52.40,Medium,0.82,Clear,Monday,0,51.56


In [5]:
# Handeling Mission Value

df.isnull().sum()

timestamp         0
location          0
vehicle_type      0
base_price        0
traffic_level     0
occupancy_rate    0
weather           0
day_of_week       0
is_holiday        0
demand_level      0
dtype: int64

In [6]:
df.describe()

Unnamed: 0,timestamp,base_price,occupancy_rate,is_holiday,demand_level
count,5000,5000.0,5000.0,5000.0,5000.0
mean,2024-02-04 04:46:11.676000,60.255012,0.699556,0.4942,52.063532
min,2024-01-01 00:12:00,20.0,0.21,0.0,6.87
25%,2024-01-17 08:59:30,39.79,0.6,0.0,32.15
50%,2024-02-04 08:40:30,60.405,0.7,0.0,48.53
75%,2024-02-21 19:23:30,80.7675,0.8,1.0,68.115
max,2024-03-10 10:08:00,100.0,1.0,1.0,149.35
std,,23.17588,0.146453,0.500016,24.827761


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   timestamp       5000 non-null   datetime64[ns]
 1   location        5000 non-null   object        
 2   vehicle_type    5000 non-null   object        
 3   base_price      5000 non-null   float64       
 4   traffic_level   5000 non-null   object        
 5   occupancy_rate  5000 non-null   float64       
 6   weather         5000 non-null   object        
 7   day_of_week     5000 non-null   object        
 8   is_holiday      5000 non-null   int64         
 9   demand_level    5000 non-null   float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(5)
memory usage: 390.8+ KB


In [8]:
# Encode categorical columns

from sklearn.preprocessing import LabelEncoder

categorical_cols = ['location', 'vehicle_type', 'traffic_level', 'weather', 'day_of_week']
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Drop nulls if any

df = df.dropna()

In [9]:
df

Unnamed: 0,timestamp,location,vehicle_type,base_price,traffic_level,occupancy_rate,weather,day_of_week,is_holiday,demand_level
0,2024-02-28 04:50:00,0,2,79.32,1,0.77,2,6,0,61.08
1,2024-03-08 00:50:00,0,2,67.24,1,0.68,0,0,0,45.72
2,2024-01-20 21:37:00,0,2,64.90,0,0.80,3,2,0,77.88
3,2024-02-10 21:18:00,2,0,84.75,1,0.93,2,2,1,78.82
4,2024-01-31 23:17:00,1,3,37.23,2,0.66,0,6,0,29.49
...,...,...,...,...,...,...,...,...,...,...
4995,2024-02-20 12:21:00,2,2,87.67,0,0.69,3,5,0,90.74
4996,2024-02-21 22:45:00,1,2,97.35,2,0.81,0,6,1,94.62
4997,2024-01-15 20:41:00,0,2,31.28,1,1.00,2,1,1,31.28
4998,2024-01-01 12:51:00,2,2,52.40,2,0.82,0,1,0,51.56


# STEP 3: MODEL TRAINING

**MODEL BUILDING**

In [10]:
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators=100, random_state=42)

In [12]:
# Define input/output

features = ['location', 'vehicle_type', 'traffic_level', 'occupancy_rate', 'weather', 'day_of_week', 'is_holiday']

X = df[features]
y = df['demand_level']

In [13]:
df

Unnamed: 0,timestamp,location,vehicle_type,base_price,traffic_level,occupancy_rate,weather,day_of_week,is_holiday,demand_level
0,2024-02-28 04:50:00,0,2,79.32,1,0.77,2,6,0,61.08
1,2024-03-08 00:50:00,0,2,67.24,1,0.68,0,0,0,45.72
2,2024-01-20 21:37:00,0,2,64.90,0,0.80,3,2,0,77.88
3,2024-02-10 21:18:00,2,0,84.75,1,0.93,2,2,1,78.82
4,2024-01-31 23:17:00,1,3,37.23,2,0.66,0,6,0,29.49
...,...,...,...,...,...,...,...,...,...,...
4995,2024-02-20 12:21:00,2,2,87.67,0,0.69,3,5,0,90.74
4996,2024-02-21 22:45:00,1,2,97.35,2,0.81,0,6,1,94.62
4997,2024-01-15 20:41:00,0,2,31.28,1,1.00,2,1,1,31.28
4998,2024-01-01 12:51:00,2,2,52.40,2,0.82,0,1,0,51.56


**MODEL TRAIN & TEST**

In [14]:
from sklearn.model_selection import train_test_split

#Split and Train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
model.fit(X_train, y_train)

In [16]:
len(X_train)

4000

In [18]:
len(y_train)

4000

In [19]:
len(X_test)

1000

In [20]:
X_test

Unnamed: 0,location,vehicle_type,traffic_level,occupancy_rate,weather,day_of_week,is_holiday
1501,1,0,2,0.62,0,0,1
2586,1,0,1,0.50,2,1,1
2653,1,2,0,0.85,1,4,1
1055,1,3,0,0.60,2,4,1
705,2,0,2,0.79,1,2,0
...,...,...,...,...,...,...,...
4711,2,2,1,0.79,0,1,0
2313,0,2,1,0.77,1,2,1
3214,0,3,2,0.81,1,1,1
2732,1,2,0,0.78,1,1,1


**MODEL EVALUATION**

In [21]:
# Evaluate
from sklearn.metrics import mean_absolute_error

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error:", round(mae, 2))

Mean Absolute Error: 18.72


In [22]:
import joblib

# Save the trained model
joblib.dump(model, "model.pkl")

# Save all encoders
joblib.dump(label_encoders, "label_encoders.pkl")

['label_encoders.pkl']

# STEP 4: DYNAMIC PRICING LOGIC

In [23]:
def get_dynamic_price(input_row, model):
    """
    Predict dynamic price from a single input row.
    """
    input_features = input_row[features].values.reshape(1, -1)
    predicted_price = model.predict(input_features)[0]
    return round(predicted_price, 2)

# Test with random sample

sample_row = df.sample(1).copy()
predicted_price = get_dynamic_price(sample_row, model)

print("Timestamp:", sample_row['timestamp'].values[0])
print("Base Price:", sample_row['base_price'].values[0])
print("Predicted Dynamic Price:", predicted_price)

Timestamp: 2024-02-15T11:21:00.000000000
Base Price: 53.85
Predicted Dynamic Price: 56.61




# STEP 5: SIMULATE USER RESPONDS

In [24]:

def simulate_user_response(predicted_price, base_price):
    """
    Simulates whether the user accepts or declines the price.
    """
    threshold = base_price * 1.1
    if predicted_price <= threshold:
        return random.choices(['accept', 'decline'], weights=[0.75, 0.25])[0]
    else:
        return random.choices(['accept', 'decline'], weights=[0.35, 0.65])[0]

# Use sample row from Step 4
sample_row['predicted_price'] = predicted_price
sample_row['user_response'] = simulate_user_response(predicted_price, sample_row['base_price'].values[0])

# Print result
print(sample_row[['timestamp', 'location', 'vehicle_type', 'base_price', 'predicted_price', 'user_response']])


               timestamp  location  vehicle_type  base_price  predicted_price  \
2596 2024-02-15 11:21:00         2             3       53.85            56.61   

     user_response  
2596       decline  
