In [160]:
import pandas as pd
import numpy as np


# importing data and acquiring basic info

In [159]:
# Loading dataset
df = pd.read_csv("/content/dataset (1).csv")  # Adjust filename if needed

# basic info
print(" Dataset Shape:", df.shape)
print(" Col Names:", df.columns.tolist())
df.head()


# Checking for missing/null values
print(df.isnull().sum())
df.head()
df.dtypes

 Dataset Shape: (18368, 12)
 Col Names: ['ID', 'SystemCodeNumber', 'Capacity', 'Latitude', 'Longitude', 'Occupancy', 'VehicleType', 'TrafficConditionNearby', 'QueueLength', 'IsSpecialDay', 'LastUpdatedDate', 'LastUpdatedTime']
ID                        0
SystemCodeNumber          0
Capacity                  0
Latitude                  0
Longitude                 0
Occupancy                 0
VehicleType               0
TrafficConditionNearby    0
QueueLength               0
IsSpecialDay              0
LastUpdatedDate           0
LastUpdatedTime           0
dtype: int64


Unnamed: 0,0
ID,int64
SystemCodeNumber,object
Capacity,int64
Latitude,float64
Longitude,float64
Occupancy,int64
VehicleType,object
TrafficConditionNearby,object
QueueLength,int64
IsSpecialDay,int64


data wrangling

In [161]:
# Strip spaces from column names
df.columns = df.columns.str.strip()

# Convert string-like columns (in pd str == object)
df['SystemCodeNumber'] = df['SystemCodeNumber'].astype(str).str.strip()
df['VehicleType'] = df['VehicleType'].astype(str).str.strip()
df['TrafficConditionNearby'] = df['TrafficConditionNearby'].astype(str).str.strip()
df['LastUpdatedDate'] = df['LastUpdatedDate'].astype(str).str.strip()
df['LastUpdatedTime'] = df['LastUpdatedTime'].astype(str).str.strip()

# Convert numeric columns
numeric_columns = ['Capacity', 'Occupancy', 'QueueLength', 'Latitude', 'Longitude', 'IsSpecialDay']
for col in numeric_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# convert IsSpecialDay to int
df['IsSpecialDay'] = df['IsSpecialDay'].astype(int)


# Save the cleaned data into file
df.to_csv("cdataset.csv", index=False)

cleaned_df = pd.read_csv("cdataset.csv")
cleaned_df.dtypes
cleaned_df

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00
...,...,...,...,...,...,...,...,...,...,...,...,...
18363,18363,Shopping,1920,26.150504,91.733531,1517,truck,average,6,0,19-12-2016,14:30:00
18364,18364,Shopping,1920,26.150504,91.733531,1487,car,low,3,0,19-12-2016,15:03:00
18365,18365,Shopping,1920,26.150504,91.733531,1432,cycle,low,3,0,19-12-2016,15:29:00
18366,18366,Shopping,1920,26.150504,91.733531,1321,car,low,2,0,19-12-2016,16:03:00


In [162]:
# create timestamp cols

cleaned_df['Timestamp'] = pd.to_datetime(
    df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
    format='%d-%m-%Y %H:%M:%S', errors='coerce' )
cleaned_df.head()



Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,Timestamp
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00,2016-10-04 07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00,2016-10-04 08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00,2016-10-04 08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00,2016-10-04 09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00,2016-10-04 09:59:00


# A DASH OF FEATURE ENGINEERING FOR **MODELLING**

In [164]:
# OccupancyRate = Occupancy / Capacity
cleaned_df['OccupancyRate'] = cleaned_df['Occupancy'] / cleaned_df['Capacity']
cleaned_df['OccupancyRate'] = cleaned_df['OccupancyRate'].clip(0, 1)  # Bound between 0–1

# QueueLength normalized (0 to 1)
cleaned_df['QueueLengthNorm'] = cleaned_df['QueueLength'] / cleaned_df['QueueLength'].max()

# VehicleTypeWeight
vehicle_weights = {'bike': 0.5, 'car': 1.0, 'truck': 1.5, 'cycle': 0.2}
cleaned_df['VehicleTypeWeight'] = cleaned_df['VehicleType'].map(vehicle_weights)

# TrafficScore encoding
traffic_map = {'low': 0, 'average': 1, 'high': 2}
cleaned_df['TrafficScore'] = cleaned_df['TrafficConditionNearby'].map(traffic_map)

# Make sure IsSpecialDay is binary integer
cleaned_df['IsSpecialDay'] = cleaned_df['IsSpecialDay'].astype(int)
cleaned_df


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,Timestamp,OccupancyRate,QueueLengthNorm,VehicleTypeWeight,TrafficScore
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00,2016-10-04 07:59:00,0.105719,0.066667,1.0,0
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00,2016-10-04 08:25:00,0.110919,0.066667,1.0,0
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00,2016-10-04 08:59:00,0.138648,0.133333,1.0,0
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00,2016-10-04 09:32:00,0.185442,0.133333,1.0,0
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00,2016-10-04 09:59:00,0.259965,0.133333,0.5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18363,18363,Shopping,1920,26.150504,91.733531,1517,truck,average,6,0,19-12-2016,14:30:00,2016-12-19 14:30:00,0.790104,0.400000,1.5,1
18364,18364,Shopping,1920,26.150504,91.733531,1487,car,low,3,0,19-12-2016,15:03:00,2016-12-19 15:03:00,0.774479,0.200000,1.0,0
18365,18365,Shopping,1920,26.150504,91.733531,1432,cycle,low,3,0,19-12-2016,15:29:00,2016-12-19 15:29:00,0.745833,0.200000,0.2,0
18366,18366,Shopping,1920,26.150504,91.733531,1321,car,low,2,0,19-12-2016,16:03:00,2016-12-19 16:03:00,0.688021,0.133333,1.0,0


MODEL 1 **IMPLEMENTATION**

In [165]:
# Model 1 - Baseline Linear Pricing

# Copy the clean dataframe
model1_df = cleaned_df.copy()

# Sort by parking lot and timestamp for time based ordering
model1_df.sort_values(by=['SystemCodeNumber', 'Timestamp'], inplace=True)

# base price col
base_price = 10
alpha = 2
model1_df['Model1Price'] = 0.0

# Looping to each parking lot separately
for lot_id in model1_df['SystemCodeNumber'].unique():
    lot_mask = model1_df['SystemCodeNumber'] == lot_id
    lot_data = model1_df[lot_mask]

    prices = [base_price]  # start price

    for i in range(1, len(lot_data)):
        occ_rate = lot_data.iloc[i]['OccupancyRate']
        new_price = prices[-1] + alpha * occ_rate
        new_price = max(5, min(new_price, 20))  # Bound between $5 and $20
        prices.append(new_price)

    # Assign calculated prices back to main DataFrame
    model1_df.loc[lot_mask, 'Model1Price'] = prices

# Results of Model 1

model1_df[['SystemCodeNumber', 'Timestamp', 'OccupancyRate', 'Model1Price']].head(10)


Unnamed: 0,SystemCodeNumber,Timestamp,OccupancyRate,Model1Price
0,BHMBCCMKT01,2016-10-04 07:59:00,0.105719,10.0
1,BHMBCCMKT01,2016-10-04 08:25:00,0.110919,10.221837
2,BHMBCCMKT01,2016-10-04 08:59:00,0.138648,10.499133
3,BHMBCCMKT01,2016-10-04 09:32:00,0.185442,10.870017
4,BHMBCCMKT01,2016-10-04 09:59:00,0.259965,11.389948
5,BHMBCCMKT01,2016-10-04 10:26:00,0.306759,12.003466
6,BHMBCCMKT01,2016-10-04 10:59:00,0.379549,12.762565
7,BHMBCCMKT01,2016-10-04 11:25:00,0.428076,13.618718
8,BHMBCCMKT01,2016-10-04 11:59:00,0.448873,14.516464
9,BHMBCCMKT01,2016-10-04 12:29:00,0.461005,15.438475


# MODEL 2 **IMPLEMENTAITON** **Demand based pricing model**



In [166]:
# Model 2 - Demand-Based Pricing

model2_df = cleaned_df.copy()

# Define weights (these are taken as per their importance in the model)
alpha = 2.0     # OccupancyRate
beta = 1.5      # QueueLength
gamma = 1.0     # TrafficScore
delta = 1.0     # IsSpecialDay
epsilon = 1.0   # VehicleTypeWeight
lambda_ = 0.5   # Price scaling factor

# Demand Score
model2_df['DemandScore'] = (
    alpha * model2_df['OccupancyRate'] +
    beta * model2_df['QueueLengthNorm'] -
    gamma * model2_df['TrafficScore'] +
    delta * model2_df['IsSpecialDay'] +
    epsilon * model2_df['VehicleTypeWeight']
)

# Normalize demand score (min-max scaling with np and pd only)
min_demand = model2_df['DemandScore'].min()
max_demand = model2_df['DemandScore'].max()
model2_df['NormalizedDemand'] = (model2_df['DemandScore'] - min_demand) / (max_demand - min_demand)

# Computing final price
base_price = 10
model2_df['Model2Price'] = base_price * (1 + lambda_ * model2_df['NormalizedDemand'])

# Bound the price between $5 and $20
model2_df['Model2Price'] = model2_df['Model2Price'].clip(lower=5, upper=20)

# Result of Model 2
model2_df[['SystemCodeNumber', 'Timestamp', 'OccupancyRate', 'NormalizedDemand', 'Model2Price']].head(10)


Unnamed: 0,SystemCodeNumber,Timestamp,OccupancyRate,NormalizedDemand,Model2Price
0,BHMBCCMKT01,2016-10-04 07:59:00,0.105719,0.407247,12.036237
1,BHMBCCMKT01,2016-10-04 08:25:00,0.110919,0.409096,12.045479
2,BHMBCCMKT01,2016-10-04 08:59:00,0.138648,0.436728,12.183642
3,BHMBCCMKT01,2016-10-04 09:32:00,0.185442,0.453363,12.266817
4,BHMBCCMKT01,2016-10-04 09:59:00,0.259965,0.390982,11.954911
5,BHMBCCMKT01,2016-10-04 10:26:00,0.306759,0.514266,12.57133
6,BHMBCCMKT01,2016-10-04 10:59:00,0.379549,0.326845,11.634225
7,BHMBCCMKT01,2016-10-04 11:25:00,0.428076,0.415195,12.075976
8,BHMBCCMKT01,2016-10-04 11:59:00,0.448873,0.28039,11.40195
9,BHMBCCMKT01,2016-10-04 12:29:00,0.461005,0.213604,11.068018


# MODEL 3 **IMPLEMENTATION** **Competitive pricing model**

In [112]:
# pre-req. for model 3

from math import radians, cos, sin, asin, sqrt

# Haversine formula to calculate distance between two lat-long points
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Earth's radius in KM
    lat1,lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1)*cos(lat2)*sin(dlon/2)**2
    return R * 2 * asin(sqrt(a))


In [167]:
# Model 3
# making copy of Model 2 data
model3_df = model2_df.copy()
model3_df.sort_values(by=['SystemCodeNumber', 'Timestamp'], inplace=True)

# Now implement Haversine formula to compute distance in KM
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Earth's radius in km
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    return R * 2 * np.arcsin(np.sqrt(a))

# Parameters
radius_km = 0.7
time_window = timedelta(minutes=5)

# Pre-compute lot coordinates
lot_locations = model3_df.groupby('SystemCodeNumber')[['Latitude', 'Longitude']].first()

# Start with Model 2 prices
model3_df['Model3Price'] = model3_df['Model2Price']

# Apply competitive logic
for i, row in model3_df.iterrows():
    current_lot = row['SystemCodeNumber']
    curr_lat = row['Latitude']
    curr_lon = row['Longitude']
    curr_price = row['Model2Price']
    curr_time = row['Timestamp']
    curr_occupancy = row['Occupancy']
    curr_capacity = row['Capacity']

    nearby_prices = []

# Find nearby lots
    for other_lot, loc in lot_locations.iterrows():
        if other_lot == current_lot:
            continue
        distance = haversine(curr_lat, curr_lon, loc['Latitude'], loc['Longitude'])
        if distance <= radius_km:
# Time-window match
            mask = (
                (model3_df['SystemCodeNumber'] == other_lot) &
                (abs(model3_df['Timestamp'] - curr_time) <= time_window)
            )
            other_prices = model3_df[mask]['Model2Price']
            nearby_prices.extend(other_prices.tolist())

# Adjust price based on nearby competitors
    if nearby_prices:
        avg_nearby_price = np.mean(nearby_prices)
        occ_rate = curr_occupancy / curr_capacity if curr_capacity > 0 else 0

        if occ_rate >= 1.0 and avg_nearby_price < curr_price:
            # Lot overloaded and cheaper options nearby → reduce price
            model3_df.at[i, 'Model3Price'] = max(5, curr_price - 1.5)
        elif avg_nearby_price > curr_price + 2:
            # Competitors are more expensive → raise price
            model3_df.at[i, 'Model3Price'] = min(20, curr_price + 1.0)

    # if price changed
    if model3_df.at[i, 'Model3Price'] != curr_price:
        print(f"[{curr_time}] {current_lot}: {curr_price:.2f} → {model3_df.at[i, 'Model3Price']:.2f}")

# output of model 3
model3_df[['SystemCodeNumber', 'Timestamp', 'Model2Price', 'Model3Price']].head(15)


[2016-10-11 09:57:00] BHMBCCMKT01: 11.00 → 12.00
[2016-10-11 12:30:00] BHMBCCMKT01: 10.66 → 11.66
[2016-10-12 16:31:00] BHMBCCMKT01: 10.68 → 11.68
[2016-10-13 12:31:00] BHMBCCMKT01: 10.60 → 11.60
[2016-10-25 15:02:00] BHMBCCMKT01: 11.47 → 12.47
[2016-11-08 09:26:00] BHMBCCMKT01: 10.55 → 11.55
[2016-11-08 14:26:00] BHMBCCMKT01: 11.03 → 12.03
[2016-11-09 12:00:00] BHMBCCMKT01: 10.54 → 11.54
[2016-11-09 12:27:00] BHMBCCMKT01: 10.39 → 11.39
[2016-11-17 09:57:00] BHMBCCMKT01: 11.38 → 12.38
[2016-11-17 11:04:00] BHMBCCMKT01: 11.16 → 12.16
[2016-11-17 14:04:00] BHMBCCMKT01: 11.29 → 12.29
[2016-11-21 12:31:00] BHMBCCMKT01: 10.22 → 11.22
[2016-11-23 08:30:00] BHMBCCMKT01: 10.60 → 11.60
[2016-11-23 13:04:00] BHMBCCMKT01: 10.38 → 11.38
[2016-11-28 09:28:00] BHMBCCMKT01: 10.76 → 11.76
[2016-11-29 09:02:00] BHMBCCMKT01: 10.45 → 11.45
[2016-11-30 15:01:00] BHMBCCMKT01: 10.90 → 11.90
[2016-12-01 10:58:00] BHMBCCMKT01: 10.45 → 11.45
[2016-12-01 11:32:00] BHMBCCMKT01: 10.34 → 11.34
[2016-12-05 11:56:00

Unnamed: 0,SystemCodeNumber,Timestamp,Model2Price,Model3Price
0,BHMBCCMKT01,2016-10-04 07:59:00,12.036237,12.036237
1,BHMBCCMKT01,2016-10-04 08:25:00,12.045479,12.045479
2,BHMBCCMKT01,2016-10-04 08:59:00,12.183642,12.183642
3,BHMBCCMKT01,2016-10-04 09:32:00,12.266817,12.266817
4,BHMBCCMKT01,2016-10-04 09:59:00,11.954911,11.954911
5,BHMBCCMKT01,2016-10-04 10:26:00,12.57133,12.57133
6,BHMBCCMKT01,2016-10-04 10:59:00,11.634225,11.634225
7,BHMBCCMKT01,2016-10-04 11:25:00,12.075976,12.075976
8,BHMBCCMKT01,2016-10-04 11:59:00,11.40195,11.40195
9,BHMBCCMKT01,2016-10-04 12:29:00,11.068018,11.068018


# just to make sure that everything is fine with cleaned dataset before pathway integration: :D#

In [168]:
cleaned_df
print(cleaned_df.dtypes)
print(cleaned_df['SystemCodeNumber'].unique())
print(cleaned_df.isnull().sum())

ID                                 int64
SystemCodeNumber                  object
Capacity                           int64
Latitude                         float64
Longitude                        float64
Occupancy                          int64
VehicleType                       object
TrafficConditionNearby            object
QueueLength                        int64
IsSpecialDay                       int64
LastUpdatedDate                   object
LastUpdatedTime                   object
Timestamp                 datetime64[ns]
OccupancyRate                    float64
QueueLengthNorm                  float64
VehicleTypeWeight                float64
TrafficScore                       int64
dtype: object
['BHMBCCMKT01' 'BHMBCCTHL01' 'BHMEURBRD01' 'BHMMBMMBX01' 'BHMNCPHST01'
 'BHMNCPNST01' 'Broad Street' 'Others-CCCPS105a' 'Others-CCCPS119a'
 'Others-CCCPS135a' 'Others-CCCPS202' 'Others-CCCPS8' 'Others-CCCPS98'
 'Shopping']
ID                        0
SystemCodeNumber          0
Capacity  

PATHWAY INTEGRAION FOR REAL TIME STIMULATION OF MODEL **bold text**

In [169]:
!pip install pathway --quiet

import pathway as pw


# Demand weights
alpha = 2.0     # OccupancyRate
beta = 1.5      # QueueLength
gamma = 1.0     # TrafficScore
delta = 1.0     # IsSpecialDay
epsilon = 1.0   # VehicleTypeWeight
lambda_ = 0.5   # Price scaling factor

# Vehicle + traffic mappings
vehicle_weights = {'bike': 0.5, 'car': 1.0, 'truck': 1.5, 'cycle': 0.2}
traffic_map = {'low': 0, 'average': 1, 'high': 2}

# Schema for CSV columns
class InputSchema(pw.Schema):
    ID: int
    SystemCodeNumber: str
    Capacity: int
    Latitude: float
    Longitude: float
    Occupancy: int
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: int
    IsSpecialDay: int
    LastUpdatedDate: str
    LastUpdatedTime: str

input_table = pw.io.csv.read(
    "cleaned_df.csv",
    schema=InputSchema,
    mode="static"
)

@pw.udf
def compute_price(occupancy, capacity, queue, traffic, special, vtype):
    if capacity == 0:
        return 10.0
# normalised based on assumed max
    occupancy_rate = occupancy / capacity
    queue_norm = queue / 10
    traffic_score = traffic_map.get(traffic, 1)
    v_weight = vehicle_weights.get(vtype, 1.0)

    demand = (
        alpha * occupancy_rate +
        beta * queue_norm -
        gamma * traffic_score +
        delta * special +
        epsilon * v_weight
    )
# roughly normalised
    norm_demand = (demand - 1) / 10
    price = 10 * (1 + lambda_ * norm_demand)
    return round(np.clip(price, 5, 20), 2)

# Apply the pricing logic
output_table = input_table.select(
    SystemCodeNumber=input_table.SystemCodeNumber,
    Timestamp=input_table.LastUpdatedDate + " " + input_table.LastUpdatedTime,
    Price=compute_price(
        input_table.Occupancy,
        input_table.Capacity,
        input_table.QueueLength,
        input_table.TrafficConditionNearby,
        input_table.IsSpecialDay,
        input_table.VehicleType,
    )
)

# Save output to file
pw.io.jsonlines.write(output_table, "output.jsonl")

# Run once and terminate
pw.run()


Output()

ERROR:pathway_engine.connectors:Parse error: failed to parse value "BHMBCCMKT01" at field "SystemCodeNumber" according to the type int in schema: invalid digit found in string
ERROR:pathway_engine.connectors:Parse error: failed to parse value "BHMBCCMKT01" at field "SystemCodeNumber" according to the type int in schema: invalid digit found in string
ERROR:pathway_engine.connectors:Parse error: failed to parse value "BHMBCCMKT01" at field "SystemCodeNumber" according to the type int in schema: invalid digit found in string
ERROR:pathway_engine.connectors:Parse error: failed to parse value "BHMBCCMKT01" at field "SystemCodeNumber" according to the type int in schema: invalid digit found in string
ERROR:pathway_engine.connectors:Parse error: failed to parse value "BHMBCCMKT01" at field "SystemCodeNumber" according to the type int in schema: invalid digit found in string
ERROR:pathway_engine.connectors:Parse error: failed to parse value "BHMBCCMKT01" at field "SystemCodeNumber" according t

KeyboardInterrupt: 

In [170]:
!head output.jsonl


{"SystemCodeNumber":"Others-CCCPS119a","Timestamp":"06-10-2016 10:03:00","Price":10.21,"diff":1,"time":1752085236342}
{"SystemCodeNumber":"BHMBCCMKT01","Timestamp":"29-10-2016 12:06:00","Price":10.17,"diff":1,"time":1752085236342}
{"SystemCodeNumber":"Shopping","Timestamp":"05-11-2016 10:59:00","Price":9.69,"diff":1,"time":1752085236342}
{"SystemCodeNumber":"BHMBCCTHL01","Timestamp":"14-12-2016 13:03:00","Price":10.55,"diff":1,"time":1752085236342}
{"SystemCodeNumber":"BHMNCPHST01","Timestamp":"10-10-2016 11:57:00","Price":9.96,"diff":1,"time":1752085236342}
{"SystemCodeNumber":"BHMEURBRD01","Timestamp":"31-10-2016 09:00:00","Price":10.73,"diff":1,"time":1752085236342}
{"SystemCodeNumber":"BHMBCCTHL01","Timestamp":"18-10-2016 15:27:00","Price":10.95,"diff":1,"time":1752085236342}
{"SystemCodeNumber":"BHMNCPHST01","Timestamp":"22-10-2016 09:26:00","Price":10.53,"diff":1,"time":1752085236342}
{"SystemCodeNumber":"Others-CCCPS105a","Timestamp":"31-10-2016 15:00:00","Price":10.4,"diff":1,"

In [171]:
df_output = pd.read_json("output.jsonl", lines=True)
df_output.head()


Unnamed: 0,SystemCodeNumber,Timestamp,Price,diff,time
0,Others-CCCPS119a,2016-06-10 10:03:00,10.21,1,1752085236342
1,BHMBCCMKT01,2016-10-29 12:06:00,10.17,1,1752085236342
2,Shopping,2016-05-11 10:59:00,9.69,1,1752085236342
3,BHMBCCTHL01,2016-12-14 13:03:00,10.55,1,1752085236342
4,BHMNCPHST01,2016-10-10 11:57:00,9.96,1,1752085236342


# **visualisation**

In [172]:
!pip install bokeh --quiet


# bokeh plot for individual plotting and unified plotting

In [173]:
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
from bokeh.models import HoverTool
from bokeh.palettes import Category10

# Load and prepare data
df = pd.read_json("output.jsonl", lines=True)
df = df[['SystemCodeNumber', 'Timestamp', 'Price']]
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df.sort_values(by='Timestamp', inplace=True)

output_notebook()

# Generate one plot per SystemCodeNumber
plots = []
colors = Category10[10]

for i, lot in enumerate(df['SystemCodeNumber'].unique()):
    lot_df = df[df['SystemCodeNumber'] == lot]

    p = figure(
        x_axis_type='datetime',
        width=800,
        height=300,
        title=f"Dynamic Pricing for Lot: {lot}"
    )

    color = colors[i % len(colors)]
    p.line(lot_df['Timestamp'], lot_df['Price'], line_width=2, color=color, legend_label=lot)
    p.scatter(lot_df['Timestamp'], lot_df['Price'], fill_color="white", size=6)

    hover = HoverTool(tooltips=[
        ("Time", "@x{%F %T}"),
        ("Price", "@y{$0.00}")
    ], formatters={'@x': 'datetime'}, mode='vline')
    p.add_tools(hover)

    p.xaxis.axis_label = "Timestamp"
    p.yaxis.axis_label = "Price ($)"
    p.legend.location = "top_left"
    p.grid.grid_line_alpha = 0.3

    plots.append(p)

# Show all plots stacked vertically
show(column(*plots))


In [174]:
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.palettes import Category10

# Load data
dfo = pd.read_json("output.jsonl", lines=True)

# Prepare
dfo = dfo[['SystemCodeNumber', 'Timestamp', 'Price']]
dfo['Timestamp'] = pd.to_datetime(dfo['Timestamp'])
dfo.sort_values(by='Timestamp', inplace=True)

# Setup output
output_notebook()

# Create figure
p = figure(x_axis_type='datetime', width=950, height=500,
           title="Dynamic Pricing for All Parking Lots")

# Assign colors (repeat palette if needed)
unique_lots = dfo['SystemCodeNumber'].unique()
palette = Category10[10] * ((len(unique_lots) // 10) + 1)

# Plot each lot as a separate line
for i, lot in enumerate(unique_lots):
    lot_df = dfo[dfo['SystemCodeNumber'] == lot]
    source = ColumnDataSource(lot_df)

    p.line('Timestamp', 'Price', source=source, line_width=2, color=palette[i], legend_label=lot)
    p.scatter('Timestamp', 'Price', source=source, size=4, marker='circle', fill_color="white", color=palette[i])

# Hover tool
hover = HoverTool(
    tooltips=[
        ("Lot", "@SystemCodeNumber"),
        ("Time", "@Timestamp{%F %T}"),
        ("Price", "@Price{$0.00}")
    ],
    formatters={'@Timestamp': 'datetime'},
    mode='vline'
)
p.add_tools(hover)

# Final touches
p.xaxis.axis_label = "Timestamp"
p.yaxis.axis_label = "Price ($)"
p.legend.title = "SystemCodeNumber"
p.legend.location = "top_left"
p.legend.click_policy = "hide"  # Optional: toggle visibility
p.grid.grid_line_alpha = 0.3

show(p)
