In [1]:
!pip install pandas numpy bokeh pathway



In [2]:
# --- Cell 2: Imports and Notebook Setup ---
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot
import pathway as pw  # Needed for full Pathway, simulated here
output_notebook()


In [4]:
# --- Cell 3: Load the real dataset ---
# Read the raw parking records into the frame every later cell works on.
df = pd.read_csv('/content/dataset.csv')

# Quick sanity check: preview the first rows and list the column names.
display(df.head(5))
print("Columns in dataset:", list(df.columns))


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


Columns in dataset: ['ID', 'SystemCodeNumber', 'Capacity', 'Latitude', 'Longitude', 'Occupancy', 'VehicleType', 'TrafficConditionNearby', 'QueueLength', 'IsSpecialDay', 'LastUpdatedDate', 'LastUpdatedTime']


In [6]:
# --- Cell 4: Preprocessing and Renaming ---

# Rename columns for consistency.
# The lot identifier must come from 'SystemCodeNumber': in the preview above it
# repeats across rows, while 'ID' is just a running row number. Using 'ID'
# would turn every single row into its own "lot".
# A non-inplace rename keeps this cell safe to re-run.
df = df.rename(columns={
    'SystemCodeNumber': 'LotID',
    'TrafficConditionNearby': 'Traffic',
})

# Create a real datetime Timestamp from LastUpdatedDate and LastUpdatedTime so
# that sorting it later is chronological rather than lexical.
# NOTE(review): the preview rows look like DD-MM-YYYY HH:MM:SS; confirm against
# the full file (a mismatch raises here instead of silently mis-ordering).
df['Timestamp'] = pd.to_datetime(
    df['LastUpdatedDate'].astype(str) + ' ' + df['LastUpdatedTime'].astype(str),
    format='%d-%m-%Y %H:%M:%S',
)

# Traffic arrives as text labels (the preview shows 'low'). pd.to_numeric alone
# would coerce every label to NaN and turn every demand-based price into NaN.
# NOTE(review): only 'low' is visible in the preview; the remaining labels and
# the 0/1/2 scale are assumptions, so extend or adjust after inspecting the data.
traffic_levels = {'low': 0.0, 'average': 1.0, 'medium': 1.0, 'high': 2.0}
traffic_as_number = pd.to_numeric(df['Traffic'], errors='coerce')
traffic_from_label = df['Traffic'].astype(str).str.strip().str.lower().map(traffic_levels)
# Values that are already numeric win; labels fill in the rest (re-run safe).
df['Traffic'] = traffic_as_number.fillna(traffic_from_label)

unmapped_traffic = int(df['Traffic'].isna().sum())
if unmapped_traffic:
    print(f"Warning: {unmapped_traffic} rows have an unrecognized Traffic value (left as NaN)")

# Ensure the remaining numerical columns are float or int, not strings
for col in ['Capacity', 'Occupancy', 'QueueLength', 'IsSpecialDay']:
    df[col] = pd.to_numeric(df[col], errors='coerce')


# Drop unused columns if you want (optional)
# df = df[['LotID', 'Latitude', 'Longitude', 'Capacity', 'Occupancy', 'QueueLength', 'VehicleType', 'Traffic', 'IsSpecialDay', 'Timestamp']]

# Now check columns
required_cols = ['LotID', 'Latitude', 'Longitude', 'Capacity', 'Occupancy', 'QueueLength',
                 'VehicleType', 'Traffic', 'IsSpecialDay', 'Timestamp']
assert all(col in df.columns for col in required_cols), "Some required columns are missing!"


In [7]:
# --- Cell 5: Helper Functions ---
def vehicle_type_weight(vtype):
    """Return the demand weight associated with a vehicle type.

    Known types are 'car' (1.0), 'bike' (0.5) and 'truck' (1.5); any other
    value falls back to 1.0. The weights are illustrative and can be tuned.
    """
    weights_by_type = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}
    try:
        return weights_by_type[vtype]
    except KeyError:
        # Unknown vehicle types are treated as neutral.
        return 1.0

def smooth_price(last_price, target_price, max_delta=2.0):
    """Move from ``last_price`` toward ``target_price`` by at most ``max_delta``.

    Returns ``target_price`` unchanged when it is within ``max_delta`` of
    ``last_price``; otherwise returns ``last_price`` shifted by ``max_delta``
    in the direction of the target. This keeps price jumps bounded per step.
    """
    step = target_price - last_price
    if not abs(step) > max_delta:
        # Close enough: jump straight to the target.
        return target_price
    return last_price + np.sign(step) * max_delta


In [8]:
# --- Cell 6: Pricing Models ---

# Baseline Linear Model
def baseline_linear_price(last_price, occupancy, capacity, alpha=0.5):
    """Baseline model: raise the previous price in proportion to utilization.

    Computes ``last_price + alpha * (occupancy / capacity)``.
    """
    utilization = occupancy / capacity
    increment = alpha * utilization
    return last_price + increment

# Demand-Based Price Function
def demand_function(occupancy, capacity, queue, traffic, is_special, vtype,
                    alpha=1.0, beta=0.2, gamma=0.3, delta=0.5, epsilon=0.2):
    """Combine the lot's signals into a single linear demand score.

    The score adds the weighted occupancy rate, queue length, special-day
    flag and vehicle-type weight, and subtracts the weighted traffic level.
    ``alpha`` .. ``epsilon`` are the respective coefficients.
    """
    occupancy_term = alpha * occupancy / capacity
    queue_term = beta * queue
    traffic_term = gamma * traffic
    special_day_term = delta * is_special
    vehicle_term = epsilon * vehicle_type_weight(vtype)
    # Summed left to right in the same order as the model definition.
    return occupancy_term + queue_term - traffic_term + special_day_term + vehicle_term

def demand_based_price(base_price, demand, lmbda=0.8):
    """Scale ``base_price`` by the normalized demand and clip the result.

    Demand is normalized by a fixed divisor of 5.0 (tune for your data), the
    price is ``base_price * (1 + lmbda * normalized_demand)``, and the result
    is clipped to the range [0.5 * base_price, 2 * base_price].
    """
    normalized = demand / 5.0  # Adjust denominator for your data
    lower_bound = 0.5 * base_price
    upper_bound = 2 * base_price
    unclipped = base_price * (1 + lmbda * normalized)
    return np.clip(unclipped, lower_bound, upper_bound)


In [9]:
# --- Cell 7: Competitive Pricing Model ---
from scipy.spatial.distance import cdist

def get_nearby_competitor_prices(lot_id, all_lots_df, current_prices, radius=0.01):
    """Collect the current prices of lots located within ``radius`` of ``lot_id``.

    Args:
        lot_id: Identifier of the lot whose neighbours are wanted.
        all_lots_df: DataFrame with 'LotID', 'Latitude' and 'Longitude'
            columns (typically the rows for a single timestamp).
        current_prices: Mapping from lot id to its current price.
        radius: Maximum Euclidean distance, in the same units as the
            coordinate columns, for another lot to count as a competitor.

    Returns:
        A list with one price per row of another lot that lies strictly
        within ``radius`` and has an entry in ``current_prices``. The list is
        empty when ``lot_id`` has no row in ``all_lots_df`` or when there are
        no other lots.
    """
    is_self = all_lots_df['LotID'] == lot_id
    own_coords = all_lots_df.loc[is_self, ['Latitude', 'Longitude']]
    others = all_lots_df.loc[~is_self]
    # Without a position for this lot (or without any other lot) there is
    # nothing to compare against; previously a missing lot raised IndexError.
    if own_coords.empty or others.empty:
        return []

    # Use the first row for this lot as its position.
    origin = own_coords.iloc[[0]].to_numpy(dtype=float)
    other_coords = others[['Latitude', 'Longitude']].to_numpy(dtype=float)
    distances = cdist(origin, other_coords)[0]

    return [
        current_prices[other_id]
        for other_id, distance in zip(others['LotID'], distances)
        if distance < radius and other_id in current_prices
    ]

def competitive_price(my_price, occupancy, capacity, competitor_prices):
    """Adjust ``my_price`` relative to the average competitor price.

    - No competitors: the price is returned unchanged.
    - Lot at or above 95% of capacity and priced above the competitor
      average: the price is lowered by 1.0.
    - Priced below the competitor average: the price is raised by 0.5.
    - Otherwise the price is returned unchanged.
    """
    if not competitor_prices:
        return my_price

    market_average = np.mean(competitor_prices)
    nearly_full = occupancy >= 0.95 * capacity

    if nearly_full and my_price > market_average:
        return my_price - 1.0
    if my_price < market_average:
        return my_price + 0.5
    return my_price


In [None]:
# --- Cell 8: Real-Time Simulation Loop ---

base_price = 10.0
lot_ids = sorted(df['LotID'].unique())
# Every lot starts at the base price; one entry is appended per time step.
prices_history = {lot: [base_price] for lot in lot_ids}

# Walk the time steps in chronological order. Sorting the raw 'DD-MM-YYYY ...'
# strings would order them by day-of-month first and scramble the simulation.
# NOTE(review): the day-first format is inferred from the preview rows; confirm.
# If Timestamp is already a datetime column, to_datetime passes it through.
unique_timestamps = pd.Series(df['Timestamp'].unique())
parsed_timestamps = pd.to_datetime(unique_timestamps, format='%d-%m-%Y %H:%M:%S')
timestamps = unique_timestamps.loc[parsed_timestamps.sort_values(kind='stable').index].tolist()

for ts in timestamps:
    ts_df = df[df['Timestamp'] == ts]
    # Snapshot of the prices before this step, so every lot reacts to its
    # competitors' previous prices rather than to prices updated mid-step.
    current_prices = {lot: prices_history[lot][-1] for lot in lot_ids}
    for lot in lot_ids:
        lot_rows = ts_df[ts_df['LotID'] == lot]
        if lot_rows.empty:
            # If no data for this lot at this timestamp, keep last price
            prices_history[lot].append(current_prices[lot])
            continue
        lot_row = lot_rows.iloc[0]
        # Baseline Linear Model (computed for reference; it does not feed the
        # price recorded below)
        price1 = baseline_linear_price(current_prices[lot], lot_row['Occupancy'], lot_row['Capacity'], alpha=0.5)
        # Demand-Based Model, limited to a bounded change per step
        demand = demand_function(
            lot_row['Occupancy'], lot_row['Capacity'], lot_row['QueueLength'],
            lot_row['Traffic'], lot_row['IsSpecialDay'], lot_row['VehicleType'])
        price2 = demand_based_price(base_price, demand, lmbda=0.8)
        price2 = smooth_price(current_prices[lot], price2, max_delta=2.0)
        # Competitive Model: adjust against nearby lots seen at this timestamp
        competitors = get_nearby_competitor_prices(lot, ts_df, current_prices)
        price3 = competitive_price(price2, lot_row['Occupancy'], lot_row['Capacity'], competitors)
        prices_history[lot].append(price3)


In [None]:
# --- Cell 9: Visualization using Bokeh ---
# Build one line chart per lot: price on the y-axis, time-step index on the x-axis.
p_list = []
for lot in lot_ids:
    lot_prices = prices_history[lot]
    steps = list(range(len(lot_prices)))
    lot_fig = figure(title=f"Parking Lot {lot} Price Over Time", width=350, height=250)
    lot_fig.line(steps, lot_prices, legend_label="Price", line_width=2)
    lot_fig.xaxis.axis_label = "Time Step"
    lot_fig.yaxis.axis_label = "Price ($)"
    p_list.append(lot_fig)

# Show all in a grid, four charts per row
grid_rows = [p_list[start:start + 4] for start in range(0, len(p_list), 4)]
show(gridplot(grid_rows))


In [None]:
# --- Cell 10: Explanations and Markdown (Colab supports Markdown in text cells) ---

"""
## Demand Function
- Combines occupancy rate, queue, traffic, special day, and vehicle type.

## Assumptions
- Demand function weights are chosen for illustration; adjust after EDA.
- Competitor pricing uses lots within 0.01 degrees (~1km).
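- Demand-based price is clipped to 0.5x-2x base price, then smoothed (at most 2.0 change per step); the competitive adjustment is applied afterwards, so the final price can fall slightly outside that range.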

## Visualization
- Interactive Bokeh plots for price evolution, one per lot.

## Further Extension
- Integrate with Pathway streaming for real-time deployment.
"""