In [None]:
import pandas as pd

# Load the raw parking dataset from the Colab working directory and preview it.
DATASET_PATH = '/content/dataset.csv'
df = pd.read_csv(DATASET_PATH)

df.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,P-0,100,34.0522,-118.2437,50,Car,low,2,0,01-01-2023,12:00:00
1,1,P-1,150,34.0522,-118.2437,120,Truck,high,10,1,01-01-2023,12:01:00
2,2,P-2,80,34.0522,-118.2437,75,Bike,average,5,0,01-01-2023,12:02:00
3,3,P-3,200,34.0522,-118.2437,180,Car,high,8,1,01-01-2023,12:03:00
4,4,P-4,100,34.0522,-118.2437,0,Car,low,2,0,01-01-2023,12:04:00


In [None]:
# Structural overview: dtypes / null counts, numeric summary, number of distinct lots.
df.info()
display(df.describe())
display(df['SystemCodeNumber'].nunique())

# Join the separate date and time strings and parse them into one datetime column.
date_time_text = df['LastUpdatedDate'].str.cat(df['LastUpdatedTime'], sep=' ')
df['timestamp'] = pd.to_datetime(date_time_text, format='%d-%m-%Y %H:%M:%S')
display(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ID                      100 non-null    int64  
 1   SystemCodeNumber        100 non-null    object 
 2   Capacity                100 non-null    int64  
 3   Latitude                100 non-null    float64
 4   Longitude               100 non-null    float64
 5   Occupancy               100 non-null    int64  
 6   VehicleType             100 non-null    object 
 7   TrafficConditionNearby  100 non-null    object 
 8   QueueLength             100 non-null    int64  
 9   IsSpecialDay            100 non-null    int64  
 10  LastUpdatedDate         100 non-null    object 
 11  LastUpdatedTime         100 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 9.5+ KB


Unnamed: 0,ID,Capacity,Latitude,Longitude,Occupancy,QueueLength,IsSpecialDay
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,49.5,132.5,34.0522,-118.2437,87.65,6.25,0.5
std,29.011492,46.80445,5.712979e-14,2.713665e-13,49.21328,3.046359,0.502519
min,0.0,80.0,34.0522,-118.2437,0.0,2.0,0.0
25%,24.75,95.0,34.0522,-118.2437,68.75,4.25,0.0
50%,49.5,125.0,34.0522,-118.2437,85.0,6.5,0.5
75%,74.25,162.5,34.0522,-118.2437,120.0,8.5,1.0
max,99.0,200.0,34.0522,-118.2437,180.0,10.0,1.0


100

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,timestamp
0,0,P-0,100,34.0522,-118.2437,50,Car,low,2,0,01-01-2023,12:00:00,2023-01-01 12:00:00
1,1,P-1,150,34.0522,-118.2437,120,Truck,high,10,1,01-01-2023,12:01:00,2023-01-01 12:01:00
2,2,P-2,80,34.0522,-118.2437,75,Bike,average,5,0,01-01-2023,12:02:00,2023-01-01 12:02:00
3,3,P-3,200,34.0522,-118.2437,180,Car,high,8,1,01-01-2023,12:03:00,2023-01-01 12:03:00
4,4,P-4,100,34.0522,-118.2437,0,Car,low,2,0,01-01-2023,12:04:00,2023-01-01 12:04:00


In [None]:
# Fraction of each lot's capacity that is currently occupied.
df['occupancy ratio'] = df['Occupancy'] / df['Capacity']

# Relative demand weight per vehicle type. The lookup keys are lower-case while
# the data holds capitalised labels ('Car', 'Truck', 'Bike'), so the labels are
# normalised before mapping; mapping the raw labels matches nothing and leaves
# the whole column NaN. Unrecognised or missing types fall back to 1.0, the
# same default the streaming pipeline in this notebook uses.
vechile_wt = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}
df['VehicleWeight'] = (
    df['VehicleType'].str.strip().str.lower().map(vechile_wt).fillna(1.0)
)

df['IsSpecialDay'] = df['IsSpecialDay'].astype(int)

In [None]:
# Pricing constants: starting price per lot and the weight applied to the
# occupancy ratio in the linear price model.
base_price, alpha = 10, 5

In [None]:
# Order rows chronologically within each lot so the recurrence runs in time order.
df = df.sort_values(by=['SystemCodeNumber', 'timestamp'])

# Linear model per lot:
#   P_0 = base_price,   P_t = P_(t-1) + alpha * occupancy_ratio_t   (t >= 1)
# Unrolled, P_t = base_price + cumulative sum of alpha * ratio over readings
# 1..t, so the first reading of every lot contributes no increment.
# Building the column in one vectorised pass also keeps it float-typed; seeding
# it with the integer base price and then assigning floats through .loc trips
# pandas' incompatible-dtype deprecation.
# Note: a missing ratio yields NaN for that row only rather than for every
# later row of the lot.
lot_key = df['SystemCodeNumber']
is_first_reading = df.groupby(lot_key, sort=False).cumcount() == 0
price_step = (alpha * df['occupancy ratio']).mask(is_first_reading, 0.0)
df['price_linear'] = base_price + price_step.groupby(lot_key, sort=False).cumsum()

In [None]:
# Ordinal traffic score: low=0, average=1, high=2.
# A direct lookup replaces the one-hot round trip: get_dummies only creates
# columns for labels that actually occur, so selecting 'Traffic_high' etc.
# raised KeyError whenever a level was absent from the data. Unknown or missing
# labels score 1 ("average"), matching the streaming `traffic_map` default.
traffic_scores = {'low': 0, 'average': 1, 'high': 2}
df['normalized traffic'] = (
    df['TrafficConditionNearby'].str.lower().map(traffic_scores).fillna(1).astype(int)
)

# Demand-model coefficients: occupancy, queue length, traffic (subtracted),
# special day, vehicle weight.
a,b,c,d,e = 1,0.8,0.5,1.2,1.0

df['demand_raw'] = (a*df['occupancy ratio'] + b*df['QueueLength'] - c*df['normalized traffic'] + d*df['IsSpecialDay'] + e*df['VehicleWeight'])

In [None]:
# Min-max scale the raw demand into [0, 1].
d_min, d_max = df['demand_raw'].min(), df['demand_raw'].max()
demand_span = d_max - d_min

if demand_span == 0:
    # Every observed demand value is identical: dividing by the zero span would
    # give NaN for the whole column, so use 0.0 (missing values stay missing).
    df['normalized_demand'] = df['demand_raw'].where(df['demand_raw'].isna(), 0.0)
else:
    df['normalized_demand'] = (df['demand_raw'] - d_min) / demand_span

In [None]:
# Demand-based price: scale the base price by normalised demand, then keep it
# within [0.5 x base, 2 x base].
lambda_ = 1.5
lower_bound, upper_bound = 0.5 * base_price, 2 * base_price
unclipped_price = base_price * (1 + lambda_ * df['normalized_demand'])
df['price demand'] = unclipped_price.clip(lower=lower_bound, upper=upper_bound)

In [None]:
from math import radians,sin,cos,sqrt,atan2

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points via the haversine formula.

    Args:
        lat1, lon1: latitude / longitude of the first point, in degrees.
        lat2, lon2: latitude / longitude of the second point, in degrees.

    Returns:
        The distance on a sphere of radius 6371 (Earth's mean radius in
        kilometres), i.e. a distance in km.
    """
    sphere_radius = 6371
    half_dlat = radians(lat2 - lat1) / 2
    half_dlon = radians(lon2 - lon1) / 2
    lat1_rad, lat2_rad = radians(lat1), radians(lat2)

    # Haversine of the central angle between the two points.
    hav = sin(half_dlat) ** 2 + cos(lat1_rad) * cos(lat2_rad) * sin(half_dlon) ** 2
    central_angle = 2 * atan2(sqrt(hav), sqrt(1 - hav))
    return sphere_radius * central_angle



In [None]:
import pathway as pw

# Exploratory dump of what the installed Pathway build exposes.
print("Attributes available in the 'pathway' module:")
print(dir(pw))

print("\nAttributes available in the 'pathway.io' submodule:")
print(dir(pw.io))

# Optional submodules are resolved lazily inside the try block so that a
# missing one is reported instead of raising.
# (pathway.types is of interest for schema types.)
optional_submodules = (
    ("pathway.io.csv", lambda: pw.io.csv),
    ("pathway.debug", lambda: pw.debug),
    ("pathway.types", lambda: pw.types),
)

for dotted_name, resolve in optional_submodules:
    try:
        print(f"\nAttributes available in the '{dotted_name}' submodule:")
        print(dir(resolve()))
    except AttributeError:
        print(f"\n'{dotted_name}' submodule not found.")

Attributes available in the 'pathway' module:

Attributes available in the 'pathway.io' submodule:
['CsvParserSettings', 'OnChangeCallback', 'OnFinishCallback', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_subscribe', '_synchronization', '_utils', 'airbyte', 'bigquery', 'csv', 'debezium', 'deltalake', 'elasticsearch', 'fs', 'gdrive', 'http', 'iceberg', 'jsonlines', 'kafka', 'logstash', 'minio', 'mongodb', 'mqtt', 'nats', 'null', 'plaintext', 'postgres', 'pubsub', 'pyfilesystem', 'python', 'redpanda', 'register_input_synchronization_group', 's3', 's3_csv', 'slack', 'sqlite', 'subscribe']

Attributes available in the 'pathway.io.csv' submodule:
['ColumnReference', 'CsvParserSettings', 'Iterable', 'Literal', 'PathLike', 'Table', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'annotations', 'check_arg_types', 'check_deprecated_kwargs',

In [None]:
!pip install pathway pandas --quiet

import pathway as pw
import pandas as pd
import time
import os

# Remove the dummy dataset creation and file writing
# data = {
#     'ID': range(100),
#     'SystemCodeNumber': [f'P-{i}' for i in range(100)],
#     'Capacity': [100, 150, 80, 200] * 25,
#     'Latitude': [34.0522] * 100,
#     'Longitude': [-118.2437] * 100,
#     'Occupancy': [50, 120, 75, 180, 0, 90, 80, 100] * 12 + [50, 120, 75, 180],
#     'VehicleType': ['Car', 'Truck', 'Bike', 'Car'] * 25,
#     'TrafficConditionNearby': ['low', 'high', 'average', 'high'] * 25,
#     'QueueLength': [2, 10, 5, 8] * 25,
#     'IsSpecialDay': [0, 1, 0, 1] * 25,
#     'LastUpdatedDate': ['01-01-2023'] * 100,
#     'LastUpdatedTime': [f'12:{i:02d}:00' for i in range(60)] + [f'13:{i:02d}:00' for i in range(40)]
# }
# df = pd.DataFrame(data)
# df.to_csv("dataset.csv", index=False)


class ParkingSchema(pw.Schema):
    """Column names and types for the rows read from the parking CSV.

    Date and time are kept as separate strings here; they are combined into a
    single timestamp column after the table is read.
    """
    ID: int
    SystemCodeNumber: str  # parking-lot identifier, used later as parking_lot_id
    Capacity: int
    Latitude: float
    Longitude: float
    Occupancy: int
    VehicleType: str  # compared against "Car" / "Truck" / "Bike" downstream
    TrafficConditionNearby: str  # mapped to 0/1/2 by traffic_map
    QueueLength: int
    IsSpecialDay: int
    LastUpdatedDate: str  # parsed downstream with "%d-%m-%Y"
    LastUpdatedTime: str  # parsed downstream with "%H:%M:%S"


# Read the CSV as a Pathway table typed by ParkingSchema.
# NOTE(review): mode="streaming" presumably keeps watching the path for new
# data, so a run on this static file may never finish on its own — confirm
# against the Pathway connector docs (a static/batch mode may be intended).
input_stream = pw.io.csv.read(
    "/content/dataset.csv", # Use the existing dataset
    schema=ParkingSchema,
    mode="streaming",
    autocommit_duration_ms=500
)

# Use a UDF to convert date and time strings to timestamps
@pw.udf
def to_timestamp_udf(date_str: str, time_str: str) -> pw.DateTimeUtc:
    """Combine a date string and a time string into one UTC timestamp.

    Args:
        date_str: date formatted as ``DD-MM-YYYY``.
        time_str: time formatted as ``HH:MM:SS``.

    Returns:
        A timezone-aware datetime in UTC.

    Raises:
        ValueError: if the combined text does not match
            ``"%d-%m-%Y %H:%M:%S"``.
    """
    from datetime import datetime, timezone

    parsed = datetime.strptime(date_str + " " + time_str, "%d-%m-%Y %H:%M:%S")
    # strptime returns a naive datetime, but the declared return type is the
    # timezone-aware pw.DateTimeUtc; attach UTC so value and type agree.
    # NOTE(review): this assumes the source timestamps are recorded in UTC —
    # confirm, or switch the annotation to pw.DateTimeNaive instead.
    return parsed.replace(tzinfo=timezone.utc)

# Attach the parsed timestamp column built from the date and time strings.
input_stream = input_stream.with_columns(
    timestamp=to_timestamp_udf(pw.this.LastUpdatedDate, pw.this.LastUpdatedTime)
)


@pw.udf
def traffic_map(val: str) -> int:
    """Translate a traffic label into an ordinal score.

    'low' -> 0, 'average' -> 1, 'high' -> 2 (case-insensitive). Any other
    label, or a non-string value, scores 1.
    """
    if not isinstance(val, str):
        return 1
    scores = {'low': 0, 'average': 1, 'high': 2}
    return scores.get(val.lower(), 1)

# Attach the ordinal traffic score derived from the traffic label.
input_stream = input_stream.with_columns(
    normalized_traffic=traffic_map(pw.this.TrafficConditionNearby)
)

# --- START OF CORRECTIONS ---
# Replace the Python UDF with native Pathway expressions for better performance and correctness.

# 1. Occupancy ratio, guarded with pw.if_else so rows whose capacity is not
#    positive get 0.0 instead of a division by zero. The literal 0.0 keeps the
#    column float-typed.
intermediate_stream = input_stream.with_columns(
    occ_ratio=pw.if_else(
        pw.this.Capacity > 0,
        pw.this.Occupancy / pw.this.Capacity,
        0.0,
    )
)

# 2. Vehicle weight via nested pw.if_else: Car 1.0, Truck 1.5, Bike 0.5,
#    anything else falls back to the default 1.0.
vehicle_type = intermediate_stream.VehicleType
weight_expr = pw.if_else(
    vehicle_type == "Car", 1.0,
    pw.if_else(
        vehicle_type == "Truck", 1.5,
        pw.if_else(vehicle_type == "Bike", 0.5, 1.0),
    ),
)
intermediate_stream = intermediate_stream.with_columns(vehicle_weight=weight_expr)

# 3. Pricing model written directly as Pathway column expressions
base_price = 10
a, b, c, d, e = 1.0, 0.6, 0.3, 0.8, 0.5

# Weighted demand signal: occupancy, queue length, traffic (subtracted),
# special day and vehicle weight.
raw_demand = (
    a * intermediate_stream.occ_ratio
    + b * intermediate_stream.QueueLength
    - c * intermediate_stream.normalized_traffic
    + d * intermediate_stream.IsSpecialDay
    + e * intermediate_stream.vehicle_weight
)

# Scale demand by a fixed divisor of 5 and clamp the result into [0, 1].
# Nested pw.if_else is used because the comparison is row-wise on expressions:
# negative raw demand -> 0.0, scaled demand above 1 -> 1.0, otherwise the
# scaled value itself.
scaled_demand = raw_demand / 5
normalized_demand = pw.if_else(
    raw_demand < 0,
    0.0,
    pw.if_else(scaled_demand > 1, 1.0, scaled_demand),
)

# Price grows linearly with normalised demand.
price = base_price * (1 + 1.5 * normalized_demand)

# Keep the price within [0.5 x base, 2 x base].
floor_price = 0.5 * base_price
ceiling_price = 2 * base_price
capped_price = pw.if_else(
    price < floor_price,
    floor_price,
    pw.if_else(price > ceiling_price, ceiling_price, price),
)


# Final table: timestamp, lot id and the clamped price for every row.
output = intermediate_stream.select(
    timestamp=pw.this.timestamp,
    parking_lot_id=pw.this.SystemCodeNumber,
    price=capped_price,
)
# --- END OF CORRECTIONS ---


def print_output():
    """Print the update stream of the module-level ``output`` table.

    Prints a banner, then passes ``output`` to
    ``pw.debug.compute_and_print_update_stream``. A ``KeyboardInterrupt``
    raised while that call is running is caught and reported with a short
    message instead of propagating.

    NOTE(review): no call to this function appears in the visible part of the
    notebook — confirm whether it is meant to be invoked.
    """
    print("Starting to process stream. Output will appear below:")
    try:
        # Using debug.compute_and_print_update_stream for Pathway
        pw.debug.compute_and_print_update_stream(output)

    except KeyboardInterrupt:
        print("\nStream processing stopped.")


# Start the Pathway computation.
# NOTE(review): in the visible cell nothing writes or subscribes to `output`
# and print_output() is not called, so this run presumably produces no visible
# results — confirm a sink is attached before relying on it.
pw.run()

# Clean up the dummy file - this is no longer needed as we are using the original file
# os.remove("dataset.csv")

Output()



In [None]:
# Fraction of each lot's capacity that is currently occupied.
df['occupancy ratio'] = df['Occupancy'] / df['Capacity']

# Relative demand weight per vehicle type. The lookup keys are lower-case while
# the data holds capitalised labels ('Car', 'Truck', 'Bike'), so the labels are
# normalised before mapping; mapping the raw labels matches nothing and leaves
# the whole column NaN. Unrecognised or missing types fall back to 1.0, the
# same default the streaming pipeline in this notebook uses.
vechile_wt = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}
df['VehicleWeight'] = (
    df['VehicleType'].str.strip().str.lower().map(vechile_wt).fillna(1.0)
)

df['IsSpecialDay'] = df['IsSpecialDay'].astype(int)

In [None]:
# Pricing constants: starting price per lot and the weight applied to the
# occupancy ratio in the linear price model.
base_price, alpha = 10, 5

In [None]:
# Parse the separate date and time strings into one datetime column, then order
# rows chronologically within each lot so the recurrence runs in time order.
df['timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], format='%d-%m-%Y %H:%M:%S')
df = df.sort_values(by=['SystemCodeNumber', 'timestamp'])

# Linear model per lot:
#   P_0 = base_price,   P_t = P_(t-1) + alpha * occupancy_ratio_t   (t >= 1)
# Unrolled, P_t = base_price + cumulative sum of alpha * ratio over readings
# 1..t, so the first reading of every lot contributes no increment.
# Building the column in one vectorised pass also keeps it float-typed; seeding
# it with the integer base price and then assigning floats through .loc trips
# pandas' incompatible-dtype deprecation.
# Note: a missing ratio yields NaN for that row only rather than for every
# later row of the lot.
lot_key = df['SystemCodeNumber']
is_first_reading = df.groupby(lot_key, sort=False).cumcount() == 0
price_step = (alpha * df['occupancy ratio']).mask(is_first_reading, 0.0)
df['price_linear'] = base_price + price_step.groupby(lot_key, sort=False).cumsum()

In [None]:
# Ordinal traffic score: low=0, average=1, high=2.
# A direct lookup replaces the one-hot round trip: get_dummies only creates
# columns for labels that actually occur, so selecting 'Traffic_high' etc.
# raised KeyError whenever a level was absent from the data. Unknown or missing
# labels score 1 ("average"), matching the streaming `traffic_map` default.
traffic_scores = {'low': 0, 'average': 1, 'high': 2}
df['normalized traffic'] = (
    df['TrafficConditionNearby'].str.lower().map(traffic_scores).fillna(1).astype(int)
)

# Demand-model coefficients: occupancy, queue length, traffic (subtracted),
# special day, vehicle weight.
a,b,c,d,e = 1,0.8,0.5,1.2,1.0

df['demand_raw'] = (a*df['occupancy ratio'] + b*df['QueueLength'] - c*df['normalized traffic'] + d*df['IsSpecialDay'] + e*df['VehicleWeight'])

In [None]:
# Min-max scale the raw demand into [0, 1].
d_min, d_max = df['demand_raw'].min(), df['demand_raw'].max()
demand_span = d_max - d_min

if demand_span == 0:
    # Every observed demand value is identical: dividing by the zero span would
    # give NaN for the whole column, so use 0.0 (missing values stay missing).
    df['normalized_demand'] = df['demand_raw'].where(df['demand_raw'].isna(), 0.0)
else:
    df['normalized_demand'] = (df['demand_raw'] - d_min) / demand_span

In [None]:
# Demand-based price: scale the base price by normalised demand, then keep it
# within [0.5 x base, 2 x base].
lambda_ = 1.5
lower_bound, upper_bound = 0.5 * base_price, 2 * base_price
unclipped_price = base_price * (1 + lambda_ * df['normalized_demand'])
df['price demand'] = unclipped_price.clip(lower=lower_bound, upper=upper_bound)

In [None]:
from bokeh.plotting import figure, show, output_notebook
output_notebook()

from bokeh.models import ColumnDataSource
import numpy as np

# Plot against the parsed 'timestamp' column rather than 'LastUpdatedDate':
# the latter holds date-only strings, which a datetime axis cannot place.
# Rows are ordered by time first because df is sorted by lot, and a line glyph
# connects points in row order.
# NOTE(review): all lots share one line here — confirm whether one line per
# lot is wanted instead.
plot_rows = df.sort_values('timestamp')
source = ColumnDataSource(data=dict(x=plot_rows['timestamp'], y=plot_rows['price demand']))

p = figure(
    x_axis_type="datetime",
    title="Dynamic Price",
    x_axis_label="Last updated",
    y_axis_label="Demand-based price",
)
p.line(x='x', y='y', source=source, line_width=2)
show(p)

In [None]:
!pip install bokeh

