<a href="https://colab.research.google.com/github/arpit-devop/summeranalyticsIITG/blob/main/analyticsIITG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import pandas as pd
import numpy as np

# --- Configuration for Model 1 ---
BASE_PRICE = 10  # Base price for parking
ALPHA = 5


parking_lot_data_t = {
    'ParkingLot_ID': 'ParkingLot_A',
    'Previous_Price': 10.0,
    'Occupancy': 75,
    'Capacity': 100
}


current_lot_state = pd.Series(parking_lot_data_t)

# --- Model 1: Baseline Linear Model Logic ---
def calculate_price_model1(previous_price, occupancy, capacity, alpha=ALPHA):
    """
    Calculates the next price using the Baseline Linear Model.
    Price(t+1) = Price(t) + alpha * (Occupancy / Capacity)

    Args:
        previous_price (float): The price of the parking lot at the previous time step (Price_t).
        occupancy (int): The current number of parked vehicles.
        capacity (int): The maximum capacity of the parking lot.
        alpha (float): The coefficient to adjust price based on occupancy.

    Returns:
        float: The calculated price for the next time step (Price_t+1).
    """
    if capacity == 0:

        occupancy_rate = 0
    else:
        occupancy_rate = occupancy / capacity


    next_price = previous_price + alpha * occupancy_rate


    return next_price


if __name__ == "__main__":
    print(f"--- Model 1: Baseline Linear Model Calculation ---")
    print(f"Parking Lot ID: {current_lot_state['ParkingLot_ID']}")
    print(f"Previous Price (Price_t): ${current_lot_state['Previous_Price']:.2f}")
    print(f"Current Occupancy: {current_lot_state['Occupancy']}")
    print(f"Capacity: {current_lot_state['Capacity']}")
    print(f"Alpha (α): {ALPHA}")

    next_price = calculate_price_model1(
        previous_price=current_lot_state['Previous_Price'],
        occupancy=current_lot_state['Occupancy'],
        capacity=current_lot_state['Capacity'],
        alpha=ALPHA
    )

    print(f"Calculated Next Price (Price_t+1): ${next_price:.2f}")



--- Model 1: Baseline Linear Model Calculation ---
Parking Lot ID: ParkingLot_A
Previous Price (Price_t): $10.00
Current Occupancy: 75
Capacity: 100
Alpha (α): 5
Calculated Next Price (Price_t+1): $13.75


In [24]:
!pip install pathway bokeh geopy --quiet

import pathway as pw
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
from geopy.distance import geodesic
import asyncio
output_notebook()


In [25]:
# --- 2. Load Data ---

df = pd.read_csv('/content/dataset.csv')


df['timestamp'] = df['LastUpdatedDate'].astype(str) + ' ' + df['LastUpdatedTime'].astype(str)


df = df.sort_values(['SystemCodeNumber', 'timestamp']).reset_index(drop=True)
df.head()


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,timestamp
0,468,BHMBCCMKT01,577,26.144536,91.736172,50,car,low,1,0,01-11-2016,08:06:00,01-11-2016 08:06:00
1,469,BHMBCCMKT01,577,26.144536,91.736172,54,car,low,1,0,01-11-2016,08:26:00,01-11-2016 08:26:00
2,470,BHMBCCMKT01,577,26.144536,91.736172,69,car,average,2,0,01-11-2016,09:00:00,01-11-2016 09:00:00
3,471,BHMBCCMKT01,577,26.144536,91.736172,91,car,low,1,0,01-11-2016,09:26:00,01-11-2016 09:26:00
4,472,BHMBCCMKT01,577,26.144536,91.736172,135,truck,average,3,0,01-11-2016,10:00:00,01-11-2016 10:00:00


In [26]:
# --- 3. Helper Functions ---

def vehicle_type_weight(vtype):
    return {'car': 1.0, 'bike': 0.7, 'truck': 1.5, 'cycle': 0.5}.get(str(vtype).lower(), 1.0)

def traffic_level_num(traffic):

    return {'low': 0, 'average': 0.5, 'high': 1}.get(str(traffic).lower(), 0.5)

def normalize(series):
    return (series - series.min()) / (series.max() - series.min() + 1e-6)


In [27]:
# --- 4. Model 1: Baseline Linear Model ---

BASE_PRICE = 10.0
ALPHA = 5.0

def baseline_linear_model(df):
    df = df.copy()
    df['price'] = np.nan
    for lot_id in df['SystemCodeNumber'].unique():
        lot_mask = df['SystemCodeNumber'] == lot_id
        lot_df = df[lot_mask]
        prices = [BASE_PRICE]
        for i in range(1, len(lot_df)):
            prev_price = prices[-1]
            occupancy = lot_df.iloc[i]['Occupancy']
            capacity = lot_df.iloc[i]['Capacity']
            occupancy_rate = occupancy / capacity if capacity > 0 else 0
            next_price = prev_price + ALPHA * occupancy_rate
            prices.append(next_price)
        df.loc[lot_mask, 'price'] = prices
    return df


In [28]:
# --- 5. Model 2: Demand-Based Price Function ---


alpha, beta, gamma, delta, epsilon = 1, 0.5, 0.3, 2, 1
lambda_ = 0.8

def demand_function(row):
    return (
        alpha * (row['Occupancy'] / row['Capacity'] if row['Capacity'] > 0 else 0) +
        beta * row['QueueLength'] -
        gamma * traffic_level_num(row['TrafficConditionNearby']) +
        delta * row['IsSpecialDay'] +
        epsilon * vehicle_type_weight(row['VehicleType'])
    )

def demand_based_model(df):
    df = df.copy()
    df['demand'] = df.apply(demand_function, axis=1)
    df['norm_demand'] = df.groupby('timestamp')['demand'].transform(normalize)
    df['price'] = BASE_PRICE * (1 + lambda_ * df['norm_demand'])
    df['price'] = df['price'].clip(lower=0.5*BASE_PRICE, upper=2*BASE_PRICE)
    return df

In [29]:
# --- 6. Model 3: Competitive Pricing Model ---


def get_nearby_lots(df, lot_row, radius_km=0.5):
    current_coords = (lot_row['Latitude'], lot_row['Longitude'])
    def is_nearby(row):
        return geodesic(current_coords, (row['Latitude'], row['Longitude'])).km < radius_km and row['SystemCodeNumber'] != lot_row['SystemCodeNumber']
    return df[df.apply(is_nearby, axis=1)]


def simulate_competitor_price(df):
    df = df.copy()
    df['CompetitorPrice'] = df.groupby('timestamp')['price'].transform(lambda x: x.mean())
    return df

def competitive_pricing(df):
    df = df.copy()
    df['final_price'] = df['price']
    for idx, row in df.iterrows():
        nearby = get_nearby_lots(df[df['timestamp'] == row['timestamp']], row)
        if not nearby.empty:
            competitor_price = nearby['price'].mean()

            if row['Occupancy'] >= row['Capacity'] and competitor_price < row['price']:
                df.at[idx, 'final_price'] = max(competitor_price - 1, 0.5*BASE_PRICE)

            elif competitor_price > row['price']:
                df.at[idx, 'final_price'] = min(row['price'] + 1, 2*BASE_PRICE)
    return df


In [30]:

MODEL = 2

if MODEL == 1:
    model_df = baseline_linear_model(df)
elif MODEL == 2:
    model_df = demand_based_model(df)
elif MODEL == 3:
    model_df = demand_based_model(df)
    model_df = simulate_competitor_price(model_df)
    model_df = competitive_pricing(model_df)


from bokeh.palettes import Category10
p = figure(title="Price Evolution for All Parking Lots", x_axis_label='Time Step', y_axis_label='Price', width=900, height=400)
colors = Category10[10] + Category10[4]  # For up to 14 lots

for idx, lot_id in enumerate(model_df['SystemCodeNumber'].unique()):
    lot_df = model_df[model_df['SystemCodeNumber'] == lot_id]
    source = ColumnDataSource(data=dict(
        time=lot_df['timestamp'].astype(str),
        price=lot_df['final_price'] if MODEL == 3 else lot_df['price']
    ))
    p.line('time', 'price', source=source, legend_label=str(lot_id), line_width=2, color=colors[idx % len(colors)])

p.legend.location = "top_left"
show(p)


In [38]:
class ParkingSchema(pw.Schema):
    SystemCodeNumber: str
    Capacity: int
    Latitude: float
    Longitude: float
    Occupancy: int
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: int
    IsSpecialDay: int
    LastUpdatedDate: str
    LastUpdatedTime: str
    # Add other columns if present


In [32]:


csv_path = '/content/dataset.csv'
stream = pw.demo.replay_csv(
    path=csv_path,
    schema=ParkingSchema,
    input_rate=2
)


In [33]:
def vehicle_type_weight(vtype):
    return {'car': 1.0, 'bike': 0.7, 'truck': 1.5, 'cycle': 0.5}.get(str(vtype).lower(), 1.0)

def traffic_level_num(traffic):
    return {'low': 0, 'average': 0.5, 'high': 1}.get(str(traffic).lower(), 0.5)

def demand_function(row):
    alpha, beta, gamma, delta, epsilon = 1, 0.5, 0.3, 2, 1
    return (
        alpha * (row['Occupancy'] / row['Capacity'] if row['Capacity'] > 0 else 0) +
        beta * row['QueueLength'] -
        gamma * traffic_level_num(row['TrafficConditionNearby']) +
        delta * row['IsSpecialDay'] +
        epsilon * vehicle_type_weight(row['VehicleType'])
    )


In [37]:
@pw.udf
def compute_price(
    Occupancy, Capacity, QueueLength, TrafficConditionNear, IsSpecialDay, VehicleType
):

    BASE_PRICE = 10.0
    lambda_ = 0.8

    demand = demand_function({
        'Occupancy': Occupancy,
        'Capacity': Capacity,
        'QueueLength': QueueLength,
        'TrafficConditionNear': TrafficConditionNearby,
        'IsSpecialDay': IsSpecialDay,
        'VehicleType': VehicleType
    })

    norm_demand = min(max(demand / 10, 0), 1)
    price = BASE_PRICE * (1 + lambda_ * norm_demand)
    price = np.clip(price, 0.5 * BASE_PRICE, 2 * BASE_PRICE)
    return float(price)


stream = stream.with_columns(
    price=pw.apply(
        compute_price,
        stream.Occupancy,
        stream.Capacity,
        stream.QueueLength,
        stream.TrafficConditionNearby,
        stream.IsSpecialDay,
        stream.VehicleType
    )
)


In [None]:
pw.io.csv.write(stream, "output.csv")


pw.run()


result_df = pd.read_csv("output.csv")

In [None]:
import pandas as pd
df_check = pd.read_csv('/content/dataset.csv')
print(df_check.columns)

In [3]:
!pip install pathway bokeh geopy --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.6/77.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m777.6/777.6 kB[0m [31m35.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [6]:
import pathway as pw
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
from geopy.distance import geodesic
import asyncio
output_notebook()
from bokeh.palettes import Category10


# --- Define the corrected ParkingSchema ---
class ParkingSchema(pw.Schema):
    SystemCodeNumber: str
    Capacity: float
    Latitude: float
    Longitude: float
    Occupancy: float
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: float
    IsSpecialDay: float
    LastUpdatedDate: str
    LastUpdatedTime: str

def vehicle_type_weight(vtype):
    return {'car': 1.0, 'bike': 0.7, 'truck': 1.5, 'cycle': 0.5}.get(str(vtype).lower(), 1.0)

def traffic_level_num(traffic):
    return {'low': 0, 'average': 0.5, 'high': 1}.get(str(traffic).lower(), 0.5)

def normalize(series):

    return (series - series.min()) / (series.max() - series.min() + 1e-6)

def demand_function(row):
    alpha, beta, gamma, delta, epsilon = 1, 0.5, 0.3, 2, 1

    return (
        alpha * (row['Occupancy'] / row['Capacity'] if row['Capacity'] > 0 else 0) +
        beta * row['QueueLength'] -
        gamma * traffic_level_num(row['TrafficConditionNearby']) + # Corrected column name
        delta * row['IsSpecialDay'] +
        epsilon * vehicle_type_weight(row['VehicleType'])
    )


@pw.udf
def compute_price(
    Occupancy, Capacity, QueueLength, TrafficConditionNearby, IsSpecialDay, VehicleType
):
    # Demand-based pricing (Model 2)
    BASE_PRICE = 10.0
    lambda_ = 0.8

    demand = demand_function({
        'Occupancy': Occupancy,
        'Capacity': Capacity,
        'QueueLength': QueueLength,
        'TrafficConditionNearby': TrafficConditionNearby,
        'IsSpecialDay': IsSpecialDay,
        'VehicleType': VehicleType
    })

    norm_demand = min(max(demand / 10, 0), 1) # Simple clipping for demo
    price = BASE_PRICE * (1 + lambda_ * norm_demand)
    price = np.clip(price, 0.5 * BASE_PRICE, 2 * BASE_PRICE)
    return float(price)


csv_path = '/content/dataset.csv'
stream = pw.demo.replay_csv(
    path=csv_path,
    schema=ParkingSchema,
    input_rate=2
)


stream = stream.with_columns(
  price = compute_price(
    stream.Occupancy,
    stream.Capacity,
    stream.QueueLength,
    stream.TrafficConditionNearby,
    stream.IsSpecialDay,
    stream.VehicleType
)

)

stream = stream.select(
    stream.SystemCodeNumber,
    stream.price,
    stream.LastUpdatedDate,
    stream.LastUpdatedTime,
    stream.Occupancy,
    stream.Capacity,
    stream.Latitude,
    stream.Longitude,
    stream.VehicleType,
    stream.TrafficConditionNearby,
    stream.QueueLength,
    stream.IsSpecialDay
)


output_csv_path = '/content/price_output.csv'
pw.io.csv.write(stream, output_csv_path)


print("Running Pathway pipeline...")

pw.run()
print("Pathway pipeline finished.")



import time
time.sleep(2)



Output()

Running Pathway pipeline...


ERROR:pathway_engine.connectors:There had been an error processing the row read result: FileNotFoundError: [Errno 2] No such file or directory: '/content/dataset.csv'


FileNotFoundError: [Errno 2] No such file or directory: '/content/dataset.csv'

In [3]:
output_csv_path = '/content/price_output.csv'  # Add this line

try:
    result_df = pd.read_csv(output_csv_path)

    result_df['timestamp'] = result_df['LastUpdatedDate'].astype(str) + ' ' + result_df['LastUpdatedTime'].astype(str)
    result_df = result_df.sort_values(['SystemCodeNumber', 'timestamp']).reset_index(drop=True)

    print("First 5 rows of the output DataFrame:")
    display(result_df.head())

    def plot_realtime(df_to_plot):
        p = figure(title="Real-Time Parking Price", x_axis_label='Timestamp', y_axis_label='Price', width=900, height=400)
        colors = Category10[10] + Category10[4]

        for idx, lot_id in enumerate(df_to_plot['SystemCodeNumber'].unique()):
            lot_df = df_to_plot[df_to_plot['SystemCodeNumber'] == lot_id].copy()
            lot_df = lot_df.sort_values('timestamp')

            source = ColumnDataSource(data=dict(
                time=lot_df['timestamp'],
                price=lot_df['price']
            ))
            p.line('time', 'price', source=source, legend_label=str(lot_id), line_width=2, color=colors[idx % len(colors)])

        p.legend.location = "top_left"
        if len(df_to_plot['SystemCodeNumber'].unique()) > 10:
            p.legend.click_policy = "hide"

        show(p)

    if not result_df.empty:
        plot_realtime(result_df)
    else:
        print("Output DataFrame is empty, cannot plot.")

except FileNotFoundError:
    print(f"Output file not found at {output_csv_path}. Make sure the Pathway run completed successfully.")
except Exception as e:
    print(f"An error occurred while reading the CSV or plotting: {e}")


First 5 rows of the output DataFrame:


Unnamed: 0,SystemCodeNumber,price,LastUpdatedDate,LastUpdatedTime,Occupancy,Capacity,Latitude,Longitude,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,time,diff,timestamp
0,BHMBCCMKT01,11.269324,01-11-2016,08:06:00,50.0,577.0,26.144536,91.736172,car,low,1.0,0.0,1751906901232,1,01-11-2016 08:06:00
1,BHMBCCMKT01,11.27487,01-11-2016,08:26:00,54.0,577.0,26.144536,91.736172,car,low,1.0,0.0,1751906901732,1,01-11-2016 08:26:00
2,BHMBCCMKT01,11.575667,01-11-2016,09:00:00,69.0,577.0,26.144536,91.736172,car,average,2.0,0.0,1751906902232,1,01-11-2016 09:00:00
3,BHMBCCMKT01,11.32617,01-11-2016,09:26:00,91.0,577.0,26.144536,91.736172,car,low,1.0,0.0,1751906902732,1,01-11-2016 09:26:00
4,BHMBCCMKT01,12.467175,01-11-2016,10:00:00,135.0,577.0,26.144536,91.736172,truck,average,3.0,0.0,1751906903232,1,01-11-2016 10:00:00


In [8]:
import pandas as pd
import time
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource

output_notebook()

time.sleep(2)

try:
    result_df = pd.read_csv("/content/price_output.csv")

    # Convert date + time to timestamp, assuming DD-MM-YYYY format
    result_df['timestamp'] = pd.to_datetime(
        result_df['LastUpdatedDate'] + ' ' + result_df['LastUpdatedTime'],
        dayfirst=True
    )

    # Group by day and average price
    daily_df = result_df.groupby(result_df['timestamp'].dt.date)['price'].mean().reset_index()
    daily_df.columns = ['t', 'price']
    daily_df['t'] = pd.to_datetime(daily_df['t'])

    # Plot
    def plot_daily_price(df):
        p = figure(title="Daily Aggregated Dynamic Parking Prices",
                   x_axis_type='datetime', width=900, height=400)
        source = ColumnDataSource(df)

        p.line(x='t', y='price', source=source, line_width=2, color='navy')
        p.circle(x='t', y='price', source=source, size=6, color='red')

        p.xaxis.axis_label = "Date"
        p.yaxis.axis_label = "Price"
        show(p)

    plot_daily_price(daily_df)

except Exception as e:
    print("❌ Error loading output or plotting:", e)




In [13]:
import pandas as pd
import time
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource

output_notebook()

time.sleep(2)

try:
    # Read CSV
    result_df = pd.read_csv("/content/price_output.csv")

    # Parse datetime with day-first format
    result_df['timestamp'] = pd.to_datetime(
        result_df['LastUpdatedDate'] + ' ' + result_df['LastUpdatedTime'],
        dayfirst=True
    )

    # Aggregate by date
    daily_df = result_df.groupby(result_df['timestamp'].dt.date)['price'].mean().reset_index()
    daily_df.columns = ['t', 'price']
    daily_df['t'] = pd.to_datetime(daily_df['t'])

    # Plot function
    def plot_daily_price(df):  # 👈 Make sure you use this parameter correctly
        p = figure(title="Daily Aggregated Dynamic Parking Prices",
                   x_axis_type='datetime', width=900, height=400)
        source = ColumnDataSource(df)

        p.line(x='t', y='price', source=source, line_width=2, color='navy')
        p.scatter(x='t', y='price', source=source, size=6, color='red', marker='circle')

        p.xaxis.axis_label = "Date"
        p.yaxis.axis_label = "Price"
        show(p)

    # Call plot with daily_df
    plot_daily_price(daily_df)

except Exception as e:
    print("❌ Error loading output or plotting:", e)


In [14]:
result_df.to_csv("daily_price_for_report.csv", index=False)
