In [None]:
# --- 0. Environment Setup and Library Imports ---
# Run these commands in your Google Colab notebook to install necessary libraries.
!pip install pandas numpy bokeh pathway # Pathway is included for conceptual understanding, but not fully utilized in the direct runnable simulation here.

import pandas as pd
import numpy as np
import pathway as pw # Imported for conceptual explanation, not directly used in the runnable simulation loop.
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.io import output_notebook, push_notebook
import time
from math import radians, sin, cos, sqrt, atan2

# --- 1. Global Constants and Parameters ---

# Base price for parking. Also the initial 'previous_price' for Model 1 and the
# reference value the clipping bounds below are expressed against.
BASE_PRICE = 10.0

# Model 1: Baseline Linear Model parameters
# Price_t+1 = Price_t + alpha * (Occupancy / Capacity)
# The increment is never negative for non-negative occupancy, so Model 1 prices
# only move upward until they reach the upper clipping bound.
ALPHA_MODEL1 = 0.5

# Model 2: Demand-Based Price Function parameters
# Price = Base Price * (1 + lambda * NormalizedDemand)
# With normalized demand in [0, 1] the raw price spans BASE_PRICE .. 6 * BASE_PRICE
# before it is clipped to the bounds below.
LAMBDA_MODEL2 = 5.0

# Weights for Demand Function (Model 2) - these are example values, tune as needed
# The first four multiply a per-row feature (occupancy rate, tanh-scaled queue
# length, tanh-scaled traffic level, special-day flag) in calculate_demand.
WEIGHT_OCCUPANCY_RATE = 0.6
WEIGHT_QUEUE_LENGTH = 0.2
WEIGHT_TRAFFIC = 0.1
WEIGHT_SPECIAL_DAY = 0.1
# The vehicle-type values are added to the demand score as-is (they are offsets,
# not multipliers); unrecognised vehicle types contribute 0.
WEIGHT_VEHICLE_TYPE_CAR = 0.05
WEIGHT_VEHICLE_TYPE_BIKE = 0.02
WEIGHT_VEHICLE_TYPE_TRUCK = 0.03

# Price bounds applied by all three models (each one clips its final price) to
# keep price variations bounded.
MIN_PRICE_FACTOR = 0.5 # Lower bound is BASE_PRICE * MIN_PRICE_FACTOR
MAX_PRICE_FACTOR = 2.0 # Upper bound is BASE_PRICE * MAX_PRICE_FACTOR

# --- 2. Helper Functions ---

def calculate_distance(lat1, lon1, lat2, lon2):
    """
    Return the great-circle distance between two points using the Haversine formula.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, in decimal degrees.

    Returns:
        float: Distance in kilometers. NaN coordinates propagate to a NaN result,
        so comparisons such as ``distance < 1.0`` evaluate to False for them.
    """
    earth_radius_km = 6371  # Mean radius of Earth in kilometers

    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError. Cap it at 1.
    # A plain comparison is used (not min/max) so that NaN is left untouched.
    if a > 1.0:
        a = 1.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return earth_radius_km * c

# --- 3. Pricing Models Implementations ---

def model1_pricing_logic(df_batch):
    """
    Model 1: Baseline Linear Model.

    Next price = previous price + ALPHA_MODEL1 * (Occupancy / Capacity), clipped
    to [BASE_PRICE * MIN_PRICE_FACTOR, BASE_PRICE * MAX_PRICE_FACTOR].

    Args:
        df_batch (pd.DataFrame): Batch of parking rows with 'Occupancy' and
                                 'Capacity'. A 'previous_price' column is used
                                 when present; otherwise it is created and
                                 filled with BASE_PRICE.

    Returns:
        pd.DataFrame: The same DataFrame object, modified in place, with
        'occupancy_rate' and 'predicted_price' columns set.
    """
    price_floor = BASE_PRICE * MIN_PRICE_FACTOR
    price_ceiling = BASE_PRICE * MAX_PRICE_FACTOR

    # Without carried-over state every lot starts from the base price.
    # (A streaming Pathway app would keep this in a stateful transform.)
    has_history = 'previous_price' in df_batch.columns
    if not has_history:
        df_batch['previous_price'] = BASE_PRICE

    fill_ratio = df_batch['Occupancy'] / df_batch['Capacity']
    df_batch['occupancy_rate'] = fill_ratio

    # Linear step on top of the previous price, then bound the result.
    stepped_price = df_batch['previous_price'] + ALPHA_MODEL1 * df_batch['occupancy_rate']
    df_batch['predicted_price'] = np.clip(stepped_price, price_floor, price_ceiling)
    return df_batch

# Numeric stand-ins for textual congestion labels, placed on the 0-5 scale that
# the tanh normalisation in calculate_demand assumes.
# NOTE(review): the label set and the levels chosen here are an assumption —
# confirm against the values actually present in the dataset.
_TRAFFIC_LEVEL_BY_LABEL = {'low': 1.0, 'average': 3.0, 'medium': 3.0, 'high': 5.0}

# Fallback value per feature column, used when the column is absent from a row.
_DEMAND_FEATURE_DEFAULTS = {
    'Occupancy': 0,
    'Capacity': 1,  # 1 rather than 0 so the occupancy ratio stays defined
    'Queue length': 0,
    'Nearby traffic congestion level': 0,
    'Special day indicator': 0,
    'Type of incoming vehicle': 'car',
}


def _traffic_level(value):
    """Return a numeric congestion level; textual labels are mapped or parsed."""
    if not isinstance(value, str):
        return value
    label = value.strip().lower()
    if label in _TRAFFIC_LEVEL_BY_LABEL:
        return _TRAFFIC_LEVEL_BY_LABEL[label]
    try:
        return float(label)
    except ValueError:
        raise ValueError(f"Unrecognized traffic congestion level: {value!r}") from None


def _is_special_day(value):
    """Return 1 for 'yes'/'true' or any value numerically equal to 1, else 0."""
    text = str(value).strip().lower()
    if text in ('yes', 'true'):
        return 1
    try:
        # Covers '1', '1.0' and float-typed columns where the flag reads 1.0.
        return 1 if float(text) == 1.0 else 0
    except ValueError:
        return 0


def calculate_demand(row):
    """
    Calculates a demand score based on various features. Used in Model 2.

    The score is a weighted sum of the occupancy rate, the tanh-scaled queue
    length, the tanh-scaled traffic level and the special-day flag, plus an
    additive offset that depends on the incoming vehicle type.

    Args:
        row (pd.Series): A single row of parking data. It is not modified;
                         missing feature columns fall back to defaults and a
                         warning is printed for each one.

    Returns:
        float: The calculated demand score.

    Raises:
        ValueError: If the traffic congestion level is a string that is neither
                    a known label nor parseable as a number.
    """
    # Collect the features into a local dict instead of writing defaults back
    # into the caller's row.
    features = {}
    for col, default in _DEMAND_FEATURE_DEFAULTS.items():
        if col in row.index:
            features[col] = row[col]
        else:
            print(f"Warning: Column '{col}' not found in row. "
                  f"Using default value {default!r} for demand calculation.")
            features[col] = default

    # A zero capacity would otherwise raise ZeroDivisionError (or yield inf);
    # such a lot contributes no occupancy pressure.
    capacity = features['Capacity']
    occupancy_rate = features['Occupancy'] / capacity if capacity else 0.0

    # Normalize queue length and traffic using tanh for smoother scaling
    queue_length_normalized = np.tanh(features['Queue length'] / 10.0)  # Assuming max queue length around 10-20
    traffic_normalized = np.tanh(
        _traffic_level(features['Nearby traffic congestion level']) / 5.0
    )  # Assuming traffic on a scale, e.g., 0-5

    is_special_day = _is_special_day(features['Special day indicator'])

    # Additive offset per vehicle type; 'cycle' is treated like 'bike' and
    # unrecognised types contribute nothing.
    vehicle_type = str(features['Type of incoming vehicle']).lower()
    vehicle_type_weight = {
        'car': WEIGHT_VEHICLE_TYPE_CAR,
        'bike': WEIGHT_VEHICLE_TYPE_BIKE,
        'truck': WEIGHT_VEHICLE_TYPE_TRUCK,
        'cycle': WEIGHT_VEHICLE_TYPE_BIKE,
    }.get(vehicle_type, 0)

    # Combine weighted features to get the total demand score
    demand = (WEIGHT_OCCUPANCY_RATE * occupancy_rate +
              WEIGHT_QUEUE_LENGTH * queue_length_normalized +
              WEIGHT_TRAFFIC * traffic_normalized +
              WEIGHT_SPECIAL_DAY * is_special_day +
              vehicle_type_weight)
    return demand

def normalize_demand(demand_series):
    """
    Min-max scale a Series of demand scores into the 0-1 range.

    Args:
        demand_series (pd.Series): Raw demand scores.

    Returns:
        pd.Series: Scaled scores on the same index. When every score is
        identical the range is zero, so 0.5 is returned for each entry
        instead of dividing by zero.
    """
    lowest = demand_series.min()
    highest = demand_series.max()

    if lowest != highest:
        return (demand_series - lowest) / (highest - lowest)

    # Degenerate batch: no spread to scale by, so use the midpoint.
    return pd.Series(0.5, index=demand_series.index)

def model2_pricing_logic(df_batch):
    """
    Model 2: Demand-Based Price Function.

    Price = BASE_PRICE * (1 + LAMBDA_MODEL2 * NormalizedDemand), clipped to
    [BASE_PRICE * MIN_PRICE_FACTOR, BASE_PRICE * MAX_PRICE_FACTOR].

    Args:
        df_batch (pd.DataFrame): Batch of parking rows carrying the feature
                                 columns read by calculate_demand.

    Returns:
        pd.DataFrame: The same DataFrame object, modified in place, with
        'demand', 'normalized_demand' and 'predicted_price' columns set.
    """
    price_floor = BASE_PRICE * MIN_PRICE_FACTOR
    price_ceiling = BASE_PRICE * MAX_PRICE_FACTOR

    # Row-wise raw demand score.
    raw_demand = df_batch.apply(calculate_demand, axis=1)
    df_batch['demand'] = raw_demand

    # Scaling is relative to this batch only, not to any global history.
    df_batch['normalized_demand'] = normalize_demand(df_batch['demand'])

    unbounded_price = BASE_PRICE * (1 + LAMBDA_MODEL2 * df_batch['normalized_demand'])
    df_batch['predicted_price'] = np.clip(unbounded_price, price_floor, price_ceiling)
    return df_batch

def model3_pricing_logic(df_batch, all_parking_data_at_timestep):
    """
    Model 3: Competitive Pricing Model.

    Nudges each lot's existing 'predicted_price' up or down depending on the
    average price of other lots located less than 1 km away.

    Args:
        df_batch (pd.DataFrame): Current batch of parking rows. Must already
                                 carry 'predicted_price' (e.g. from Model 2).
        all_parking_data_at_timestep (pd.DataFrame): Rows for all parking
                                 spaces at the current time step. Only rows
                                 that expose 'predicted_price' can act as
                                 competitors.

    Returns:
        pd.DataFrame: The same DataFrame object, modified in place, with a
        'competitive_factor' column and the adjusted, clipped 'predicted_price'.
    """
    def _prices_of_nearby_rivals(lat, lon, own_id):
        # Prices of every other lot closer than 1 km (radius can be tuned).
        # Rivals lacking coordinates or a price are ignored.
        return [
            rival['predicted_price']
            for _, rival in all_parking_data_at_timestep.iterrows()
            if rival['Parking Space ID'] != own_id
            and 'Latitude' in rival
            and 'Longitude' in rival
            and calculate_distance(lat, lon, rival['Latitude'], rival['Longitude']) < 1.0
            and 'predicted_price' in rival
        ]

    # Neutral adjustment unless one of the rules below fires.
    df_batch['competitive_factor'] = 0.0

    for idx, lot in df_batch.iterrows():
        located = 'Latitude' in lot and 'Longitude' in lot
        if not located:
            print(f"Warning: Latitude or Longitude missing for Parking Space ID {lot.get('Parking Space ID', 'Unknown')}. Skipping competitive pricing for this entry.")
            continue

        rival_prices = _prices_of_nearby_rivals(
            lot['Latitude'], lot['Longitude'], lot['Parking Space ID']
        )
        if not rival_prices:
            continue

        mean_rival_price = np.mean(rival_prices)
        fill_ratio = lot['Occupancy'] / lot['Capacity']
        own_price = lot['predicted_price']

        if fill_ratio > 0.9 and mean_rival_price < own_price:
            # Nearly full while neighbours are cheaper: shave 5% off.
            df_batch.loc[idx, 'competitive_factor'] = -0.05
        elif mean_rival_price > own_price * 1.1:
            # Neighbours are more than 10% dearer: add 3% and stay attractive.
            df_batch.loc[idx, 'competitive_factor'] = 0.03

    # Apply the adjustment, then re-bound the price.
    adjusted_price = df_batch['predicted_price'] * (1 + df_batch['competitive_factor'])
    df_batch['predicted_price'] = np.clip(
        adjusted_price, BASE_PRICE * MIN_PRICE_FACTOR, BASE_PRICE * MAX_PRICE_FACTOR
    )
    return df_batch

def apply_pricing_model(df_batch, current_model_choice, all_parking_data_at_timestep):
    """
    Applies the chosen pricing model to a batch of incoming data.

    The input batch is never modified; every branch works on a copy.

    Args:
        df_batch (pd.DataFrame): The current batch of streaming data.
        current_model_choice (int): The ID of the model to use (1, 2, or 3).
        all_parking_data_at_timestep (pd.DataFrame): Snapshot of all parking data at the current time step.
                                                     Only used by Model 3. May be None, in which case the
                                                     priced batch itself serves as the competitor set.

    Returns:
        pd.DataFrame: A new DataFrame with 'predicted_price' based on the chosen model.
                      Unknown model IDs yield BASE_PRICE for every row.
    """
    if current_model_choice == 1:
        return model1_pricing_logic(df_batch.copy())
    elif current_model_choice == 2:
        return model2_pricing_logic(df_batch.copy())
    elif current_model_choice == 3:
        # For Model 3, first apply Model 2 to get a base price, then apply competitive logic
        df_with_model2_prices = model2_pricing_logic(df_batch.copy())

        # model3_pricing_logic only counts competitors that expose a
        # 'predicted_price'. A raw snapshot has none, so without this step the
        # competitive adjustment never fires and Model 3 equals Model 2.
        competitors = all_parking_data_at_timestep
        if competitors is None:
            competitors = df_with_model2_prices.copy()
        elif 'predicted_price' not in competitors.columns:
            competitors = model2_pricing_logic(competitors.copy())

        return model3_pricing_logic(df_with_model2_prices, competitors)
    else:
        # Default to base price if no valid model is chosen. Work on a copy so
        # this branch, like the others, leaves the caller's frame untouched.
        fallback = df_batch.copy()
        fallback['predicted_price'] = BASE_PRICE
        return fallback

# --- 4. Real-time Simulation Loop with Bokeh Visualization ---

def run_simulation(data_path, selected_model):
    """
    Simulates real-time data streaming and applies the dynamic pricing model,
    visualizing results with Bokeh.

    The CSV is loaded in full, its columns are mapped onto the names the pricing
    functions expect, and the rows are then replayed one time step at a time.
    After each step the newest price per parking space is streamed to a Bokeh
    line plot rendered in the notebook.

    Args:
        data_path (str): Path to the dataset CSV file.
        selected_model (int): The pricing model to use (1, 2, or 3).

    Raises:
        KeyError: If a column required by the pricing models is still missing
                  after the renaming step.
    """
    print(f"Starting simulation for Model {selected_model}...")

    # Load the entire dataset. We will simulate streaming by processing it time-step by time-step.
    full_data = pd.read_csv(data_path)

    # --- Robust Column Name Handling ---
    # Mapping from lower-cased CSV column names to the names used in this code.
    # NOTE(review): 'id' is tried before 'systemcodenumber'. If the CSV's 'ID'
    # is a per-row record number rather than a lot identifier, every row becomes
    # its own "parking space" — verify against the dataset.
    column_name_mapping = {
        'id': 'Parking Space ID',
        'systemcodenumber': 'Parking Space ID', # Assuming SystemCodeNumber can also serve as ID if 'ID' is missing
        'lastupdateddate': 'Date',
        'lastupdatedtime': 'Time', # Will be combined with Date or used to infer Time Point
        'vehicletype': 'Type of incoming vehicle',
        'trafficconditionnearby': 'Nearby traffic congestion level',
        'queuelength': 'Queue length',
        'isspecialday': 'Special day indicator',
        'capacity': 'Capacity',
        'occupancy': 'Occupancy',
        'latitude': 'Latitude',
        'longitude': 'Longitude'
    }

    # Standardize column names (case-insensitive match on the source name).
    current_columns = {col.lower(): col for col in full_data.columns}
    for old_name_lower, new_name in column_name_mapping.items():
        # Skip when the target already exists or the source column is absent.
        if new_name in full_data.columns or old_name_lower not in current_columns:
            continue
        original_col_name = current_columns[old_name_lower]
        full_data.rename(columns={original_col_name: new_name}, inplace=True)
        print(f"Renamed column '{original_col_name}' to '{new_name}'.")

    # Final check for critical columns after renaming
    critical_columns = ['Parking Space ID', 'Capacity', 'Occupancy', 'Latitude', 'Longitude',
                        'Type of incoming vehicle', 'Nearby traffic congestion level', 'Queue length',
                        'Special day indicator']
    for col in critical_columns:
        if col not in full_data.columns:
            print(f"Error: Critical column '{col}' not found in the dataset after renaming attempts. Available columns: {full_data.columns.tolist()}")
            raise KeyError(f"Critical column '{col}' is missing or named differently in dataset.csv. Please ensure it exists.")

    # Ensure 'Time Point' exists for plotting.
    if 'Time Point' not in full_data.columns:
        print("Warning: 'Time Point' column not found.")
        if 'Date' in full_data.columns and 'Time' in full_data.columns:
            print("Combining 'Date' and 'Time' to create a full timestamp for sorting.")
            # Cast to str first so the concatenation also works when pandas
            # parsed either column as a non-string dtype; unparseable results
            # (including missing values) become NaT and are dropped below.
            full_data['FullTimestamp'] = pd.to_datetime(
                full_data['Date'].astype(str) + ' ' + full_data['Time'].astype(str),
                errors='coerce', dayfirst=True
            )
            full_data.sort_values(by=['Parking Space ID', 'FullTimestamp'], inplace=True)
            full_data.dropna(subset=['FullTimestamp'], inplace=True)
            # Sequential counter per parking space per calendar day. It restarts
            # at 1 every day, so plotted lines revisit the same x positions.
            full_data['Time Point'] = full_data.groupby(['Parking Space ID', full_data['FullTimestamp'].dt.date]).cumcount() + 1
        else:
            print("Generating 'Time Point' based on cumulative count per parking space (less precise without full timestamp).")
            full_data['Time Point'] = full_data.groupby('Parking Space ID').cumcount() + 1
    else:
        # Ensure 'Time Point' is numeric, if it exists
        full_data['Time Point'] = pd.to_numeric(full_data['Time Point'], errors='coerce')
        full_data.dropna(subset=['Time Point'], inplace=True) # Drop rows where Time Point couldn't be converted

    # Convert 'Date' column to datetime objects if it exists after renaming
    if 'Date' in full_data.columns:
        full_data['Date'] = pd.to_datetime(full_data['Date'], errors='coerce', dayfirst=True) # Use dayfirst for DD-MM-YYYY format
        full_data.dropna(subset=['Date'], inplace=True) # Drop rows where Date couldn't be converted

    # Nothing to replay: bail out before building a plot with an undefined
    # x-range and before the final summary, which needs at least one batch.
    if full_data.empty:
        print("No usable rows remain after cleaning; nothing to simulate.")
        return

    # Get unique parking space IDs for setting up Bokeh plots
    parking_space_ids = sorted(full_data['Parking Space ID'].unique())

    # Model 1 state: last predicted price per parking space, seeded with the base price.
    last_price_by_id = dict.fromkeys(parking_space_ids, BASE_PRICE)

    # --- Bokeh Visualization Setup ---
    output_notebook() # Directs Bokeh to render plots in the Jupyter/Colab notebook output

    p = figure(
        x_axis_label="Time Point (Simulated)",
        y_axis_label="Predicted Price ($)",
        title=f"Real-time Parking Price Prediction (Model {selected_model})",
        height=500,
        width=900,
        x_range=(0, full_data['Time Point'].max() + 1), # X-axis range based on max time point
        y_range=(BASE_PRICE * MIN_PRICE_FACTOR * 0.9, BASE_PRICE * MAX_PRICE_FACTOR * 1.1) # Y-axis with buffer
    )

    # One ColumnDataSource (and one line) per parking space.
    parking_sources = {}
    colors = ["#e6194b", "#3cb44b", "#ffe119", "#4363d8", "#f58231", "#911eb4", "#46f0f0", "#f032e6",
              "#bcf60c", "#fabebe", "#008080", "#e6beff", "#9a6324", "#fffac8"]

    for i, pid in enumerate(parking_space_ids):
        parking_sources[pid] = ColumnDataSource(data={'time': [], 'price': []})
        p.line(
            x='time', y='price',
            legend_label=f"Parking {pid}",
            line_width=2,
            color=colors[i % len(colors)], # Palette is reused cyclically
            source=parking_sources[pid]
        )

    p.legend.location = "top_left"
    p.legend.click_policy="hide" # Allows users to hide/show individual lines by clicking their legend entry

    # `notebook_handle=True` is required for `push_notebook` to update the plot
    handle = show(p, notebook_handle=True)

    # --- Simulate Streaming Data and Update Plot ---
    # Replay by full timestamp when available, otherwise by 'Time Point'.
    if 'Date' in full_data.columns and 'FullTimestamp' in full_data.columns:
        full_data.sort_values(by='FullTimestamp', inplace=True)
        time_iteration_column = 'FullTimestamp'
    else:
        full_data.sort_values(by=['Parking Space ID', 'Time Point'], inplace=True)
        time_iteration_column = 'Time Point'

    processed_data = None
    # groupby yields the batches in ascending key order in a single pass,
    # instead of re-scanning the whole frame once per time step.
    for _, time_step_rows in full_data.groupby(time_iteration_column, sort=True):
        current_time_data = time_step_rows.copy()

        # For Model 1, seed 'previous_price' from the state of the previous step.
        if selected_model == 1:
            current_time_data['previous_price'] = \
                current_time_data['Parking Space ID'].map(last_price_by_id)

        # The same snapshot is handed over as the competitor set for Model 3.
        processed_data = apply_pricing_model(current_time_data, selected_model, current_time_data.copy())

        # First row per parking space in this batch: used both for the Model 1
        # state and for the plotted point.
        first_rows = processed_data.drop_duplicates(subset='Parking Space ID', keep='first')

        if selected_model == 1:
            last_price_by_id.update(
                zip(first_rows['Parking Space ID'], first_rows['predicted_price'])
            )

        # 'Time Point' is the plotted x value regardless of the iteration column.
        for pid, time_point, price in zip(first_rows['Parking Space ID'],
                                          first_rows['Time Point'],
                                          first_rows['predicted_price']):
            parking_sources[pid].stream({'time': [time_point], 'price': [price]})

        # Push the updated plot to the notebook output
        push_notebook(handle=handle)
        time.sleep(0.05) # Simulate a small delay for real-time effect (adjust as needed)

    print(f"\nSimulation for Model {selected_model} completed for all data points.")
    if processed_data is not None:
        print("Final predicted prices for the last time point:")
        # Prices from the last processed batch
        print(processed_data[['Parking Space ID', 'predicted_price']])

# --- 5. Conceptual Pathway Integration (for reference) ---
# This section outlines how Pathway would be used for a true real-time application.
# The runnable code above simulates this behavior using Pandas and Bokeh directly.

# @pw.udf
# def pathway_model1_udf(row, state):
#     # This UDF would be applied to each incoming row in a Pathway stream.
#     # 'state' would manage the 'previous_price' for each parking space.
#     # This is a highly simplified representation.
#     occupancy_rate = row.Occupancy / row.Capacity
#     previous_price = state.get('price', BASE_PRICE) # Get previous price from state
#     predicted_price = previous_price + ALPHA_MODEL1 * occupancy_rate
#     predicted_price = np.clip(predicted_price, BASE_PRICE * MIN_PRICE_FACTOR, BASE_PRICE * MAX_PRICE_FACTOR)
#     state['price'] = predicted_price # Update state for next iteration
#     return {'Parking Space ID': row['Parking Space ID'], 'predicted_price': predicted_price, 'time_point': row['Time Point']}

# def run_pathway_app(data_path, selected_model):
#     # Initialize Pathway context
#     # pw.set_debug_mode(True) # Optional: for debugging Pathway tables

#     # 1. Data Ingestion: Read CSV as a streaming table
#     # This simulates new data arriving over time.
#     input_stream = pw.io.csv.read(
#         data_path,
#         schema=None, # Pathway can infer schema
#         mode="streaming",
#         autocommit_duration_ms=100 # Simulate small batches arriving every 100ms
#     )

#     # 2. Feature Engineering and Model Application
#     # For Model 1 (stateful):
#     if selected_model == 1:
#         # Use a stateful transform to carry 'previous_price' for each parking space.
#         # This would involve `pw.state.min_state` or a custom stateful UDF.
#         # Example (conceptual, requires proper state management in UDF):
#         # predicted_prices = input_stream.group_by(input_stream.Parking_Space_ID).map(
#         #     lambda row, state: pathway_model1_udf(row, state)
#         # )
#         pass # Placeholder for actual Pathway stateful logic

#     # For Model 2 (stateless per row, but needs batch-level normalization):
#     elif selected_model == 2:
#         # This would involve `pw.map` for `calculate_demand`
#         # and then a `pw.reduce` or another `pw.map` with a global view for `normalize_demand`.
#         # This is more complex in Pathway than a simple row-wise map.
#         pass # Placeholder for actual Pathway logic

#     # For Model 3 (needs joins for competitive data):
#     elif selected_model == 3:
#         # This would involve joining the input_stream with itself or another table
#         # representing the current state of all parking lots, based on proximity.
#         pass # Placeholder for actual Pathway join logic

#     # 3. Output and Visualization
#     # Output the predicted prices to a file or a visualization sink.
#     # For Bokeh, Pathway has `pw.io.bokeh.write` but it usually requires a running Bokeh server.
#     # predicted_prices.write(pw.io.csv.write("predicted_prices_pathway.csv"))

#     # Start the Pathway data processing engine (this is blocking)
#     # pw.run()

# --- Main Execution Block ---
if __name__ == "__main__":
    # Path to the dataset CSV file.
    # Make sure this file is uploaded to your Colab environment or accessible.
    DATASET_PATH = '/content/dataset.csv'

    # Choose which model to run (1, 2, or 3). Exactly one assignment should be
    # active; Model 3 is the one currently selected.
    # model_to_run = 1 # Uncomment to run Model 1
    # model_to_run = 2 # Uncomment to run Model 2
    model_to_run = 3 # Model 3: Model 2 pricing followed by the competitive adjustment

    run_simulation(DATASET_PATH, model_to_run)

    # You can also conceptually call the Pathway function here if you had a complete Pathway setup:
    # print("\nConceptual Pathway execution (requires full Pathway setup and specific UDFs):")
    # run_pathway_app(DATASET_PATH, model_to_run)


Starting simulation for Model 3...
Renamed column 'ID' to 'Parking Space ID'.
Renamed column 'LastUpdatedDate' to 'Date'.
Renamed column 'LastUpdatedTime' to 'Time'.
Renamed column 'VehicleType' to 'Type of incoming vehicle'.
Renamed column 'TrafficConditionNearby' to 'Nearby traffic congestion level'.
Renamed column 'QueueLength' to 'Queue length'.
Renamed column 'IsSpecialDay' to 'Special day indicator'.
Combining 'Date' and 'Time' to create a full timestamp for sorting.
