In [1]:
import pandas as pd

import warnings
# Suppress every Python warning for the rest of the session. Note that this
# also hides deprecation warnings emitted by pandas in the cells below.
warnings.filterwarnings("ignore")

In [2]:
# Load the four raw CSV extracts from the local datasets folder
base_dir = "./Datasets"

# The tuple order below must match the order of the target names.
sensors_df, bays_df, zones_df, restrictions_df = map(
    pd.read_csv,
    (
        f"{base_dir}/on-street-parking-bay-sensors.csv",
        f"{base_dir}/on-street-parking-bays.csv",
        f"{base_dir}/parking-zones-linked-to-street-segments.csv",
        f"{base_dir}/sign-plates-located-in-each-parking-zone.csv",
    ),
)


In [3]:
# 1. Clean sensor data
# Keep only sensor rows that have a zone number, and parse the status
# timestamps into datetimes on a fresh frame (sensors_df is left untouched).
sensors_clean = (
    sensors_df
    .dropna(subset=["Zone_Number"])
    .assign(Status_Timestamp=lambda frame: pd.to_datetime(frame["Status_Timestamp"]))
)

# Retain only readings from the 30 minutes leading up to the newest reading
# in the data (the window is relative to the data, not to the wall clock).
latest_time = sensors_clean["Status_Timestamp"].max()
cutoff_time = latest_time - pd.Timedelta(minutes=30)
sensors_clean = sensors_clean.loc[sensors_clean["Status_Timestamp"] >= cutoff_time]

In [4]:
# 2. Get street coordinates (average by street name)
# Zones that still have at least one sensor reading after cleaning.
sensor_zones = sensors_clean['Zone_Number'].unique()

# Zone-to-segment rows restricted to those zones.
filtered_zones = zones_df.loc[lambda frame: frame['ParkingZone'].isin(sensor_zones)]

# Bays that sit on any road segment linked to those zones.
street_segments = filtered_zones["Segment_ID"].unique()
street_locations = bays_df.loc[lambda frame: frame["RoadSegmentID"].isin(street_segments)]

# Mean bay position per road segment
street_coordinates = (
    street_locations
    .groupby('RoadSegmentID')[['Latitude', 'Longitude']]
    .mean()
    .reset_index()
)

# Attach the street name to each segment, then take the (unweighted) mean of
# the segment means to get one coordinate pair per street.
segment_to_street = filtered_zones.drop_duplicates(
    subset=['Segment_ID', 'OnStreet']
)[['Segment_ID', 'OnStreet']]
street_coords = pd.merge(
    street_coordinates,
    segment_to_street,
    left_on='RoadSegmentID',
    right_on='Segment_ID',
)
street_avg_coords = (
    street_coords
    .groupby('OnStreet')[['Latitude', 'Longitude']]
    .mean()
    .reset_index()
)

In [5]:
# 3. Get restrictions (remove PP/DP zones)
# Restriction rows for zones that have sensor readings.
zone_restrictions = restrictions_df.loc[
    lambda frame: frame["ParkingZone"].isin(sensor_zones)
]

# Drop rows whose Restriction_Display contains "PP" or "DP" anywhere in the
# text (regex alternation); rows with a missing display value are kept.
parkable_restrictions = zone_restrictions.loc[
    lambda frame: ~frame["Restriction_Display"].str.contains("PP|DP", na=False)
]

In [6]:
# 4. Calculate parking statistics per zone
zone_stats = (
    sensors_clean
    .groupby('Zone_Number')
    .agg(
        # Total spots: number of non-null KerbsideID values in the zone
        total_spots=('KerbsideID', 'count'),
        # Available spots: readings whose status is exactly 'Unoccupied'
        available_spots=(
            'Status_Description',
            lambda statuses: (statuses == 'Unoccupied').sum(),
        ),
    )
    .reset_index()
)

In [7]:
# 5. Map zones to streets
# One row per distinct (zone, street) pair; a zone linked to several streets
# therefore appears once for each of them.
zone_to_street = filtered_zones.drop_duplicates(
    subset=['ParkingZone', 'OnStreet']
)[['ParkingZone', 'OnStreet']]

In [8]:
# 6. Create final dataset
# Zone stats -> street name for each zone -> averaged street coordinates.
# Both merges use the default inner join, so zones without a street mapping
# or streets without coordinates are dropped here.
final_data = (
    zone_stats
    .merge(zone_to_street, left_on='Zone_Number', right_on='ParkingZone')
    .merge(street_avg_coords, on='OnStreet')
)

# Add restrictions (take first restriction per zone if multiple)
# drop_duplicates keeps each zone's first row intact. groupby(...).first()
# would instead pick the first NON-NULL value of every column independently,
# which can stitch together fields that come from different restriction rows.
restrictions_simple = (
    parkable_restrictions
    .drop_duplicates(subset='ParkingZone', keep='first')
    .sort_values('ParkingZone')  # same zone ordering the groupby produced
    .reset_index(drop=True)
)

# Build a single human-readable description: "<days> <start>-<finish> <display>".
# Every component is filled with '' (including Restriction_Days) so that one
# missing field does not turn the whole description into NaN.
restrictions_simple['restriction_detail'] = (
    restrictions_simple['Restriction_Days'].fillna('') + ' ' +
    restrictions_simple['Time_Restrictions_Start'].fillna('') + '-' +
    restrictions_simple['Time_Restrictions_Finish'].fillna('') + ' ' +
    restrictions_simple['Restriction_Display'].fillna('')
).str.strip()

# Left join so zones without any parkable restriction are kept
# (their restriction_detail is left missing).
final_data = pd.merge(
    final_data,
    restrictions_simple.loc[:, ['ParkingZone', 'restriction_detail']],
    how='left',
    on='ParkingZone',
)

In [9]:
# 7. Select and rename columns
# Rename to the export schema first, then keep only the export columns
# in their final order.
export_data = final_data.rename(columns={
    'OnStreet': 'street_name',
    'Latitude': 'lat',
    'Longitude': 'lon',
    'Zone_Number': 'parking_zone',
})[[
    'street_name', 'lat', 'lon', 'parking_zone',
    'total_spots', 'available_spots', 'restriction_detail',
]]

In [11]:
# 8. Export to CSV
# Write the export table without the index column, then print a short summary.
export_data.to_csv('melbourne_parking_data.csv', index=False)
print(
    f"Exported {len(export_data)} records to melbourne_parking_data.csv",
    f"Unique streets: {export_data['street_name'].nunique()}",
    f"Unique zones: {export_data['parking_zone'].nunique()}",
    sep="\n",
)

Exported 175 records to melbourne_parking_data.csv
Unique streets: 51
Unique zones: 174
