### Crawl data and save to parking_rates.csv

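Run this cell to re-scrape the tariffs page and regenerate the CSV. Note that the code below currently writes to `./data/parking_rates_backup.csv`, not `parking_rates.csv`.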

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def get_parking_data(url, timeout=30):
    """
    Scrape per-zone parking rates from a tariffs page into a DataFrame.

    The page is expected to contain ``div.row-city`` elements (one per city),
    ``div.row-area`` elements whose ``data-id`` starts with ``"<city data-id>-"``,
    and, following each area, a ``div.zone-data`` block (three ``<strong>`` tags:
    zone number, name, description) and a ``div.zone-rates`` block made of
    ``div.zone-rates-row`` rows with two ``div.zone-rates-col`` cells each.

    Args:
        url: Address of the tariffs page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        pandas.DataFrame with one row per (zone, user type) and the columns
        City, Zone Number, Zone Name, Zone Description, User Type, Pricing.
        The columns are present even when nothing was scraped.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.

    Entries whose markup is incomplete (missing city name / data-id, fewer than
    three <strong> tags, fewer than two rate columns) are skipped rather than
    aborting the whole scrape.
    """
    columns = [
        "City",
        "Zone Number",
        "Zone Name",
        "Zone Description",
        "User Type",
        "Pricing",
    ]

    # Fail loudly on network/HTTP problems instead of parsing an error page
    # into an empty frame that would then overwrite the CSV.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    parking_data = []

    for city in soup.find_all("div", class_="row-city"):
        name_cell = city.find("div", class_="col-city-2")
        city_id = city.get('data-id')
        if name_cell is None or city_id is None:
            continue
        city_name = name_cell.get_text(strip=True)

        # Areas are linked to their city through a "<city id>-" data-id prefix.
        area_prefix = city_id + '-'
        area_rows = soup.find_all(
            "div",
            class_="row-area",
            attrs={"data-id": lambda x: x and x.startswith(area_prefix)},
        )

        for area in area_rows:
            # NOTE(review): find_next returns the next match in document order,
            # so an area without its own zone-data/zone-rates block would pick up
            # the following area's block -- confirm against the page markup.
            zone_data = area.find_next("div", class_="zone-data")
            if zone_data is None:
                continue

            labels = zone_data.find_all("strong")
            if len(labels) < 3:
                continue
            zone_number = labels[0].text.strip()
            zone_name = labels[1].text.strip()
            zone_description = labels[2].text.strip()

            rates_box = area.find_next("div", class_="zone-rates")
            if rates_box is None:
                continue

            for rate in rates_box.find_all("div", class_="zone-rates-row"):
                cells = rate.find_all("div", class_="zone-rates-col")
                if len(cells) < 2:
                    continue

                parking_data.append({
                    "City": city_name,
                    "Zone Number": zone_number,
                    "Zone Name": zone_name,
                    "Zone Description": zone_description,
                    "User Type": cells[0].text.strip(),
                    "Pricing": cells[1].text.strip(),
                })

    df = pd.DataFrame(parking_data, columns=columns)
    return df

from pathlib import Path

# Scrape the tariffs page and persist the result as CSV.
url = "https://site.cellopark.com.au/tariffs/"
parking_df = get_parking_data(url)
csv_filename = "./data/parking_rates_backup.csv"
# to_csv does not create missing directories, so make sure ./data exists first.
Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
parking_df.to_csv(csv_filename, index=False)
print(f"Data successfully saved to {csv_filename}")
print("Preview of the first 5 rows of the scraped data:")
print(parking_df.head())

Data successfully saved to parking_rates.csv
Preview of the first 5 rows of the scraped data:
                         City                                       Zone  \
0  NSW - Macquarie University   2109100 - General Parking - Green Zone 1   
1  NSW - Macquarie University   2109100 - General Parking - Green Zone 1   
2  NSW - Macquarie University   2109100 - General Parking - Green Zone 1   
3  NSW - Macquarie University  2109200 - General Parking - Yellow Zone 2   
4  NSW - Macquarie University  2109200 - General Parking - Yellow Zone 2   

  User Type                                            Pricing  
0   Visitor  First hour $21.30 then $10.60  per hour (pro-r...  
1     Staff  $4.0 per hour (pro-rata), Mon-Sat, 06:00-18:00...  
2   Student  $4.0 per hour (pro-rata), Mon-Sat, 06:00-18:00...  
3   Visitor  First hour $21.30 then $10.60  per hour (pro-r...  
4     Staff  $3.6 per hour (pro-rata), Mon-Sat, 06:00-18:00...  


### Use an LLM to infer parking availability for each row

In [1]:
# Goal: use an LLM to reason about the free parking time for each row and save it
# to a new column. This cell itself only sends a fixed haiku prompt to the model
# and prints the reply message.

from openai import OpenAI
client = OpenAI()

completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a haiku about recursion in programming."},
    ],
    model="gpt-4o-mini",
)

# Print the first (only requested) choice's message object.
print(completion.choices[0].message)

ChatCompletionMessage(content='Functions call themselves,  \nInfinite depths of insight,  \nLoops within the mind.  ', role='assistant', function_call=None, tool_calls=None, refusal=None, annotations=[])


None


In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Iris data: feature matrix and class labels
data = load_iris()
X = data.data
y = data.target

# Hold out 20% for validation, keeping class proportions (stratified)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Random forest with a fixed seed so repeated runs give the same model
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# 5-fold stratified cross-validation on the training portion only
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cross_val_scores = cross_val_score(
    clf, X_train, y_train, cv=cv, scoring='accuracy'
)
print(f'Cross-validation scores: {cross_val_scores}')
print(f'Mean cross-validation accuracy: {cross_val_scores.mean():.4f}')

# Fit on the whole training set, then score the hold-out set
clf.fit(X_train, y_train)
y_pred = clf.predict(X_val)
validation_accuracy = accuracy_score(y_val, y_pred)
print(f'Validation accuracy: {validation_accuracy:.4f}')


Cross-validation scores: [0.95833333 1.         0.95833333 0.91666667 0.91666667]
Mean cross-validation accuracy: 0.9500
Validation accuracy: 0.9000


In [3]:
# Display the per-fold accuracy array returned by cross_val_score.
cross_val_scores

array([0.95833333, 1.        , 0.95833333, 0.91666667, 0.91666667])

In [None]:
import pandas as pd
from openai import OpenAI
import time
import json
import re

# Module-level client used by ask_gpt below. No arguments are passed here, so
# credentials presumably come from the environment -- TODO confirm.
client = OpenAI()

def extract_json(text):
    """
    Parse ``text`` as JSON, or pull the first JSON object out of surrounding text.

    The whole input is tried first, and whatever value it decodes to is returned
    as-is. If that fails, the text is scanned left to right and the first ``{``
    that starts a decodable JSON object wins; anything after that object
    (prose, code fences, further objects) is ignored.

    Args:
        text: The raw model reply. ``None`` and other non-string values are
            tolerated and simply yield ``None``.

    Returns:
        The decoded JSON value, or ``None`` when no JSON could be recovered.
    """
    try:
        return json.loads(text)
    except (TypeError, ValueError, RecursionError):
        # Not pure JSON (or not a string at all) -- fall through to the scan.
        pass

    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value, so trailing
            # text or a second object no longer breaks the extraction.
            obj, _ = decoder.raw_decode(text[start:])
            return obj
        except (ValueError, RecursionError):
            start = text.find('{', start + 1)
    return None

def ask_gpt(row):
    """
    Ask the chat model for a per-day parking schedule for one meter row.

    Builds a prompt from the row's RESTRICTIONS, OPERATIONAL_DAY, OPERATIONAL_TIME,
    TAR_RATE_WEEKDAY and TAR_RATE_AH_WE fields, sends it to ``gpt-4o-mini`` through
    the module-level ``client``, prints the row's METER_NO and RESTRICTIONS as a
    progress line, and returns whatever ``extract_json`` recovers from the reply.

    Args:
        row: Object exposing ``.get(key, default)`` for the fields above
            (for example a pandas Series yielded by ``df.iterrows()``).

    Returns:
        The value produced by ``extract_json`` for the reply text; ``None`` when
        no JSON could be extracted, including when the reply has no text content.
    """
    def _cell(key):
        # pandas represents missing CSV cells as float NaN; without this the
        # prompt would contain the literal text "nan" instead of an empty value.
        value = row.get(key, '')
        if value is None or (isinstance(value, float) and value != value):
            return ''
        return value

    prompt = f"""
You are an expert in interpreting parking meter rules for Brisbane.

IMPORTANT LOGIC:
- Always check RESTRICTIONS first. If a restriction applies at a given time, parking is NOT allowed, regardless of operational time or rates.
- If a restriction contains multiple time ranges (e.g. "CLEARWAY 7AM-9AM,4PM-7PM MON-FRI TOWAWAY"), you must treat ALL time ranges as restricted. Do not miss or skip any restricted period.
- If there are multiple restrictions in a single string (e.g. "LZ 5AM-3PM M-F, BZ 3PM-7PM M-F & 12:30AM-6AM S-S"), each restriction must be parsed and applied to the correct time range and days.
- If a restriction uses abbreviations, expand them: C/W or C/WAY = CLEARWAY, LZ = LOADING ZONE, BZ = BUS ZONE. All are restricted (no parking) during their times.
- If a restriction says "ALL OTHER TIMES", it means that outside the specified operational times, the restriction applies (e.g. "TAXI ZONE ALL OTHER TIMES" means you cannot park outside the operational times).
- Only if there is NO restriction at a given time, check OPERATIONAL_DAY and OPERATIONAL_TIME to see if paid parking applies.
- If it is not an operational day, but the logic below says it should be free (e.g. MON-FRI on weekends), then it is free.
- If it is an operational day, check if the time is within operational hours. If not, it is free.
- If it is within operational hours, check the rate. If the rate is 0 or empty, it is free. Otherwise, it is paid and show the fee.

Restriction types: TOWAWAY, BUS ZONE, CLEARWAY, NO STOPPING, LOADING ZONE, PASSENGER LOADING ZONE, M/C PARKING ONLY, TAXI ZONE, C/W, C/WAY, LZ, BZ, JAZZ CLUB.
- C/W or C/WAY means CLEARWAY (restricted, no parking during its time).
- LZ means LOADING ZONE (restricted, no parking during its time).
- BZ means BUS ZONE (restricted, no parking during its time).

OPERATIONAL_DAY can be: MON-FRI, 7 DAYS, 6 DAYS, 5 DAYS, SAT-SUN, 7.

OPERATIONAL_TIME can be: 
- A single range (e.g. 7AM-7PM)
- Multiple ranges (e.g. 7AM-7PM,7PM-10PM(MON-FRI),7AM-7PM(SAT,SUN))
- Mixed with/without day notes (e.g. 9AM-4PM (MON-FRI), 7PM-10PM MON-FRI)

Special rules:
- If OPERATIONAL_DAY is MON-FRI or 5 DAYS, you can park for free all day on Saturday and Sunday.
- For restrictions like 'NO STOPPING 6AM-9AM,4PM-7PM MON-FRI', you cannot park from 6AM-9AM and 4PM-7PM on Monday to Friday.
- For restrictions like 'C/WAY & N/S 7AM-7PM MON-FRI TOWAWAY', it means Clearway and No Stopping from 7AM-7PM Monday to Friday.
- For a restriction like 'BUS ZONE 6AM-7PM MON-FRI TOWAWAY CLEARWAY C/WAY', if the current time is not in 6AM-7PM MON-FRI, then you can park there. To see if you need to pay, go to the next condition.
- For OPERATIONAL_DAY: 7 DAYS and OPERATIONAL_TIME: 7PM-10PM Mon-Fri, 7AM-7PM Sat-Sun, if the operational time is not in 7PM-10PM Mon-Fri or 7AM-7PM Sat-Sun, you can park for free. Otherwise, you need to pay.

ADDITIONAL INSTRUCTIONS:
- For each day, your output must start at 00:00 and cover the full 24 hours, with no gaps or overlaps. Every minute of the day must be accounted for as either Free, Paid, or Restricted (No parking).
- If a time range ends at, for example, 16:00, the next range must start at 16:00.
- Use 24-hour format (e.g., "00:00", "07:00", "16:00", "19:00", "23:59") for all 'from' and 'to' times for consistency.

WORKED EXAMPLES:

1. If RESTRICTIONS is 'LZ 5AM-3PM M-F, BZ 3PM-7PM M-F & 12:30AM-6AM S-S':
    - Loading zone (no parking) from 05:00-15:00 Monday to Friday
    - Bus zone (no parking) from 15:00-19:00 Monday to Friday
    - Bus zone (no parking) from 00:30-06:00 Saturday and Sunday
    - All other times: check operational time/rate or free

2. If RESTRICTIONS is 'TAXI ZONE ALL OTHER TIMES' and OPERATIONAL_TIME is '7AM-7PM,7PM-10PM(MON-FRI),7AM-7PM(SAT,SUN)':
    - You can park only during the operational times. All other times are taxi zone (no parking).

3. If RESTRICTIONS is 'C/W 7-11AM,2-7PM & LZ 11AM-2PM MON-SAT TOWAWAY':
    - Clearway (no parking) from 07:00-11:00 and 14:00-19:00 Monday to Saturday
    - Loading zone (no parking) from 11:00-14:00 Monday to Saturday

4. If RESTRICTIONS is 'C/WAY 7-9AM & 4-7PM M-F TOWAWAY':
    - Clearway (no parking) from 07:00-09:00 and 16:00-19:00 Monday to Friday

5. If RESTRICTIONS is 'CLEARWAY 7AM-9AM,4PM-7PM MON-FRI TOWAWAY' and OPERATIONAL_DAY is MON-FRI and OPERATIONAL_TIME is 9AM-16:00, and TAR_RATE_WEEKDAY is 6.15:
    - On Monday:
        - 00:00-07:00: Free
        - 07:00-09:00: No parking (restricted)
        - 09:00-16:00: Paid parking ($6.15)
        - 16:00-19:00: No parking (restricted)
        - 19:00-23:59: Free
    - On Saturday and Sunday: Free parking all day (00:00-23:59)

YOUR TASK:
Given the following parking meter info:
RESTRICTIONS: {_cell('RESTRICTIONS')}
OPERATIONAL_DAY: {_cell('OPERATIONAL_DAY')}
OPERATIONAL_TIME: {_cell('OPERATIONAL_TIME')}
TAR_RATE_WEEKDAY: {_cell('TAR_RATE_WEEKDAY')}
TAR_RATE_AH_WE: {_cell('TAR_RATE_AH_WE')}

For each day of the week (Monday to Sunday), list the time ranges when parking is allowed, and for each range, specify if it is Free, Paid (and the fee if paid), or Restricted (No parking).
Format your answer as a valid JSON object with keys MON, TUE, WED, THU, FRI, SAT, SUN.
Each value should be a list of objects with 'from', 'to', 'type' (Free/Paid/No parking), and 'fee' (if paid).
Return ONLY the JSON object, no explanation or markdown.
"""
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    # The message content may be absent (None); treat that as an empty reply so
    # the JSON extraction simply reports "nothing found".
    text = response.choices[0].message.content or ""
    # Progress line: which meter was just processed.
    print(row.get('METER_NO', ''), row.get('RESTRICTIONS', ''))
    return extract_json(text)

# Load the Brisbane parking-meter data
df = pd.read_csv('data/brisbane-parking-meters.csv')

# Every row is sent to the model (one API call per row). To try the pipeline on a
# small sample first, iterate over df.head(3).iterrows() instead.
day_keys = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
for idx, row in df.iterrows():
    gpt_result = ask_gpt(row)
    # Only a JSON object carries per-day entries; None or any other JSON value
    # (list, number, ...) leaves the row's columns empty instead of crashing.
    schedule = gpt_result if isinstance(gpt_result, dict) else {}
    for day in day_keys:
        # NOTE(review): the value is stored as a Python repr string, not JSON.
        df.at[idx, f'{day}_PARKING'] = str(schedule.get(day, ''))
    # No delay between calls; add time.sleep(1) here if rate limits are hit.

# Save to a new CSV
df.to_csv('data/brisbane-parking-meters_with_inferred_times.csv', index=False)

# Save to a JSON file (list of dicts, including the new columns)
df.to_json('data/brisbane-parking-meters_with_inferred_times.json', orient='records', indent=2)


print("Done! Saved to data/brisbane-parking-meters_with_inferred_times.csv")

C/WAY 7-11AM & 2-7PM, LOAD ZONE 11AM-2PM MON-FRI
RAW RESPONSE: {
    "MON": [
        {"from": "00:00", "to": "07:00", "type": "Free"},
        {"from": "07:00", "to": "11:00", "type": "No parking"},
        {"from": "11:00", "to": "14:00", "type": "No parking"},
        {"from": "14:00", "to": "19:00", "type": "No parking"},
        {"from": "19:00", "to": "22:59", "type": "Paid", "fee": 3.25},
        {"from": "23:00", "to": "23:59", "type": "Free"}
    ],
    "TUE": [
        {"from": "00:00", "to": "07:00", "type": "Free"},
        {"from": "07:00", "to": "11:00", "type": "No parking"},
        {"from": "11:00", "to": "14:00", "type": "No parking"},
        {"from": "14:00", "to": "19:00", "type": "No parking"},
        {"from": "19:00", "to": "22:59", "type": "Paid", "fee": 3.25},
        {"from": "23:00", "to": "23:59", "type": "Free"}
    ],
    "WED": [
        {"from": "00:00", "to": "07:00", "type": "Free"},
        {"from": "07:00", "to": "11:00", "type": "No parking"},
    