# In [None]:
#!/usr/bin/env python3
import json
import os
import time
import requests

# Endpoint prefix; each request appends /<suburb>/<state>/<postcode> to it.
BASE_URL = "https://www.onthehouse.com.au/odin/api/marketstats/markets/trends"

# Headers sent with every request: a browser-like User-Agent and a JSON Accept.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Accept": "application/json",
}

# 1. State files and property types
# One "<STATE>_suburbs.json" input file per state/territory code.
state_files = [
    f"{code}_suburbs.json"
    for code in ("NSW", "NT", "QLD", "SA", "TAS", "VIC", "WA", "ACT")
]
# Values used for the propertyType query parameter.
PROPERTY_TYPES = ["House", "Unit"]
# Value used for the timePeriod query parameter; adjust if needed.
TIME_PERIOD_YEARS = 5

# 2. Load all suburb_postcodes into a dict keyed by state code
#    state_entries maps e.g. 'NSW' -> [{'suburb': 'BONDI+BEACH', 'postcode': '2026'}, ...]
state_entries = {}
for filename in state_files:
    state = os.path.splitext(filename)[0].split('_')[0]  # e.g. 'NSW'
    with open(filename, 'r', encoding='utf-8') as f:
        raw_entries = json.load(f)

    entries = []
    for raw in raw_entries:
        # 'text' is split on its first comma: suburb name before, postcode after.
        suburb_part, comma, postcode_part = raw['text'].partition(',')
        postcode = postcode_part.strip()
        if not comma or not postcode:
            # A record without a postcode cannot form a request URL; report it
            # and keep loading instead of aborting the whole run.
            print(f"[WARN]    {state} – skipping entry without postcode: {raw['text']!r}")
            continue
        entries.append({
            # Upper-cased with spaces turned into '+' for use in the URL path.
            'suburb': suburb_part.strip().upper().replace(' ', '+'),
            'postcode': postcode,
        })
    state_entries[state] = entries

# 3. Sanity check: per state, print the entry count and the first five entries
for state, entries in state_entries.items():
    print(f"{state}: {len(entries)} entries\n{entries[:5]} \n")

# 4. Fetch trends for every state & property type
REQUEST_TIMEOUT = 10  # seconds allowed per HTTP request
REQUEST_PAUSE = 1     # seconds to wait after every request (polite pause)


def fetch_trend(session, url):
    """Fetch one trends URL and decode its JSON body.

    Args:
        session: ``requests.Session`` used to send the GET request.
        url: Fully built trends URL, including the query string.

    Returns:
        A ``(payload, error)`` tuple. On success ``error`` is ``None`` and
        ``payload`` is the decoded JSON. On failure ``payload`` is ``None``
        and ``error`` is the text to show in the error line.
    """
    try:
        resp = session.get(url, timeout=REQUEST_TIMEOUT)
        resp.raise_for_status()
    except requests.exceptions.RequestException as e:
        return None, str(e)

    # Decoding is handled separately: recent requests releases raise a
    # JSONDecodeError that is both a RequestException and a ValueError, so a
    # shared try block would never reach a later ValueError handler.
    try:
        return resp.json(), None
    except ValueError:
        return None, "invalid JSON"


# One session for the whole run so connections to the host are reused.
with requests.Session() as session:
    session.headers.update(HEADERS)

    for state, entries in state_entries.items():
        for ptype in PROPERTY_TYPES:
            records = []
            for entry in entries:
                suburb = entry['suburb']
                postcode = entry['postcode']
                url = (
                    f"{BASE_URL}/{suburb}/{state}/{postcode}"
                    f"?propertyType={ptype}&timePeriod={TIME_PERIOD_YEARS}"
                )

                data, error = fetch_trend(session, url)
                if error is None:
                    records.append({"suburb": suburb, "data": data})
                    print(f"[SUCCESS] {state} {ptype} – {suburb}, {postcode}")
                else:
                    print(f"[ERROR]   {state} {ptype} – {suburb}, {postcode} → {error}")

                # Pause after failures too, so a run of errors does not turn
                # into back-to-back requests against the server.
                time.sleep(REQUEST_PAUSE)

            # One output file per state and property type, holding only the
            # suburbs that were fetched successfully.
            out_fname = f"market_trends_{state.lower()}_{ptype.lower()}s.json"
            with open(out_fname, "w", encoding="utf-8") as f:
                json.dump(records, f, ensure_ascii=False, indent=2)
            print(f"→ Saved {len(records)} records to {out_fname}\n")


# In [None]:
#!/usr/bin/env python3
import json
import glob
import os
import pandas as pd

# 1. grab every trends JSON your fetch-script produces
#    (e.g. market_trends_nsw_houses.json, market_trends_vic_units.json, etc.)
#    sorted so the files are processed in a reproducible order
json_files = sorted(glob.glob("market_trends_*_*.json"))

# Columns that together identify one observation in the long-form table.
KEY_COLUMNS = ["Locality", "Metric", "Date"]


def flatten_trends(payload):
    """Flatten one fetched trends payload into long-form rows.

    Args:
        payload: List of ``{"suburb": ..., "data": ...}`` dicts as written by
            the fetch script.

    Returns:
        A list of dicts with keys ``Locality``, ``Metric``, ``Date``
        (formatted ``YYYYMMDD``) and ``Value``; one dict per data point.
        Suburbs without a usable ``seriesResponseList`` and series without a
        ``metricType`` are reported and skipped.
    """
    rows = []
    for loc in payload:
        suburb = loc.get("suburb", "<UNKNOWN>")
        data = loc.get("data")
        # "data" can be absent, null or a non-dict when the API returned
        # nothing useful; all of those count as a missing series list.
        series_list = data.get("seriesResponseList") if isinstance(data, dict) else None
        if series_list is None:
            print(f"  ⚠️  Missing seriesResponseList for {suburb!r}; skipping.")
            continue

        for metric in series_list:
            metric_type = metric.get("metricType")
            if metric_type is None:
                print(f"  ⚠️  Series without metricType for {suburb!r}; skipping.")
                continue

            locality = metric.get("localityName", "<NO_NAME>")
            postcode = metric.get("postcodeName", "<NO_CODE>")
            loc_id = f"{locality}, {postcode}"
            # sanitize metricType for column names
            m = metric_type.replace(" ", "_").replace("(", "").replace(")", "")

            # `or []` also covers an explicit null seriesDataList.
            for point in metric.get("seriesDataList") or []:
                rows.append({
                    "Locality": loc_id,
                    "Metric":   m,
                    "Date":     pd.to_datetime(point["dateTime"]).strftime("%Y%m%d"),
                    "Value":    point["value"],
                })
    return rows


def pivot_trends(flat_df):
    """Pivot long-form rows into one row per locality.

    Args:
        flat_df: Non-empty DataFrame with ``Locality``, ``Metric``, ``Date``
            and ``Value`` columns.

    Returns:
        A DataFrame with a ``Locality`` column followed by one
        ``<Metric>_<Date>`` column per metric/date pair. Rows repeating the
        same locality/metric/date keep only their first occurrence.
    """
    deduped = flat_df.drop_duplicates(KEY_COLUMNS)
    wide = deduped.pivot_table(
        index="Locality",
        columns=["Metric", "Date"],
        values="Value",
        aggfunc="first",
    )
    wide.columns = [f"{metric}_{date}" for metric, date in wide.columns]
    return wide.reset_index()


for json_file in json_files:
    print(f"\n🛠  Processing {json_file}...")
    with open(json_file, encoding="utf-8") as fh:
        payload = json.load(fh)

    # 2. flatten into a long form
    records = flatten_trends(payload)
    if not records:
        # An empty frame has no key columns, so the duplicate check and the
        # pivot below would fail; skip this file and move on to the next.
        print(f"  ⚠️  No series data in {json_file}; skipping CSV.")
        continue
    flat_df = pd.DataFrame.from_records(records)

    # 3. optional: inspect any exact duplicates
    dups = flat_df[flat_df.duplicated(KEY_COLUMNS, keep=False)]
    if not dups.empty:
        print("  ⚠️  Found duplicate rows; here they are:")
        print(dups)

    # 4. drop duplicates and pivot
    wide = pivot_trends(flat_df)

    # 5. save out as CSV next to the JSON
    csv_name = os.path.splitext(json_file)[0] + ".csv"
    wide.to_csv(csv_name, index=False, encoding="utf-8-sig")
    print(f"  ✅  Saved {csv_name} (shape: {wide.shape})")
