# External Data Collection for Real Estate Analysis

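Download the key external datasets (boundaries, census demographics, population projections, schools, crime and public transport) into `../data/landing/`.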

In [None]:
import requests
import zipfile
import pandas as pd
import geopandas as gpd
from pathlib import Path
import os
import time

# Landing folders, one per external data source, that the download cells write into
directories = [
    '../data/landing/boundaries',
    '../data/landing/demographics',
    '../data/landing/transport',
    '../data/landing/schools',
    '../data/landing/crime',
    '../data/landing/projections',
]

# makedirs creates any missing parent folders and is a no-op for folders that
# already exist, so this cell can be re-run safely.
for directory in directories:
    os.makedirs(directory, exist_ok=True)

## Download SA2 Boundaries

Download Statistical Area Level 2 boundaries for mapping properties to demographic areas.

In [9]:
# Download the SA2 boundary shapefile archive and unpack it next to the zip
url = "https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/SA2_2021_AUST_SHP_GDA2020.zip"
zip_path = Path('../data/landing/boundaries/SA2_2021_AUST_GDA2020.zip')

print("Downloading SA2 boundaries...")
# stream=True with iter_content writes the archive to disk chunk by chunk, so the
# whole file is never held in memory at once (response.content would buffer it all).
with requests.get(url, stream=True, timeout=120) as response:
    response.raise_for_status()  # fail before touching the file on an HTTP error
    with open(zip_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)

# Extract into the folder that holds the archive; ZipFile raises BadZipFile if the
# downloaded payload is not a valid zip.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(zip_path.parent)

print("SA2 boundaries downloaded")

Downloading SA2 boundaries...
SA2 boundaries downloaded


## ABS Census Data

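Download the 2021 Census DataPack (General Community Profile, SA2 level, Victoria). ABS DataPack URLs can change, so if the direct link below fails, download the file manually from the ABS DataPacks page.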

In [11]:
# Download the 2021 Census DataPack (GCP, SA2, VIC) and unpack it next to the zip
url = "https://www.abs.gov.au/census/find-census-data/datapacks/download/2021_GCP_SA2_for_VIC_short-header.zip"
zip_path = Path('../data/landing/demographics/2021_GCP_SA2_for_VIC_short-header.zip')

print("Downloading Census data...")
# Stream the body to disk in chunks rather than buffering the whole archive in
# memory through response.content.
with requests.get(url, stream=True, timeout=120) as response:
    response.raise_for_status()  # fail before touching the file on an HTTP error
    with open(zip_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)

# Extract into the folder that holds the archive; ZipFile raises BadZipFile if the
# downloaded payload is not a valid zip.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(zip_path.parent)

print("Census data downloaded")

Downloading Census data...
Census data downloaded


## Population Projections

Download Victoria in Future 2023 projections by SA2.

In [14]:
# Fetch the VIF2023 projections workbook and save it to the projections landing folder
url = "https://www.planning.vic.gov.au/__data/assets/excel_doc/0038/680879/VIF2023_Victoria_Demographic_Projections_to_2051.xlsx"
projections_file = Path('../data/landing/projections/VIF2023_projections.xlsx')

print("Downloading population projections...")
response = requests.get(url, timeout=60)
response.raise_for_status()  # raise on an HTTP error instead of saving an error page

# write_bytes opens the file in binary mode, writes the payload and closes it
projections_file.write_bytes(response.content)

print("Population projections downloaded")


Downloading population projections...
Population projections downloaded


## School Locations

Download Victorian school locations with coordinates.

In [19]:
# Fetch the school locations CSV, save it, then read it back as a sanity check
url = "https://www.education.vic.gov.au/Documents/about/research/datavic/dv378_DataVic-SchoolLocations-2024.csv"
schools_csv = Path('../data/landing/schools/school_locations_2024.csv')

print("Downloading school locations...")
response = requests.get(url, timeout=30)
response.raise_for_status()  # raise on an HTTP error instead of saving an error page
schools_csv.write_bytes(response.content)

# Re-read the saved file (decoded as latin-1) and report how many rows it holds
df = pd.read_csv(schools_csv, encoding='latin-1')
print(f"Downloaded {len(df)} schools")

Downloading school locations...
Downloaded 2294 schools


## Crime Statistics

Download crime data by Local Government Area.

In [20]:
# Fetch the LGA criminal-incidents workbook; on a request failure, point the user
# at the manual download page instead of stopping the notebook.
crime_url = "https://files.crimestatistics.vic.gov.au/2025-06/Data_Tables_LGA_Criminal_Incidents_Year_Ending_March_2025.xlsx"
crime_file = Path('../data/landing/crime/crime_by_lga_2025.xlsx')

print("Downloading crime statistics...")
try:
    response = requests.get(crime_url, timeout=60)
    response.raise_for_status()
except requests.exceptions.RequestException as e:
    # Best-effort download: report the problem and where to get the file by hand
    print(f"Download failed: {e}")
    print("Manual download required from: https://www.crimestatistics.vic.gov.au/crime-statistics/latest-victorian-crime-data/download-data")
else:
    # Only reached when the request succeeded; the body is already fully loaded
    crime_file.write_bytes(response.content)
    print("Crime statistics downloaded")

Downloading crime statistics...
Crime statistics downloaded


## PTV GTFS Data

Download the PTV GTFS schedule dataset. See the link below for the data dictionary.

https://discover.data.vic.gov.au/dataset/gtfs-schedule

In [29]:
# Download the PTV GTFS archive only when it is missing, and make sure it has been
# extracted. Download and extraction are checked separately so that an archive that
# was fetched but never unpacked (or whose folder was deleted) is still extracted.
gtfs_zip_path = Path('../data/landing/transport/ptv_gtfs.zip')
gtfs_folder_path = Path('../data/landing/transport/ptv_gtfs')

if gtfs_zip_path.exists() and gtfs_folder_path.is_dir():
    print("GTFS data already exists")
else:
    if not gtfs_zip_path.exists():
        print("Downloading PTV GTFS data...")
        gtfs_url = "https://data.ptv.vic.gov.au/downloads/gtfs.zip"

        # Stream into a temporary ".part" file and rename it only after the download
        # completes, so an interrupted run cannot leave a truncated zip that would
        # satisfy the exists() check above on the next run.
        partial_path = gtfs_zip_path.with_name(gtfs_zip_path.name + '.part')
        with requests.get(gtfs_url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        partial_path.replace(gtfs_zip_path)

    print("Extracting PTV GTFS data...")
    # ZipFile raises BadZipFile if the archive on disk is not a valid zip
    with zipfile.ZipFile(gtfs_zip_path, 'r') as zip_ref:
        zip_ref.extractall(gtfs_folder_path)

GTFS data already exists
