In [55]:
import numpy as np
import pandas as pd

In [56]:
import glob
import os
# Disabled alternative loader: it would read every *.csv under
# ../data/raw/domain/suburbs, concatenate them into `joined_suburbs`,
# sort by listing_id and reset the index.
# NOTE(review): in the cells reviewed here, `glob` and `os` are referenced only
# by these commented-out lines and `joined_suburbs` is never used — confirm
# whether this cell is still needed.
# csv_files = glob.glob(os.path.join("../data/raw/domain/suburbs", "*.csv"))
# joined_suburbs = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True).sort_values(by='listing_id').reset_index(drop=True)

# print(len(joined_suburbs))
# joined_suburbs.head()

### Data reading

In [57]:
# Read the combined rental listings file, order rows by listing_id and
# rebuild a clean 0..n-1 index so positional row numbers follow that order.
all_suburbs = (
    pd.read_csv("../data/raw/domain/vic_rentals_all.csv")
    .sort_values(by="listing_id")
    .reset_index(drop=True)
)
# Row count, printed ahead of the preview.
print(all_suburbs.shape[0])
all_suburbs.head()

12717


Unnamed: 0,listing_id,suburb,postcode,weekly_rent,bond,available_date,date_listed,days_listed,bedrooms,bathrooms,...,floorplans_count,virtual_tour,primary_type,secondary_type,agency,agency_id,agent_names,structured_features,url,land_area
0,5470976,ASCOT VALE,3032,660.0,2868.0,"Wednesday, 10 September 2025",2025-07-21,50.0,2.0,1.0,...,0.0,False,House,House,Keyhole Property Investments,8749.0,"Rentals, Reception","Air conditioning, Bath, Built in wardrobes, Cl...",https://www.domain.com.au/68-francis-street-as...,
1,5604062,MELTON,3337,,150.0,"Tuesday, 15 January 2008",2008-01-15,6446.0,,,...,0.0,False,Apartment,Apartment / Unit / Flat,Raine & Horne Melton,22328.0,,,https://www.domain.com.au/10-glenville-drive-m...,
2,6168570,MELBOURNE,3000,310.0,1347.0,"Monday, 03 July 2023",2025-09-02,7.0,,1.0,...,0.0,False,Apartment,Studio,Match Property Group,8668.0,Lisbeth Rosborg-Winter,"Built in wardrobes, Furnished, Broadband inter...",https://www.domain.com.au/32-546-flinders-stre...,
3,7117948,MOONEE PONDS,3039,500.0,2173.0,"Friday, 19 September 2025",2025-09-06,3.0,2.0,1.0,...,0.0,False,Apartment,Apartment / Unit / Flat,Simone Bullen,7896.0,Ebonnie Reid,Ground floor,https://www.domain.com.au/7-64-holmes-road-moo...,
4,7455074,PRAHRAN,3181,,340.0,"Thursday, 22 May 2014",2012-03-02,4938.0,1.0,1.0,...,0.0,False,Apartment,Apartment / Unit / Flat,Prime Property Partners Australia,2231.0,Maia Weinberg,,https://www.domain.com.au/1-60-the-avenue-prah...,


### Initial feature selection

In [58]:
# Show every column name in the loaded listings frame; head() elides the
# middle columns with "...", so this is the full list to choose from.
all_suburbs.columns

Index(['listing_id', 'suburb', 'postcode', 'weekly_rent', 'bond',
       'available_date', 'date_listed', 'days_listed', 'bedrooms', 'bathrooms',
       'carspaces', 'property_type', 'address', 'lat', 'lon', 'scraped_date',
       'domain_page_id', 'property_id', 'photo_count', 'video_count',
       'floorplans_count', 'virtual_tour', 'primary_type', 'secondary_type',
       'agency', 'agency_id', 'agent_names', 'structured_features', 'url',
       'land_area'],
      dtype='object')

In [59]:
# Names of the listing columns that are removed from `all_suburbs` before
# feature engineering. Order is kept as originally written.
DROPPED_COLUMNS = [
    "scraped_date", "domain_page_id", "property_id",
    "agency_id", "structured_features", "url",
]

In [60]:
# Remove the columns listed in DROPPED_COLUMNS from the working frame.
# This cell reassigns `all_suburbs` from itself, so on a second run the
# columns are already gone; errors="ignore" makes that re-run a no-op instead
# of raising KeyError. Trade-off: a misspelt name in DROPPED_COLUMNS is also
# skipped silently, so check the preview below after editing the list.
all_suburbs = all_suburbs.drop(columns=DROPPED_COLUMNS, errors="ignore")
all_suburbs.head()

Unnamed: 0,listing_id,suburb,postcode,weekly_rent,bond,available_date,date_listed,days_listed,bedrooms,bathrooms,...,lon,photo_count,video_count,floorplans_count,virtual_tour,primary_type,secondary_type,agency,agent_names,land_area
0,5470976,ASCOT VALE,3032,660.0,2868.0,"Wednesday, 10 September 2025",2025-07-21,50.0,2.0,1.0,...,144.9182,12.0,0.0,0.0,False,House,House,Keyhole Property Investments,"Rentals, Reception",
1,5604062,MELTON,3337,,150.0,"Tuesday, 15 January 2008",2008-01-15,6446.0,,,...,144.59305,6.0,0.0,0.0,False,Apartment,Apartment / Unit / Flat,Raine & Horne Melton,,
2,6168570,MELBOURNE,3000,310.0,1347.0,"Monday, 03 July 2023",2025-09-02,7.0,,1.0,...,144.95618,1.0,0.0,0.0,False,Apartment,Studio,Match Property Group,Lisbeth Rosborg-Winter,
3,7117948,MOONEE PONDS,3039,500.0,2173.0,"Friday, 19 September 2025",2025-09-06,3.0,2.0,1.0,...,144.91553,7.0,0.0,0.0,False,Apartment,Apartment / Unit / Flat,Simone Bullen,Ebonnie Reid,
4,7455074,PRAHRAN,3181,,340.0,"Thursday, 22 May 2014",2012-03-02,4938.0,1.0,1.0,...,144.9986,12.0,0.0,0.0,False,Apartment,Apartment / Unit / Flat,Prime Property Partners Australia,Maia Weinberg,


### Feature engineering

#### Merging with transport data

In [61]:
# Helpers from the project-local transport_analysis module, one per stop type;
# each is later called with a listing's lat/lon, the stops table and RADIUS.
from transport_analysis import (
    return_stop_insights_metro_bus,
    return_stop_insights_metro_tram,
    return_stop_insights_metro_train,
    return_stop_insights_regional_bus,
    return_stop_insights_regional_train,
)

# Search radius handed to every helper above (2 km).
RADIUS = 2

# Stops table passed to the helpers; the preview shows the columns
# StopName, StopType, Latitude and Longitude.
transport_data = pd.read_csv("../data/processed/transport/transport_stops.csv")
transport_data.head()

Unnamed: 0,StopName,StopType,Latitude,Longitude
0,10 Jarrah Dr,Metro Bus,-38.002837,145.110716
1,10 Oban Rd,Metro Bus,-37.796342,145.252047
2,10 Queens Pde,Metro Bus,-37.719582,144.971255
3,100 South Gippsland Hwy,Metro Bus,-38.008999,145.229229
4,1000 Steps/Mount Dandenong Tourist Rd,Metro Bus,-37.889758,145.318343


In [62]:
# 3 mins to run
# Output column -> helper that fills it. Replaces five copy-pasted apply()
# lines that differed only in the column name and the helper called.
# Dict order is the order in which the columns are appended to the frame.
STOP_COUNT_FEATURES = {
    "num_metro_bus_stops": return_stop_insights_metro_bus,
    "num_metro_tram_stops": return_stop_insights_metro_tram,
    "num_metro_train_stops": return_stop_insights_metro_train,
    "num_regional_bus_stops": return_stop_insights_regional_bus,
    "num_regional_train_stops": return_stop_insights_regional_train,
}

for feature_name, stop_counter in STOP_COUNT_FEATURES.items():
    # apply() runs immediately, so the lambda always sees the helper for the
    # current iteration. Each helper is called once per listing row with that
    # row's lat/lon, the stops table and the shared RADIUS.
    all_suburbs[feature_name] = all_suburbs.apply(
        lambda row: stop_counter(row["lat"], row["lon"], transport_data, RADIUS),
        axis=1,
    )

all_suburbs.head()

Unnamed: 0,listing_id,suburb,postcode,weekly_rent,bond,available_date,date_listed,days_listed,bedrooms,bathrooms,...,primary_type,secondary_type,agency,agent_names,land_area,num_metro_bus_stops,num_metro_tram_stops,num_metro_train_stops,num_regional_bus_stops,num_regional_train_stops
0,5470976,ASCOT VALE,3032,660.0,2868.0,"Wednesday, 10 September 2025",2025-07-21,50.0,2.0,1.0,...,House,House,Keyhole Property Investments,"Rentals, Reception",,83,38,4,0,0
1,5604062,MELTON,3337,,150.0,"Tuesday, 15 January 2008",2008-01-15,6446.0,,,...,Apartment,Apartment / Unit / Flat,Raine & Horne Melton,,,35,0,0,0,0
2,6168570,MELBOURNE,3000,310.0,1347.0,"Monday, 03 July 2023",2025-09-02,7.0,,1.0,...,Apartment,Studio,Match Property Group,Lisbeth Rosborg-Winter,,85,108,6,0,3
3,7117948,MOONEE PONDS,3039,500.0,2173.0,"Friday, 19 September 2025",2025-09-06,3.0,2.0,1.0,...,Apartment,Apartment / Unit / Flat,Simone Bullen,Ebonnie Reid,,129,40,3,0,1
4,7455074,PRAHRAN,3181,,340.0,"Thursday, 22 May 2014",2012-03-02,4938.0,1.0,1.0,...,Apartment,Apartment / Unit / Flat,Prime Property Partners Australia,Maia Weinberg,,85,100,7,0,0
