In [None]:
# Helper for imputing missing coordinates by property_id

# Copy coordinates from df to df_tmp based on property_id
# This imputes missing coordinates in df_tmp with values from df
def copy_coordinates_by_property_id(df, df_tmp):
    """
    Fill missing coordinates in df_tmp with values from df, matched on property_id.

    Only rows of df_tmp whose 'coordinates' value is null are touched; existing
    coordinates are never overwritten. df_tmp is modified in place and is also
    returned.

    Parameters:
    df: DataFrame with 'property_id' and 'coordinates' columns (lookup source).
        Rows with a null 'property_id' or null 'coordinates' are ignored. When a
        property_id occurs more than once, the last row that has non-null
        coordinates is used.
    df_tmp: DataFrame with a 'property_id' column. If it has no 'coordinates'
        column, an all-null one is created before imputation.

    Returns:
    df_tmp: The same DataFrame object, with missing coordinates imputed where a
        match exists in df; rows without a match stay null.
    """
    # Allow df_tmp to arrive without a 'coordinates' column at all.
    if 'coordinates' not in df_tmp.columns:
        df_tmp['coordinates'] = None

    # Keep only rows that can contribute a value, so a duplicate property_id
    # with null coordinates cannot overwrite an earlier valid entry, and a
    # null property_id never becomes a lookup key.
    usable = df.dropna(subset=['property_id', 'coordinates'])
    usable = usable.drop_duplicates(subset='property_id', keep='last')
    coord_mapping = usable.set_index('property_id')['coordinates'].to_dict()

    # Only update rows where coordinates are null/NaN
    mask = df_tmp['coordinates'].isna()
    df_tmp.loc[mask, 'coordinates'] = df_tmp.loc[mask, 'property_id'].map(coord_mapping)

    return df_tmp

# Example usage:
# df_tmp = copy_coordinates_by_property_id(df, df_tmp)


In [7]:
# reload all module before executing code
%load_ext autoreload
%autoreload 2

import pandas as pd

# Import the GeoUtils class from utils/geo.py
from utils.geo import GeoUtils

# Initialize GeoUtils 
geoutils = GeoUtils()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
OpenRouteService client initialized successfully.


In [8]:
# Load the rental listings that are missing coordinates, then print the
# column summary (names, non-null counts, dtypes) to inspect the schema.
df = pd.read_csv('../data/raw/missing_geo/rental_listings_missing_coordinates.csv')

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4127 entries, 0 to 4126
Data columns (total 40 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   property_id          4127 non-null   float64
 1   rental_price         4127 non-null   object 
 2   bedrooms             4099 non-null   float64
 3   bathrooms            4123 non-null   float64
 4   car_spaces           3642 non-null   float64
 5   property_type        4127 non-null   object 
 6   land_area            4127 non-null   float64
 7   property_features    4127 non-null   object 
 8   suburb               4127 non-null   object 
 9   postcode             4127 non-null   int64  
 10  year                 4127 non-null   int64  
 11  quarter              4127 non-null   int64  
 12  age_0_to_19          0 non-null      float64
 13  age_20_to_39         0 non-null      float64
 14  age_40_to_59         0 non-null      float64
 15  age_60_plus          0 non-null      f

In [9]:
# Limit geocoding to the first 500 rows of df. The explicit .copy() makes df
# an independent frame, so adding the 'coordinates' column below never writes
# into a slice of the frame loaded from CSV.
df = df.head(500).copy()

# Geocode every 'full_address' value with geoutils.geocode_ors_with_delay
# (called once per row, so repeated addresses are geocoded again)
df['coordinates'] = df['full_address'].apply(geoutils.geocode_ors_with_delay)

Successfully geocoded address: Monagham Terrace, Alfredton, VIC 3350
Successfully geocoded address: Ascot Gardens Drive, Bonshaw, VIC 3352
Successfully geocoded address: Prophecy Road, Bonshaw, VIC 3352
Successfully geocoded address: 74 Westacott Crescent, Baranduda, VIC 3691
Successfully geocoded address: Monkey Drive Botanic, Ridge, VIC 3977
Successfully geocoded address: Bloodwood St, Broadford, VIC 3658
Successfully geocoded address: Teal Court, Kilmore, VIC 3764
Successfully geocoded address: 18 De Carle Street, Brunswick, VIC 3056
Successfully geocoded address: Merriang South Road, Myrtleford, VIC 3737
Successfully geocoded address: Midland Highway Barkers, Creek, VIC 3451
Successfully geocoded address: Tyrell Terrace, Waterways, VIC 3195
Successfully geocoded address: Cisticola Lane, Bonbeach, VIC 3196
Successfully geocoded address: Saint Germain Boulevard Clyde, North, VIC 3978
Successfully geocoded address: 103 Vermont Street Street, Barooga, NSW 3644
Successfully geocoded add



Successfully geocoded address: Donaldson Street, Warrnambool, VIC 3280
Successfully geocoded address: Rosevae Crescent, Keysborough, VIC 3173
Successfully geocoded address: Aldridge Street Street Endeavour, Hills, VIC 3802
Successfully geocoded address: Rhianna Street, Greenvale, VIC 3059
Successfully geocoded address: Oconnor Crescent Hampton, East, VIC 3188




Successfully geocoded address: 43a Mccomb Boulevard Frankston, South, VIC 3199
Successfully geocoded address: Raptor Place Melton, South, VIC 3338
Successfully geocoded address: Arena Circuit Melton, South, VIC 3338
Successfully geocoded address: Estrella Street, Bonshaw, VIC 3352
Successfully geocoded address: Innurbruck Road Smythes, Creek, VIC 3351
Successfully geocoded address: Ascot Gardens Drive, Bonshaw, VIC 3352
Successfully geocoded address: 121 Ascot Gardens Drive, Bonshaw, VIC 3352
Successfully geocoded address: Goldsborough Street, Bonshaw, VIC 3352
Successfully geocoded address: Masada Boulevard Smythes, Creek, VIC 3351
Successfully geocoded address: Mustang Court, Shepparton, VIC 3630
Successfully geocoded address: Bradfield Loop, Strathfieldsaye, VIC 3551
Successfully geocoded address: Frogmouth Avenue, Strathfieldsaye, VIC 3551
Successfully geocoded address: Bradfield Lp, Strathfieldsaye, VIC 3551
Successfully geocoded address: Hiltaba Walk, Tarneit, VIC 3029
Successful



Successfully geocoded address: Maidenhair Walk Cranbourne, West, VIC 3977
Successfully geocoded address: Maidenhair Walk Cranbourne, West, VIC 3977
Successfully geocoded address: 13a Irving Street Dandenong, North, VIC 3175
Successfully geocoded address: Nook Place, Shepparton, VIC 3630
Successfully geocoded address: Nook Place, Shepparton, VIC 3630
Successfully geocoded address: 11 Mcalister Avenue, Frankston, VIC 3199
Successfully geocoded address: 161a Commercial Road King Street, Yarram, VIC 3971
Successfully geocoded address: Evans Crescent Street, Laverton, VIC 3028
Successfully geocoded address: Talbot Crescent, Malvern, VIC 3144
Successfully geocoded address: 4 South Street, Eildon, VIC 3713
Successfully geocoded address: 594 Saint Kilda Road, Melbourne, VIC 3004
Successfully geocoded address: Mey Place, Shepparton, VIC 3630
Successfully geocoded address: Carpenters Rocks Road, Compton, SA 5291
Successfully geocoded address: 85 Flemington Street North, Melbourne, VIC 3051
Succe

In [22]:
# Read the cleaned rental listings summary from
# '../data/processed/domain/rental_listings_summary_cleaned.csv' into df_tmp,
# then print its column summary (names, non-null counts, dtypes).
df_tmp = pd.read_csv('../data/processed/domain/rental_listings_summary_cleaned.csv')

df_tmp.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11703 entries, 0 to 11702
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   property_id        11703 non-null  int64  
 1   url                11703 non-null  object 
 2   rental_price       11703 non-null  object 
 3   bedrooms           11631 non-null  float64
 4   bathrooms          11676 non-null  float64
 5   car_spaces         10383 non-null  float64
 6   property_type      11703 non-null  object 
 7   land_area          1011 non-null   float64
 8   property_features  11703 non-null  object 
 9   suburb             11703 non-null  object 
 10  postcode           11703 non-null  int64  
 11  scraped_date       11703 non-null  object 
 12  period             11703 non-null  object 
 13  address            11703 non-null  object 
 14  coordinates        11417 non-null  object 
dtypes: float64(4), int64(2), object(9)
memory usage: 1.3+ MB


In [None]:
# Reload the cleaned rental listings summary into df_tmp.
# NOTE(review): this cell was labelled as a placeholder for a function, but it
# defines none; it only re-reads the CSV.
df_tmp = pd.read_csv('../data/processed/domain/rental_listings_summary_cleaned.csv')