# Preprocessing income
This notebook preprocesses the income data and joins it to the Domain property listings.

### Import packages

In [1]:
import pandas as pd

### Read the file

In [10]:
# Relative path to the landing-zone CSV that holds the suburb income table.
income_path = "../data/landing/ABS_data/income.csv"

In [11]:
# Load the suburb income table from the landing CSV into a DataFrame.
income_df = pd.read_csv(filepath_or_buffer=income_path)

In [12]:
# Preview the first rows to check which columns were read.
income_df.head()

Unnamed: 0,rank,suburb,value,link,page
0,1,Canterbury,"$2,352",http://house.speakingsame.com/profile.php?sta=...,0
1,2,Park Orchards,"$2,329",http://house.speakingsame.com/profile.php?sta=...,0
2,3,Wonga Park,"$2,221",http://house.speakingsame.com/profile.php?sta=...,0
3,4,Brighton,"$2,200",http://house.speakingsame.com/profile.php?sta=...,0
4,5,Camberwell,"$2,122",http://house.speakingsame.com/profile.php?sta=...,0


In [13]:
# (rows, columns) of the loaded income table.
income_df.shape

(665, 5)

In [14]:
# Print column dtypes and non-null counts for the loaded income table.
income_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 665 entries, 0 to 664
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   rank    665 non-null    int64 
 1   suburb  665 non-null    object
 2   value   665 non-null    object
 3   link    665 non-null    object
 4   page    665 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 26.1+ KB


In [15]:
# extract the weekly income column
# 'value' holds strings such as "$2,352". Strip the currency symbol and the
# thousands separator as literal text, then convert to float.
# regex=False is passed explicitly: '$' is a regex metacharacter (end-of-string
# anchor), and relying on the default `regex` argument makes the result depend
# on the installed pandas version and emits a warning on older releases.
income_df['Weekly Income($)'] = (
    income_df['value']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)
income_df.head()


  income_df['Weekly Income($)'] = income_df['value'].str.replace('$', '').str.replace(',', '').astype(float)


Unnamed: 0,rank,suburb,value,link,page,Weekly Income($)
0,1,Canterbury,"$2,352",http://house.speakingsame.com/profile.php?sta=...,0,2352.0
1,2,Park Orchards,"$2,329",http://house.speakingsame.com/profile.php?sta=...,0,2329.0
2,3,Wonga Park,"$2,221",http://house.speakingsame.com/profile.php?sta=...,0,2221.0
3,4,Brighton,"$2,200",http://house.speakingsame.com/profile.php?sta=...,0,2200.0
4,5,Camberwell,"$2,122",http://house.speakingsame.com/profile.php?sta=...,0,2122.0


In [16]:
# Keep only the suburb name and its numeric weekly income.
selected_columns = ['suburb', 'Weekly Income($)']
final_income = income_df.loc[:, selected_columns]
final_income.head()

Unnamed: 0,suburb,Weekly Income($)
0,Canterbury,2352.0
1,Park Orchards,2329.0
2,Wonga Park,2221.0
3,Brighton,2200.0
4,Camberwell,2122.0


## Save the file

In [17]:
# Destination for the cleaned suburb-level income table
file_path = "../data/raw/ABS_data/income_suburb.csv"

# Write the table as CSV, leaving out the positional index
final_income.to_csv(path_or_buf=file_path, index=False)

## Join with the property data

In [18]:
# Load the SA2-level income table (one row per SA2, one column per year).
# NOTE(review): the preview shows a stray 'Unnamed: 0' column, presumably a
# saved index from an earlier export — confirm whether it should be dropped.
income_sa2_path = "../data/landing/ABS_data/income_per_person_sa2.csv"
income_sa2 = pd.read_csv(filepath_or_buffer=income_sa2_path)
income_sa2.head()

Unnamed: 0.1,Unnamed: 0,SA2,SA2 NAME,2012,2013,2014,2015,2016,2017,2018,2019
0,0,201011001,Alfredton,51670.073951,52993.212211,54624.783507,56373.600964,57746.696785,60026.081134,62315.127415,63668.274127
1,1,201011002,Ballarat,61177.132231,63669.220872,65506.668018,67216.71066,72754.453378,71895.145248,74671.366381,77875.693878
2,2,201011003,Ballarat - North,48946.980038,50871.715079,52023.790882,53016.994081,54660.327661,56067.366669,59786.487796,60366.697603
3,3,201011004,Ballarat - South,41918.131939,43625.17389,44721.796782,45473.451837,46143.213201,47676.11197,49041.036441,50778.316302
4,4,201011005,Buninyong,53538.21604,55674.656008,56765.499102,58010.968734,58292.425144,60008.74745,61564.815018,63257.766533


In [23]:
# Rename the SA2 name column to 'SAL_NAME21' so it can serve as the join key.
# NOTE(review): this only relabels the column; SA2 names and SAL (suburb)
# names may not line up one-to-one — verify before relying on the join.
income_sa2 = income_sa2.rename({'SA2 NAME': 'SAL_NAME21'}, axis='columns')
income_sa2.head()

Unnamed: 0.1,Unnamed: 0,SA2,SAL_NAME21,2012,2013,2014,2015,2016,2017,2018,2019
0,0,201011001,Alfredton,51670.073951,52993.212211,54624.783507,56373.600964,57746.696785,60026.081134,62315.127415,63668.274127
1,1,201011002,Ballarat,61177.132231,63669.220872,65506.668018,67216.71066,72754.453378,71895.145248,74671.366381,77875.693878
2,2,201011003,Ballarat - North,48946.980038,50871.715079,52023.790882,53016.994081,54660.327661,56067.366669,59786.487796,60366.697603
3,3,201011004,Ballarat - South,41918.131939,43625.17389,44721.796782,45473.451837,46143.213201,47676.11197,49041.036441,50778.316302
4,4,201011005,Buninyong,53538.21604,55674.656008,56765.499102,58010.968734,58292.425144,60008.74745,61564.815018,63257.766533


In [19]:
# Show the first rows of the suburb-level income table again for reference.
final_income.head()

Unnamed: 0,suburb,Weekly Income($)
0,Canterbury,2352.0
1,Park Orchards,2329.0
2,Wonga Park,2221.0
3,Brighton,2200.0
4,Camberwell,2122.0


In [26]:
# Load the property listing dataset that the later cells trim and join.
final_path = '../data/raw/final_data_1.csv'
final_df = pd.read_csv(filepath_or_buffer=final_path)
final_df.head()

### Drop unnecessary columns

In [27]:
# Remove the existing weekly income column from the listings.
final_df = final_df.drop(columns=['Weekly Income($)'])
final_df.head()

Unnamed: 0.1,Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,...,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_park_distance_km,nearest_tram_station_distance_km,nearest_train_station_distance_km,nearest_bus_stop_distance_km,nearest_school_distance_km
0,0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,...,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,29.641073,0.135415,0.370453,0.135597,0.239541
1,1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,...,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,29.661964,0.070096,0.273663,0.087685,0.296386
2,2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,...,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,29.706333,0.114239,0.336648,0.126732,0.193919
3,3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,...,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,29.054073,0.036919,0.613539,0.112485,0.356671
4,4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,...,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,29.415873,0.120055,0.324232,0.080214,0.242857


In [30]:
# Remove the park-distance feature from the listings.
final_df = final_df.drop(columns=['nearest_park_distance_km'])
final_df.head()

Unnamed: 0.1,Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,...,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_tram_station_distance_km,nearest_train_station_distance_km,nearest_bus_stop_distance_km,nearest_school_distance_km
0,0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,...,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,0.135415,0.370453,0.135597,0.239541
1,1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,...,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,0.070096,0.273663,0.087685,0.296386
2,2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,...,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,0.114239,0.336648,0.126732,0.193919
3,3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,...,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,0.036919,0.613539,0.112485,0.356671
4,4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,...,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,0.120055,0.324232,0.080214,0.242857


In [31]:
# Percentage columns to remove from the listings.
drop_columns = ['LT_resident_pcg', 'owner_pcg', 'family_pcg']
# Use the list defined above instead of repeating it inline, and rebind the
# result rather than mutating the frame in place.
final_df = final_df.drop(columns=drop_columns)
# Preview only: the dropna() result is not assigned, so final_df itself still
# contains the rows with missing values.
# NOTE(review): assign the result if incomplete rows are meant to be removed
# before the frame is saved — confirm intent.
final_df.dropna()

Unnamed: 0.1,Unnamed: 0,Location,type_property,price,state,rooms,bath,parking,link,public_1,...,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_tram_station_distance_km,nearest_train_station_distance_km,nearest_bus_stop_distance_km,nearest_school_distance_km
0,0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,VIC,1.0,1.0,0.0,https://www.domain.com.au/4506-33-rose-lane-me...,Docklands Primary School,...,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,0.135415,0.370453,0.135597,0.239541
1,1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,VIC,2.0,2.0,1.0,https://www.domain.com.au/1715-220-spencer-str...,Docklands Primary School,...,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,0.070096,0.273663,0.087685,0.296386
2,2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,VIC,2.0,2.0,0.0,https://www.domain.com.au/5801-648-lonsdale-st...,Docklands Primary School,...,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,0.114239,0.336648,0.126732,0.193919
3,3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,VIC,1.0,1.0,0.0,https://www.domain.com.au/521-422-collins-st-m...,Docklands Primary School,...,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,0.036919,0.613539,0.112485,0.356671
4,4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,VIC,2.0,2.0,0.0,https://www.domain.com.au/603-199-william-stre...,Docklands Primary School,...,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,0.120055,0.324232,0.080214,0.242857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12017,12017,62 Athenaeum Avenue Clyde North VIC 3978,House,$480 per week,VIC,3.0,2.0,2.0,https://www.domain.com.au/62-athenaeum-avenue-...,Clyde Secondary College,...,0.0,7.0,4529.0,"[-38.12142679999999, 145.3735367]",49.483555,4.579141,33.913455,6.966027,2.484480,1.773057
12018,12018,5 Kenana Street Clyde North VIC 3978,House,$580,VIC,4.0,2.0,2.0,https://www.domain.com.au/5-kenana-street-clyd...,Ramlegh Park Primary School,...,0.0,7.0,4529.0,"[-38.1039797, 145.3612992]",47.377999,3.850631,31.697865,5.924626,2.368958,1.544524
12020,12020,81 Ramlegh Boulevard Clyde North VIC 3978,Townhouse,"$495pw | $2,151pcm",VIC,3.0,2.0,2.0,https://www.domain.com.au/81-ramlegh-boulevard...,Ramlegh Park Primary School,...,0.0,7.0,4529.0,"[-38.1168601, 145.3449293]",47.334706,2.072609,32.288701,5.948969,0.468924,0.621983
12023,12023,26 Keighery Drive Clyde North VIC 3978,House,$600 per week leased,VIC,4.0,2.0,2.0,https://www.domain.com.au/26-keighery-drive-cl...,Ramlegh Park Primary School,...,0.0,7.0,4529.0,"[-38.111692, 145.3467194]",47.041894,2.345958,31.845421,5.941091,1.052354,0.819029


In [None]:
# Preview the first rows of the listings frame.
final_df.head()

In [32]:
# Destination for the listings after the column drops
file_path = "../data/raw/final_drop_pcg.csv"

# Write the listings as CSV, leaving out the positional index
final_df.to_csv(path_or_buf=file_path, index=False)

## Save the final datasets for processing

In [24]:
# Join the listings with the SA2 income table on the shared 'SAL_NAME21' key.
# NOTE(review): an outer join also keeps income rows that match no listing
# (and listings that match no income row) — confirm this is intended rather
# than a left join onto the listings.
result = final_df.merge(income_sa2, on='SAL_NAME21', how='outer')
result.head()

In [25]:
# Specify the file path where you want to save the CSV file
file_path = "../data/raw/ABS_data/final_2.csv"

# Use the to_csv method to save the DataFrame to a CSV file
result.to_csv(file_path, index=False)