# Curated Preprocessing for Population Data

In [1]:
import pandas as pd

In [2]:
# Relative paths to the landing, raw and curated data directories,
# all built from one shared data root.
_DATA_ROOT = "../../../data"
LANDING_DATA_DIR = f"{_DATA_ROOT}/landing"
RAW_DATA_DIR = f"{_DATA_ROOT}/raw"
CURATED_DATA_DIR = f"{_DATA_ROOT}/curated"

In [3]:
# Load the population table from the raw data directory.
raw_population_path = f"{RAW_DATA_DIR}/population.csv"
pop = pd.read_csv(raw_population_path)

## Remove Rows not in Victoria

In [4]:
# Show which state/territory names appear in the data before filtering.
state_names = pop["S/T name"]
state_names.unique()

array(['New South Wales', 'Victoria', 'Queensland', 'South Australia',
       'Western Australia', 'Tasmania', 'Northern Territory',
       'Australian Capital Territory', 'Other Territories'], dtype=object)

In [5]:
# Keep only the Victorian rows. `na=False` makes rows with a missing state
# name evaluate to False directly (rather than comparing the match result
# with `== True`), and `regex=False` treats "Victoria" as literal text.
is_victoria = pop["S/T name"].str.contains("Victoria", na=False, regex=False)
pop_vic = pop[is_victoria]

In [6]:
# Preview the first five Victorian rows.
pop_vic.head(n=5)

Unnamed: 0,S/T name,SA2 code,SA2 name,2001,2002,2003,2004,2005,2006,2007,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
642,Victoria,201011001.0,Alfredton,5756,6092,6293,6480,6648,6761,7034,...,9714,10338,11039,11852,12639,13520,14408,15472,16823,17962
643,Victoria,201011002.0,Ballarat,11497,11708,12015,12189,12269,12356,12408,...,12352,12327,12300,12301,12261,12237,12311,12183,12076,11921
644,Victoria,201011005.0,Buninyong,5320,5399,5557,5620,5857,6037,6131,...,6984,7082,7191,7311,7407,7413,7452,7369,7232,7243
645,Victoria,201011006.0,Delacombe,4154,4225,4371,4465,4704,5041,5206,...,6267,6583,6846,7195,7617,8174,8876,9736,10640,11782
646,Victoria,201011007.0,Smythes Creek,3317,3378,3411,3473,3508,3542,3594,...,3914,3945,3966,3990,4002,4040,4109,4148,4213,4218


## Reshape Year Columns from Wide to Long

In [7]:
# Identifier columns are kept as-is; every other column is unpivoted into rows.
col_keep = ["S/T name", "SA2 code", "SA2 name"]
col_convert = [name for name in pop_vic.columns if name not in col_keep]

# Reshape from wide to long: each non-identifier column name goes into
# "Year" and the corresponding cell value goes into "Population".
pop_transposed = pop_vic.melt(
    id_vars=col_keep,
    value_vars=col_convert,
    var_name="Year",
    value_name="Population",
)

## Output to CSV in the Curated Data Directory

In [8]:
# Write the reshaped table to the curated directory, omitting the index column.
curated_population_path = f"{CURATED_DATA_DIR}/population.csv"
pop_transposed.to_csv(curated_population_path, index=False)