# Download 2021 census demographics by suburb

In [None]:
import requests
import os
import re
import pandas as pd
import glob

from urllib.request import urlretrieve
from urllib.error import HTTPError, URLError
from utils.preprocess import PreprocessUtils

# Create CSV downloads for sheets of interest

In [None]:
# Initialize PreprocessUtils
preprocess_utils = PreprocessUtils()

# Value passed to the workflow as `no_data_list`.
# This cell previously referenced `no_data` without defining it anywhere in
# the notebook, so a fresh "Restart & Run All" failed with NameError before
# the workflow even started. Define it explicitly here.
# NOTE(review): presumably this holds SAL codes known to have no census data
# (to be skipped) -- confirm against PreprocessUtils and populate if a known
# list exists. An empty list is used as the default.
no_data = []

# Use the complete workflow method to process all census data
# This replaces the manual processing loop that was here
preprocess_utils.process_census_data_workflow(
    no_data_list=no_data,
    sal_start=20001,
    sal_end=22944,
    base_data_dir="../data/",
)



# Merge CSVs to create 7 large CSVs

In [None]:
# Nothing to merge by hand in this cell: per the notes in this notebook,
# PreprocessUtils.merge_csvs performs the merging and the workflow method
# already runs that merge step. The cell only reports that to the reader.
print(
    "CSV merging is handled automatically "
    "by the workflow method."
)


## Refactored Notebook - Using PreprocessUtils

This notebook has been refactored to use the `PreprocessUtils.process_census_data_workflow()` method instead of manual processing loops.

### What the workflow method does:
1. **Downloads** census Excel files for all SAL codes (20001-22944)
2. **Extracts suburb names** from the Excel files
3. **Processes all selected sheets** (G02, G04, G17, G33, G36, G49, G60) into CSV files
4. **Merges all CSV files** into 7 large consolidated CSV files in the landing directory

### Benefits of using the workflow method:
- **Cleaner code**: Single method call instead of complex loops
- **Better error handling**: Built-in retry logic and error management
- **Consistent processing**: Uses the same logic across all files
- **Automatic merging**: No need for manual CSV concatenation
- **Maintainable**: Changes to processing logic only need to be made in one place

### Output files created:
- `median_stats.csv` - Statistical summaries
- `population_breakdown.csv` - Age demographics
- `personal_income.csv` - Personal income by age
- `household_income.csv` - Household income distributions
- `dwelling_structure.csv` - Housing types and structures
- `education_level.csv` - Education levels by age
- `job_type.csv` - Employment types by age
