In [None]:
from pathlib import Path

import pandas as pd

from domain_validation import validate_gisoo


## File Setup Instructions

1. Assign the names of:
   - the validation file (CSV, currently based on census data)
   - the processed district file (GeoJSON).

2. Both files must be placed inside a folder named `input_files`.

3. The `input_files` folder must be located in the **same directory** as this Jupyter notebook (i.e., the directory where you launched JupyterLab).

The current census input is `filtered_census.csv` inside `input_files`.

Make sure your code refers to these files using filenames only (not full paths), for example:

```python
validation_csv_path = 'filtered_census.csv'
processed_district_path = 'your_processed_district_file.geojson'
```


In [None]:
# File names only (no directories): both files are expected inside the
# `input_files` folder that sits next to this notebook.

# Processed district to validate (GeoJSON).
processed_district_path = 'QuebecCity.geojson'
# Reference data used for the validation (CSV, currently census-based).
validation_csv_path = 'filtered_census.csv'


## Assigning Field Titles Based on the Validation File

Using the CSV validation file, assign the appropriate field names to the variables in the cell below for:
- FSA
- number of private dwellings

(The cell below is pre-filled with values corresponding to the Quebec census data.)


In [None]:
# Column titles as they appear in the validation CSV
# (pre-filled for the Quebec census data).

# Column holding the number of private dwellings.
units_num = 'C1_COUNT_TOTAL'
# Column holding the FSA.
validation_file_fsa = 'ALT_GEO_CODE'


| Dwelling Type | Approximate Average Area (m²) | Notes |
|--------------|------------------------------:|-------|
| Single-detached house | ~198 | Based on average size of newly built single-family homes in Québec (2016–2020); older stock likely smaller |
| Semi-detached house | ~110–140 | Estimated from typical Canadian semi-detached homes; no Québec-wide official average |
| Row house (townhouse) | ~120–150 | Typical Québec townhouse floor areas based on market listings |
| Apartment or flat in a duplex | ~60–90 | Corresponds roughly to 3½–4½ apartments (≈650–1000 ft²) |
| Apartment in a building with fewer than five storeys | ~70–100 | Low-rise apartments, often larger units than high-rise |
| Apartment in a building with five or more storeys | ~60–90 | High-rise apartments; higher share of smaller units |
| Other single-attached house | ~100–140 | Broad category; includes linked or clustered houses |
| Movable dwelling (mobile home) | ~60–90 | Typical size range for mobile/manufactured homes |

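(Source: approximate figures generated by ChatGPT. Treat them as rough estimates and check them against official statistics before relying on them.)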


## Census Average Area Dictionary (Optional)

The census total area is derived from a dictionary of average areas per dwelling type.
Defaults are provided, but you can override any keys by passing a dictionary to `census_avg_area_by_type`.
If you leave it empty, the defaults are used.


In [None]:
# Named sets of average areas per census dwelling type. Pick one by name with
# CENSUS_AVG_AREA_PRESET instead of commenting dictionaries in and out.
#
# 'Remaining dwellings' is a synthetic parameter: if it is not equal to 0.0,
# the calculation is based on total private dwellings.
CENSUS_AVG_AREA_PRESETS = {
    # No overrides: an empty dictionary leaves the default values in use.
    'library_defaults': {},
    # The default values written out explicitly, as listed in the original
    # version of this cell (not re-checked against domain_validation here).
    'explicit_defaults': {
        'Single-detached house': 160.0,
        'Semi-detached house': 160.0,
        'Row house': 120.0,
        'Apartment or flat in a duplex': 95.0,
        'Apartment in a building that has fewer than five storeys': 95.0,
        'Apartment in a building that has five or more storeys': 95.0,
        'Other single-attached house': 95.0,
        'Movable dwelling': 95.0,
        'Remaining dwellings': 0.0,
    },
    'test_2': {
        'Single-detached house': 198.0,
        'Semi-detached house': 125.0,
        'Row house': 135.0,
        'Apartment or flat in a duplex': 75.0,
        'Apartment in a building that has fewer than five storeys': 85.0,
        'Apartment in a building that has five or more storeys': 75.0,
        'Other single-attached house': 120.0,
        'Movable dwelling': 75.0,
        'Remaining dwellings': 0.0,
    },
    'test_3': {
        'Single-detached house': 160.0,
        'Semi-detached house': 125.0,
        'Row house': 120.0,
        'Apartment or flat in a duplex': 75.0,
        'Apartment in a building that has fewer than five storeys': 85.0,
        'Apartment in a building that has five or more storeys': 75.0,
        'Other single-attached house': 120.0,
        'Movable dwelling': 75.0,
        'Remaining dwellings': 0.0,
    },
    'test_4': {
        'Single-detached house': 160.0,
        'Semi-detached house': 135.0,
        'Row house': 120.0,
        'Apartment or flat in a duplex': 75.0,
        'Apartment in a building that has fewer than five storeys': 85.0,
        'Apartment in a building that has five or more storeys': 75.0,
        'Other single-attached house': 120.0,
        'Movable dwelling': 75.0,
        'Remaining dwellings': 0.0,
    },
}

# Name of the preset to use; 'library_defaults' reproduces the previous
# behaviour of this cell (an empty override dictionary).
CENSUS_AVG_AREA_PRESET = 'library_defaults'

# Copy the selected preset so that editing `census_avg_area_by_type` later
# never alters the preset table itself. Individual keys may still be
# overridden on this dictionary before the validator is instantiated.
census_avg_area_by_type = dict(CENSUS_AVG_AREA_PRESETS[CENSUS_AVG_AREA_PRESET])


## Assigning Field Titles Based on the Processed District

From the GeoJSON file of your processed district, assign the field names to the corresponding variables in the cell below for:
- postal code
- function
- area
- floor number


In [None]:
# Field titles as they appear in the processed district GeoJSON.

# Field holding the building area.
area = 'total_area'
# Field holding the number of floors.
floor_num = 'NBR_ETAGE'
# Field holding the building function (use type).
function = 'function'
# Field holding the postal code.
postal_code = 'CODE_POSTA'


## Assigning Value to the Function's Field

Set `function_value` to the function you want to validate.
For example, if you are looking for residential buildings and the value representing residential use is `"Logement"`, assign that string to `function_value`.


In [None]:
# Value of the function field that selects the buildings to validate
# ('Logement' is the value representing residential use in this dataset).
function_value = 'Logement'


## Instantiate the Validator


In [None]:
# Build the validator from the file names and field titles configured in the
# cells above. Every argument except the last is passed positionally, so the
# order must match the ValidateGISOO constructor signature, which is not
# visible in this notebook -- check domain_validation.validate_gisoo before
# reordering or inserting arguments.
validate_district = validate_gisoo.ValidateGISOO(
    validation_csv_path,  # validation (census) CSV file name
    processed_district_path,  # processed district GeoJSON file name
    validation_file_fsa,  # CSV column holding the FSA
    units_num,  # CSV column holding the number of private dwellings
    postal_code,  # GeoJSON field holding the postal code
    function,  # GeoJSON field holding the building function
    function_value,  # function value selecting the buildings to validate
    area,  # GeoJSON field holding the area
    floor_num,  # GeoJSON field holding the floor number
    # Optional per-dwelling-type average areas; an empty dict keeps the defaults.
    census_avg_area_by_type=census_avg_area_by_type,
)


## Comparison Table


In [None]:
# Codes exposed by the validator for the processed district.
codes = validate_district.district_codes

# Compute the comparison for those codes, then wrap the result in a DataFrame
# so the notebook renders it as a table.
comparison_rows = validate_district.comparison_table(codes)
comparison_df = pd.DataFrame(comparison_rows)
comparison_df


## Save Comparison CSV


In [None]:
# Label handed to the CSV export (presumably used to name the output file --
# confirm in domain_validation). Spelling corrected from 'qubec_city'.
district_name = 'quebec_city'

# NOTE: the keyword is spelled `distric_name` in the original call, so it is
# kept as-is; it presumably matches the parameter name in the library's
# signature. Do not "fix" it here without changing the library as well.
validate_district.comparison_csv(codes, avg_area=90, distric_name=district_name)


## Save Area Comparison Plot


In [None]:
# Folder that receives generated artefacts; created on first use.
output_dir = Path('output_files')
output_dir.mkdir(exist_ok=True)

# `validate_district` is the validator *instance* created earlier, not the
# module, so the class is reached through the imported `validate_gisoo` module.
# The plotting helper is called on the class with keyword arguments only, and
# it returns the figure together with its axes.
fig, ax = validate_gisoo.ValidateGISOO.plot_area_comparison(
    codes_info=comparison_df['FSA'],
    areas=comparison_df['Cleaned Total Area (with proxy)'],
    census_areas=comparison_df['Census Total Area (by type)'],
    title='Area comparison - Quebec_City',
    y_label='Area (m^2)',
    x_label='Cleaned',
)

# Persist the figure and show the saved path as the cell output.
plot_path = output_dir / 'Quebec_City_area_comparison.png'
fig.savefig(plot_path, dpi=150)
plot_path
