# Map SA2 to SAL region
The purpose of this file is to get a 1 to 1 correspondance between SAL and SA2 regions

### Import packages

In [1]:
import geopandas as gpd
import pandas as pd

### Read file

In [2]:
# Read the geopandas shapefile for the SA2 and SAL data
SA2 = gpd.read_file('../data/raw/victoria_region_gdf/SA2_region_gdf.geojson')
SAL = gpd.read_file('../data/raw/victoria_region_gdf/SAL_region_gdf.geojson')

In [4]:
# Select only relevant columns
SA2 = SA2[['SA2_CODE21', 'geometry']]
SAL = SAL[['SAL_CODE21', 'geometry']]

In [5]:
# drop nan values for SA2 and SAL regions that doesn't have a shape
SA2.dropna(inplace = True)
SAL.dropna(inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  SA2.dropna(inplace = True)


### Start mapping

In [6]:
# For each SAL region, find the SA2 region that it belongs to by the most overlap
max_overlap_mapping = []
for sal_idx, sal_row in SAL.iterrows():
    max_overlap_area = 0
    max_overlap_index = None
    
    # Iterate over each SA2 shape and compute intersection area
    for sa2_idx, sa2_row in SA2.iterrows():
        intersection_area = sal_row['geometry'].intersection(sa2_row['geometry']).area
        
        if intersection_area > max_overlap_area:
            max_overlap_area = intersection_area
            max_overlap_index = sa2_idx
    
    max_overlap_mapping.append((sal_idx, max_overlap_index))

# Convert the mapping to a dataframe
mapping_df = pd.DataFrame(max_overlap_mapping, columns=['SAL_CODE21', 'SA2_CODE21'])

# Join SAL with the mapping
result_df = SAL.join(mapping_df.set_index('SAL_CODE21'), how='left')

# Join the result with SA2 to get SA2 details
result_df = result_df.join(SA2, on='SA2_CODE21', rsuffix='_SA2')

In [7]:
# Select the relevant columns in the result dataframe
result_df = result_df[['SAL_CODE21', 'SA2_CODE21_SA2']].rename(columns = {'SA2_CODE21_SA2': 'SA2_CODE21'})

### Save the file

In [8]:
# Save the SA2 to SAL correspondance file to csv
result_df.to_csv('../data/curated/SA2_to_SAL.csv', index = False)