In [1]:
import os
from pathlib import Path

import pandas as pd
import geopandas as gpd

In [2]:
# Region lookup for the land use model: short reference -> region name.
# The names must match the values held in the 'RGN21NM' column of the input
# shapefiles exactly, because the regional subsets are selected by equality.
dict_of_regions = dict(
    EM='East Midlands',
    EoE='East of England',
    Lon='London',
    NE='North East',
    NW='North West',
    SE='South East',
    SW='South West',
    Wales='Wales',
    WM='West Midlands',
    YH='Yorkshire and The Humber',
)

In [3]:
# Root folder holding all input shapefiles.
# Defaults to the original working-drive location; set the LAND_USE_SHAPEFILES
# environment variable to point the notebook at another copy of the data
# without having to edit this cell.
input_path = Path(os.environ.get('LAND_USE_SHAPEFILES', r'F:\Working\Land-Use\SHAPEFILES'))

In [4]:
# Locate the LSOA, MSOA, LAD and GOR shapefiles. Each one sits in its own
# '<level> (2021)' sub-folder of input_path.
lsoa_shapefile = input_path.joinpath('LSOA (2021)', 'LSOA_2021_EnglandWales.shp')
msoa_shapefile = input_path.joinpath('MSOA (2021)', 'MSOA_2021_EnglandWales.shp')
lad_shapefile = input_path.joinpath('LAD (2021)', 'LAD_2021_EnglandWales.shp')
gor_shapefile = input_path.joinpath('GOR (2021)', 'GOR_2021_EnglandWales.shp')

# Master list of files to convert, in processing order.
input_files = [lsoa_shapefile, msoa_shapefile, lad_shapefile, gor_shapefile]

In [5]:
# Split every input layer (each covering the whole of England and Wales) into
# one shapefile + CSV per region. Outputs are written next to the input file:
#   <stem>.csv              attribute table of the full layer (zone translations)
#   <stem>_<reference>.shp  features whose 'RGN21NM' equals the region name
#   <stem>_<reference>.csv  attribute table of that regional subset
for file in input_files:
    data = gpd.read_file(file)

    # Fail before anything is written, naming the offending file, rather than
    # part-way through the exports.
    if 'RGN21NM' not in data.columns:
        raise KeyError(f"'RGN21NM' column not found in {file}")

    # Drop the geometry once per layer; the regional CSVs are row subsets of
    # this same attribute table.
    attributes = data.drop(columns=['geometry'])
    attributes.to_csv(file.with_suffix('.csv'), index=False)

    for reference, region in dict_of_regions.items():
        in_region = data['RGN21NM'] == region

        # An empty subset means the region name matched nothing in this layer
        # (e.g. a spelling mismatch). geopandas rejects or warns about writing
        # an empty frame depending on its version, so report it and move on
        # instead of producing empty outputs.
        if not in_region.any():
            print(
                f"WARNING: no rows with RGN21NM == {region!r} in {file.name}; "
                f"skipping '{reference}' outputs"
            )
            continue

        output = data.loc[in_region]
        output.to_file(file.parent / f'{file.stem}_{reference}.shp')
        attributes.loc[in_region].to_csv(
            file.parent / f'{file.stem}_{reference}.csv', index=False
        )