# Export urban heat stress indices by landunits
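- This notebook exports hourly, area-weighted mean urban heat stress indices (HIA, SWBGT, HUMIDEX) over Greater Manchester, both per urban landunit and for all urban landunits combined.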
- Simulations: CNTL_GM, LCZ_GM.

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
# Base directory; every input and output path in this notebook is built by
# appending a relative path to this string.
home_path = '/gws/nopw/j04/duicv/yuansun/'

In [2]:
# Build a single outline geometry for Greater Manchester: read the county /
# unitary-authority shapefile, reproject it to `crs`, keep the ten listed
# districts and dissolve them into one geometry.
crs = "EPSG:4326"
greater_manchester_counties = [
    'Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale',
    'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan',
]
shapefile_path = home_path + 'dataset/Office_for_National_Statistics/county_boundary/Counties_and_Unitary_Authorities_December_2023_Boundaries_UK_BSC_4915494739307740134/CTYUA_DEC_2023_UK_BSC.shp'
gdf = gpd.read_file(shapefile_path)
gdf_platecarree = gdf.to_crs(crs)
# Row selector: True for features whose name is one of the listed districts.
is_gm_district = gdf_platecarree['CTYUA23NM'].isin(greater_manchester_counties)
gdf_greater_manchester = gdf_platecarree[is_gm_district]
# dissolve() without a `by` key merges every selected feature into one row.
dissolved = gdf_greater_manchester.dissolve()
boundary = dissolved.geometry[0]


In [3]:
# Rasterise the Greater Manchester outline onto the model grid: a cell is True
# when the point at its (lon, lat) coordinate lies inside `boundary`.
surf_ds = xr.open_dataset(home_path + '/0_lcz_mcr/archive/regional_analysis_def/lnd/hist/regional_analysis_def.clm2.h0.2022-06-01-03600.nc')
lat = surf_ds['lat'].values
lon = surf_ds['lon'].values
lon2d, lat2d = np.meshgrid(lon, lat)
mask = np.zeros_like(lon2d, dtype=bool)
# Visit every (row, column) of the 2-D grid and test its coordinate point.
for i, j in np.ndindex(mask.shape):
    point = Point(lon2d[i, j], lat2d[i, j])
    mask[i, j] = boundary.contains(point)
# Wrap the boolean array with lat/lon coordinates so it can be used in
# xarray `.where(...)` calls against data on the same grid.
mask_da = xr.DataArray(mask, dims=["lat", "lon"], coords={"lat": lat, "lon": lon}, name="mcr_mask")
mask_da


In [8]:
# urban landunit
# For each simulation, compute the area-weighted mean of every heat stress
# index inside the Greater Manchester mask (`mask_da`), separately for each
# urban landunit, and write one CSV per simulation with the columns
# time, numurbl and one column per index.
sim_list = ['def', 'lcz']
# Landunit type ids treated as urban in each simulation (same order as sim_list).
urban_lnd_list = [range(7,10), range(7, 17)]
numurb_list = [3, 10]
# Length of the landunit-type axis in each simulation (ids 0 .. n-1).
numlnd_list = [10, 17]
var_list = ['HIA', 'SWBGT', 'HUMIDEX']
output_dir = home_path + '0_lcz_mcr/output_analysis/regional/heat_stress/landunit/'
for i, tag in enumerate(sim_list):
    lnd_path = home_path + '/0_lcz_mcr/archive/regional_analysis_' + tag + '/lnd/hist/regional_analysis_' + tag + '.clm2.h1.2022-06-01-03600.nc'
    # Open the history file in a context manager so its handle is released
    # before the next simulation is processed.
    with xr.open_dataset(lnd_path) as ds_lnd:
        lon = ds_lnd.lon
        lat = ds_lnd.lat
        ixy = ds_lnd.land1d_ixy
        jxy = ds_lnd.land1d_jxy
        landtype = ds_lnd.land1d_ityplunit
        nlat = len(lat.values)
        nlon = len(lon.values)
        nland = int(np.max(landtype))+1
        time = ds_lnd.time
        ntim = len(time.values)
        # Positions of every 1-d landunit on the (landunit type, lat, lon) grid.
        # The grid indices are shifted by one to become 0-based (presumably the
        # file stores them 1-based - confirm against the model output docs).
        lunit_idx = landtype.values.astype(int)
        row_idx = jxy.values.astype(int) - 1
        col_idx = ixy.values.astype(int) - 1
        landunit_ids = list(range(numlnd_list[i]))
        urban_ids = list(urban_lnd_list[i])
        # Scatter the per-landunit weights onto the grid; cells without a given
        # landunit type stay NaN and are skipped by the sums below.
        weight = np.full([nland,nlat,nlon],np.nan)
        weight[lunit_idx,row_idx,col_idx] = ds_lnd.land1d_wtgcell.values
        grid_weight = xr.DataArray(weight, dims=("numurbl","lat","lon"))
        grid_weight = grid_weight.assign_coords(numurbl=landunit_ids,lat=lat.values,lon=lon.values)
        area_weight_urban = grid_weight.sel(numurbl = urban_ids)
        urban_area_per_grid = ds_lnd.area * ds_lnd.landfrac * area_weight_urban # dims: numurbl (urban ids only), lat, lon
        # apply mask to extract MCR area; one total per urban landunit
        total_urban = urban_area_per_grid.where(mask_da).sum(dim = ['lat', 'lon'])
        # Scratch buffer reused for every variable: each variable overwrites
        # exactly the same set of positions, so no stale values survive.
        grid = np.full([ntim,nland,nlat,nlon],np.nan)
        df_heat_stress = pd.DataFrame()
        for j, var in enumerate(var_list):
            grid[:,lunit_idx,row_idx,col_idx] = ds_lnd[var].values
            grid_dims = xr.DataArray(grid, dims=("time","numurbl","lat","lon"))
            grid_dims = grid_dims.assign_coords(time=time,numurbl=landunit_ids,lat=lat.values,lon=lon.values)
            grid_TSA_U = grid_dims.sel(numurbl=urban_ids).where(mask_da) * urban_area_per_grid
            # Area-weighted mean over the masked region, kept per landunit.
            grid_mean_TSA_U = grid_TSA_U.sum(dim = ['lat', 'lon']) / total_urban
            array = grid_mean_TSA_U.to_dataset(name=var)
            df = array.to_dataframe().reset_index()
            # Convert the time values via their string form, then snap them
            # to the nearest whole hour.
            df['time'] = pd.to_datetime(df['time'].apply(lambda t: t.strftime('%Y-%m-%d %H:%M:%S')))
            df['time'] = df['time'].dt.round('h')
            if j == 0:
                df_heat_stress = df
            else:
                df_heat_stress = pd.merge(df_heat_stress, df, on=['time', 'numurbl'])
    df_heat_stress.to_csv(output_dir + tag + '.csv', index=False)
    

In [11]:
# urban
# For each simulation, compute the area-weighted mean of every heat stress
# index inside the Greater Manchester mask (`mask_da`) over all urban
# landunits combined, and write one CSV per simulation with the columns
# time and one column per index.
sim_list = ['def', 'lcz']
# Landunit type ids treated as urban in each simulation (same order as sim_list).
urban_lnd_list = [range(7,10), range(7, 17)]
numurb_list = [3, 10]
# Length of the landunit-type axis in each simulation (ids 0 .. n-1).
numlnd_list = [10, 17]
var_list = ['HIA', 'SWBGT', 'HUMIDEX']
output_dir = home_path + '0_lcz_mcr/output_analysis/regional/heat_stress/landunit/'
for i, tag in enumerate(sim_list):
    lnd_path = home_path + '/0_lcz_mcr/archive/regional_analysis_' + tag + '/lnd/hist/regional_analysis_' + tag + '.clm2.h1.2022-06-01-03600.nc'
    # Open the history file in a context manager so its handle is released
    # before the next simulation is processed.
    with xr.open_dataset(lnd_path) as ds_lnd:
        lon = ds_lnd.lon
        lat = ds_lnd.lat
        ixy = ds_lnd.land1d_ixy
        jxy = ds_lnd.land1d_jxy
        landtype = ds_lnd.land1d_ityplunit
        nlat = len(lat.values)
        nlon = len(lon.values)
        nland = int(np.max(landtype))+1
        time = ds_lnd.time
        ntim = len(time.values)
        # Positions of every 1-d landunit on the (landunit type, lat, lon) grid.
        # The grid indices are shifted by one to become 0-based (presumably the
        # file stores them 1-based - confirm against the model output docs).
        lunit_idx = landtype.values.astype(int)
        row_idx = jxy.values.astype(int) - 1
        col_idx = ixy.values.astype(int) - 1
        landunit_ids = list(range(numlnd_list[i]))
        urban_ids = list(urban_lnd_list[i])
        # Scatter the per-landunit weights onto the grid; cells without a given
        # landunit type stay NaN and are skipped by the sums below.
        weight = np.full([nland,nlat,nlon],np.nan)
        weight[lunit_idx,row_idx,col_idx] = ds_lnd.land1d_wtgcell.values
        grid_weight = xr.DataArray(weight, dims=("numurbl","lat","lon"))
        grid_weight = grid_weight.assign_coords(numurbl=landunit_ids,lat=lat.values,lon=lon.values)
        area_weight_urban = grid_weight.sel(numurbl = urban_ids)
        urban_area_per_grid = ds_lnd.area * ds_lnd.landfrac * area_weight_urban # dims: numurbl (urban ids only), lat, lon
        # apply mask to extract MCR area; a single total across all urban landunits
        total_urban = urban_area_per_grid.where(mask_da).sum(dim = ['numurbl', 'lat', 'lon'])
        # Scratch buffer reused for every variable: each variable overwrites
        # exactly the same set of positions, so no stale values survive.
        grid = np.full([ntim,nland,nlat,nlon],np.nan)
        df_heat_stress = pd.DataFrame()
        for j, var in enumerate(var_list):
            grid[:,lunit_idx,row_idx,col_idx] = ds_lnd[var].values
            grid_dims = xr.DataArray(grid, dims=("time","numurbl","lat","lon"))
            grid_dims = grid_dims.assign_coords(time=time,numurbl=landunit_ids,lat=lat.values,lon=lon.values)
            grid_TSA_U = grid_dims.sel(numurbl=urban_ids).where(mask_da) * urban_area_per_grid
            # Area-weighted mean over the masked region and all urban landunits.
            grid_mean_TSA_U = grid_TSA_U.sum(dim = ['numurbl', 'lat', 'lon']) / total_urban
            array = grid_mean_TSA_U.to_dataset(name=var)
            df = array.to_dataframe().reset_index()
            # Convert the time values via their string form, then snap them
            # to the nearest whole hour.
            df['time'] = pd.to_datetime(df['time'].apply(lambda t: t.strftime('%Y-%m-%d %H:%M:%S')))
            df['time'] = df['time'].dt.round('h')
            if j == 0:
                df_heat_stress = df
            else:
                df_heat_stress = pd.merge(df_heat_stress, df, on=['time'])
    df_heat_stress.to_csv(output_dir + tag + '_urban.csv', index=False)

In [11]:
# rural
# NOTE: this cell is disabled. Its code is wrapped in a triple-quoted string,
# so running the cell evaluates a string literal and does nothing else.
# If re-enabled, the code would compute the area-weighted mean of TSA over
# landunit types 1-2 inside the Greater Manchester mask, subtract 273.15 from
# it, and write '<tag>_rural.csv' under the urban_air_temperature/landunit
# output directory for each simulation.
'''
sim_list = ['def', 'lcz']
var = 'TSA'
numlnd_list = [10, 17]
rural_lnd = range(1, 3)
output_dir = home_path + '0_lcz_mcr/output_analysis/regional/urban_air_temperature/landunit/'
for i, tag in enumerate(sim_list):
    ds_lnd = xr.open_dataset(home_path + '/0_lcz_mcr/archive/regional_analysis_' + tag + '/lnd/hist/regional_analysis_' + tag + '.clm2.h1.2022-06-01-03600.nc')
    lon = ds_lnd.lon
    lat = ds_lnd.lat
    ixy = ds_lnd.land1d_ixy
    jxy = ds_lnd.land1d_jxy
    landtype = ds_lnd.land1d_ityplunit
    nlat = len(lat.values)
    nlon = len(lon.values)
    nland = int(np.max(landtype))+1
    time = ds_lnd.time
    ntim = len(time.values)
    weight = np.full([nland,nlat,nlon],np.nan)
    weight[landtype.values.astype(int),jxy.values.astype(int) - 1,ixy.values.astype(int) - 1] = ds_lnd.land1d_wtgcell.values
    grid_weight = xr.DataArray(weight, dims=("numurbl","lat","lon"))
    grid_weight = grid_weight.assign_coords(numurbl=[i for i in range(numlnd_list[i])],lat=lat.values,lon=lon.values)
    area_weight_rural = grid_weight.sel(numurbl = rural_lnd)
    rural_area_per_grid = ds_lnd.area * ds_lnd.landfrac * area_weight_rural # numurb (1, 2), lat, lon
    # apply mask to extract MCR area
    total_rural = rural_area_per_grid.where(mask_da).sum(dim = ['numurbl', 'lat', 'lon'])
    grid = np.full([ntim,nland,nlat,nlon],np.nan)
    grid[:,landtype.values.astype(int),jxy.values.astype(int) - 1,ixy.values.astype(int) - 1] = ds_lnd[var].values
    grid_dims = xr.DataArray(grid, dims=("time","numurbl","lat","lon"))
    grid_dims = grid_dims.assign_coords(time=time,numurbl=[i for i in range(numlnd_list[i])],lat=lat.values,lon=lon.values)
    grid_TSA_R = grid_dims.sel(numurbl = rural_lnd).where(mask_da) * rural_area_per_grid
    grid_mean_TSA_R = grid_TSA_R.sum(dim = ['numurbl', 'lat', 'lon']) / total_rural - 273.15   
    array = grid_mean_TSA_R.to_dataset(name=var)
    df = array.to_dataframe().reset_index()
    df['time'] = pd.to_datetime(df['time'].apply(lambda t: t.strftime('%Y-%m-%d %H:%M:%S')))
    df['time'] = df['time'].dt.round('h')
    df['time'] = pd.to_datetime(df['time'])
    df.to_csv(output_dir + tag + '_rural.csv', index=False)
'''    