# Export urban air temperature
- This notebook exports urban air temperature from HadUK-Grid, UKCP18-Local, and the model simulations.
- Simulations: CNTL_GM, LCZ_GM.

In [204]:
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import os
import rioxarray
import cartopy.crs as ccrs
from pyproj import Proj, transform
# Root directory from which every input and output path in this file is built.
# It ends with '/', and later paths are formed by plain string concatenation.
home_path = '/gws/nopw/j04/duicv/yuansun/'

In [2]:
# Build one polygon outlining Greater Manchester from the ONS county /
# unitary-authority boundary shapefile.
greater_manchester_counties = [
    'Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale',
    'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan',
]
crs = "EPSG:4326"
shapefile_path = home_path + 'dataset/Office_for_National_Statistics/county_boundary/Counties_and_Unitary_Authorities_December_2023_Boundaries_UK_BSC_4915494739307740134/CTYUA_DEC_2023_UK_BSC.shp'

gdf = gpd.read_file(shapefile_path)
# Reproject so the polygon can be compared with longitude/latitude points.
gdf_platecarree = gdf.to_crs(crs)

# Keep only the rows whose area name is one of the listed authorities.
in_greater_manchester = gdf_platecarree['CTYUA23NM'].isin(greater_manchester_counties)
gdf_greater_manchester = gdf_platecarree[in_greater_manchester]

# dissolve() without a grouping key merges all remaining rows into one geometry.
dissolved = gdf_greater_manchester.dissolve()
boundary = dissolved.geometry[0]

In [3]:
# Boolean (lat, lon) mask that is True where a model grid point lies inside
# the Greater Manchester boundary polygon.
surf_ds = xr.open_dataset(home_path + '/0_lcz_mcr/archive/regional_analysis_def/lnd/hist/regional_analysis_def.clm2.h0.2022-06-01-03600.nc')
lon = surf_ds['lon'].values
lat = surf_ds['lat'].values
lon2d, lat2d = np.meshgrid(lon, lat)

mask = np.zeros_like(lon2d, dtype=bool)
# Visit every (row, column) of the grid in row-major order and test the
# corresponding lon/lat point against the polygon.
for cell in np.ndindex(mask.shape):
    mask[cell] = boundary.contains(Point(lon2d[cell], lat2d[cell]))

mask_da = xr.DataArray(
    mask,
    dims=["lat", "lon"],
    coords={"lat": lat, "lon": lon},
    name="mcr_mask",
)
mask_da


# simulation

In [5]:
# urban landunit
# For each simulation, compute the area-weighted mean of `var` inside the
# Greater Manchester mask for every urban landunit type separately, and write
# the resulting time series to <output_dir><tag>.csv.
sim_list = ['def', 'lcz']
# Landunit-type indices treated as urban, one entry per simulation in sim_list.
urban_lnd_list = [range(7,10), range(7, 17)]
numurb_list = [3, 10]  # not referenced in this cell
# Length of the landunit-type axis expected for each simulation.
numlnd_list = [10, 17]
var = 'TSA'
output_dir = home_path + '0_lcz_mcr/output_analysis/regional/urban_air_temperature/landunit/'
for i, tag in enumerate(sim_list):
    # Open inside a context manager so the history file is closed once this
    # simulation has been processed instead of staying open for the session.
    with xr.open_dataset(home_path + '/0_lcz_mcr/archive/regional_analysis_' + tag + '/lnd/hist/regional_analysis_' + tag + '.clm2.h1.2022-06-01-03600.nc') as ds_lnd:
        lon = ds_lnd.lon
        lat = ds_lnd.lat
        ixy = ds_lnd.land1d_ixy
        jxy = ds_lnd.land1d_jxy
        landtype = ds_lnd.land1d_ityplunit
        time = ds_lnd.time
        nlat = len(lat.values)
        nlon = len(lon.values)
        nland = int(np.max(landtype))+1
        ntim = len(time.values)

        # (landunit type, row, column) position of every 1-D landunit entry;
        # ixy/jxy are shifted by one to become 0-based array indices.
        type_idx = landtype.values.astype(int)
        row_idx = jxy.values.astype(int) - 1
        col_idx = ixy.values.astype(int) - 1
        landunit_ids = list(range(numlnd_list[i]))
        urban_ids = list(urban_lnd_list[i])

        # Scatter the 1-D landunit weights onto a (type, lat, lon) grid;
        # positions without a landunit stay NaN.
        weight = np.full([nland,nlat,nlon],np.nan)
        weight[type_idx, row_idx, col_idx] = ds_lnd.land1d_wtgcell.values
        grid_weight = xr.DataArray(weight, dims=("numurbl","lat","lon"))
        grid_weight = grid_weight.assign_coords(numurbl=landunit_ids,lat=lat.values,lon=lon.values)
        area_weight_urban = grid_weight.sel(numurbl = urban_ids)
        # Area of each selected urban landunit type in every grid cell.
        urban_area_per_grid = ds_lnd.area * ds_lnd.landfrac * area_weight_urban
        # apply mask to extract MCR area
        total_urban = urban_area_per_grid.where(mask_da).sum(dim = ['lat', 'lon'])

        # Same scatter for the variable itself, with a leading time axis.
        grid = np.full([ntim,nland,nlat,nlon],np.nan)
        grid[:, type_idx, row_idx, col_idx] = ds_lnd[var].values
        grid_dims = xr.DataArray(grid, dims=("time","numurbl","lat","lon"))
        grid_dims = grid_dims.assign_coords(time=time,numurbl=landunit_ids,lat=lat.values,lon=lon.values)
        grid_TSA_U = grid_dims.sel(numurbl=urban_ids).where(mask_da) * urban_area_per_grid
        # Area-weighted mean per landunit type; the 273.15 offset presumably
        # converts Kelvin to degrees Celsius (confirm the units of `var`).
        grid_mean_TSA_U = grid_TSA_U.sum(dim = ['lat', 'lon']) / total_urban - 273.15

    array = grid_mean_TSA_U.to_dataset(name=var)
    df = array.to_dataframe().reset_index()
    # Time stamps are formatted with strftime and re-parsed so the column holds
    # pandas datetimes (presumably the raw values are cftime objects - confirm),
    # then rounded to the nearest hour.
    df['time'] = pd.to_datetime(df['time'].apply(lambda t: t.strftime('%Y-%m-%d %H:%M:%S'))).dt.round('h')
    df.to_csv(output_dir + tag + '.csv', index=False)
    

In [4]:
# urban
# For each simulation, compute the area-weighted mean of `var` inside the
# Greater Manchester mask over all urban landunit types together, and write
# the resulting time series to <output_dir><tag>_urban.csv.
sim_list = ['def', 'lcz']
# Landunit-type indices treated as urban, one entry per simulation in sim_list.
urban_lnd_list = [range(7,10), range(7, 17)]
numurb_list = [3, 10]  # not referenced in this cell
# Length of the landunit-type axis expected for each simulation.
numlnd_list = [10, 17]
var = 'TSA'
output_dir = home_path + '0_lcz_mcr/output_analysis/regional/urban_air_temperature/landunit/'
for i, tag in enumerate(sim_list):
    # Open inside a context manager so the history file is closed once this
    # simulation has been processed instead of staying open for the session.
    with xr.open_dataset(home_path + '/0_lcz_mcr/archive/regional_analysis_' + tag + '/lnd/hist/regional_analysis_' + tag + '.clm2.h1.2022-06-01-03600.nc') as ds_lnd:
        lon = ds_lnd.lon
        lat = ds_lnd.lat
        ixy = ds_lnd.land1d_ixy
        jxy = ds_lnd.land1d_jxy
        landtype = ds_lnd.land1d_ityplunit
        time = ds_lnd.time
        nlat = len(lat.values)
        nlon = len(lon.values)
        nland = int(np.max(landtype))+1
        ntim = len(time.values)

        # (landunit type, row, column) position of every 1-D landunit entry;
        # ixy/jxy are shifted by one to become 0-based array indices.
        type_idx = landtype.values.astype(int)
        row_idx = jxy.values.astype(int) - 1
        col_idx = ixy.values.astype(int) - 1
        landunit_ids = list(range(numlnd_list[i]))
        urban_ids = list(urban_lnd_list[i])

        # Scatter the 1-D landunit weights onto a (type, lat, lon) grid;
        # positions without a landunit stay NaN.
        weight = np.full([nland,nlat,nlon],np.nan)
        weight[type_idx, row_idx, col_idx] = ds_lnd.land1d_wtgcell.values
        grid_weight = xr.DataArray(weight, dims=("numurbl","lat","lon"))
        grid_weight = grid_weight.assign_coords(numurbl=landunit_ids,lat=lat.values,lon=lon.values)
        area_weight_urban = grid_weight.sel(numurbl = urban_ids)
        # Area of each selected urban landunit type in every grid cell.
        urban_area_per_grid = ds_lnd.area * ds_lnd.landfrac * area_weight_urban
        # apply mask to extract MCR area
        total_urban = urban_area_per_grid.where(mask_da).sum(dim = ['numurbl', 'lat', 'lon'])

        # Same scatter for the variable itself, with a leading time axis.
        grid = np.full([ntim,nland,nlat,nlon],np.nan)
        grid[:, type_idx, row_idx, col_idx] = ds_lnd[var].values
        grid_dims = xr.DataArray(grid, dims=("time","numurbl","lat","lon"))
        grid_dims = grid_dims.assign_coords(time=time,numurbl=landunit_ids,lat=lat.values,lon=lon.values)
        grid_TSA_U = grid_dims.sel(numurbl=urban_ids).where(mask_da) * urban_area_per_grid
        # Area-weighted mean over all urban types; the 273.15 offset presumably
        # converts Kelvin to degrees Celsius (confirm the units of `var`).
        grid_mean_TSA_U = grid_TSA_U.sum(dim = ['numurbl', 'lat', 'lon']) / total_urban - 273.15

    array = grid_mean_TSA_U.to_dataset(name=var)
    df = array.to_dataframe().reset_index()
    # Time stamps are formatted with strftime and re-parsed so the column holds
    # pandas datetimes (presumably the raw values are cftime objects - confirm),
    # then rounded to the nearest hour.
    df['time'] = pd.to_datetime(df['time'].apply(lambda t: t.strftime('%Y-%m-%d %H:%M:%S'))).dt.round('h')
    df.to_csv(output_dir + tag + '_urban.csv', index=False)

In [11]:
# rural
# For each simulation, compute the area-weighted mean of `var` inside the
# Greater Manchester mask over the landunit types listed in `rural_lnd`, and
# write the resulting time series to <output_dir><tag>_rural.csv.
sim_list = ['def', 'lcz']
var = 'TSA'
# Length of the landunit-type axis expected for each simulation.
numlnd_list = [10, 17]
# Landunit-type indices treated as rural (1 and 2) in both simulations.
rural_lnd = range(1, 3)
output_dir = home_path + '0_lcz_mcr/output_analysis/regional/urban_air_temperature/landunit/'
for i, tag in enumerate(sim_list):
    # Open inside a context manager so the history file is closed once this
    # simulation has been processed instead of staying open for the session.
    with xr.open_dataset(home_path + '/0_lcz_mcr/archive/regional_analysis_' + tag + '/lnd/hist/regional_analysis_' + tag + '.clm2.h1.2022-06-01-03600.nc') as ds_lnd:
        lon = ds_lnd.lon
        lat = ds_lnd.lat
        ixy = ds_lnd.land1d_ixy
        jxy = ds_lnd.land1d_jxy
        landtype = ds_lnd.land1d_ityplunit
        time = ds_lnd.time
        nlat = len(lat.values)
        nlon = len(lon.values)
        nland = int(np.max(landtype))+1
        ntim = len(time.values)

        # (landunit type, row, column) position of every 1-D landunit entry;
        # ixy/jxy are shifted by one to become 0-based array indices.
        type_idx = landtype.values.astype(int)
        row_idx = jxy.values.astype(int) - 1
        col_idx = ixy.values.astype(int) - 1
        landunit_ids = list(range(numlnd_list[i]))
        rural_ids = list(rural_lnd)

        # Scatter the 1-D landunit weights onto a (type, lat, lon) grid;
        # positions without a landunit stay NaN.
        weight = np.full([nland,nlat,nlon],np.nan)
        weight[type_idx, row_idx, col_idx] = ds_lnd.land1d_wtgcell.values
        grid_weight = xr.DataArray(weight, dims=("numurbl","lat","lon"))
        grid_weight = grid_weight.assign_coords(numurbl=landunit_ids,lat=lat.values,lon=lon.values)
        area_weight_rural = grid_weight.sel(numurbl = rural_ids)
        # Area of each selected rural landunit type in every grid cell.
        rural_area_per_grid = ds_lnd.area * ds_lnd.landfrac * area_weight_rural
        # apply mask to extract MCR area
        total_rural = rural_area_per_grid.where(mask_da).sum(dim = ['numurbl', 'lat', 'lon'])

        # Same scatter for the variable itself, with a leading time axis.
        grid = np.full([ntim,nland,nlat,nlon],np.nan)
        grid[:, type_idx, row_idx, col_idx] = ds_lnd[var].values
        grid_dims = xr.DataArray(grid, dims=("time","numurbl","lat","lon"))
        grid_dims = grid_dims.assign_coords(time=time,numurbl=landunit_ids,lat=lat.values,lon=lon.values)
        grid_TSA_R = grid_dims.sel(numurbl = rural_ids).where(mask_da) * rural_area_per_grid
        # Area-weighted mean over the rural types; the 273.15 offset presumably
        # converts Kelvin to degrees Celsius (confirm the units of `var`).
        grid_mean_TSA_R = grid_TSA_R.sum(dim = ['numurbl', 'lat', 'lon']) / total_rural - 273.15

    array = grid_mean_TSA_R.to_dataset(name=var)
    df = array.to_dataframe().reset_index()
    # Time stamps are formatted with strftime and re-parsed so the column holds
    # pandas datetimes (presumably the raw values are cftime objects - confirm),
    # then rounded to the nearest hour.
    df['time'] = pd.to_datetime(df['time'].apply(lambda t: t.strftime('%Y-%m-%d %H:%M:%S'))).dt.round('h')
    df.to_csv(output_dir + tag + '_rural.csv', index=False)
    

# HadUK-Grid

In [149]:
# Load the HadUK-Grid daily tasmax field for one time stamp and reproject it
# from EPSG:27700 to EPSG:4326.
res = '1km'
version = 'v1.3.0.ceda'
date = 'v20240514'
haduk_grid_path = f'{home_path}dap.ceda.ac.uk/badc/ukmo-hadobs/data/insitu/MOHC/HadOBS/HadUK-Grid/{version}/{res}/tasmax/day/{date}/tasmax_hadukgrid_uk_{res}_day_20220701-20220731.nc'
ds_haduk = xr.open_dataset(haduk_grid_path)

# Exact time label of the requested day in the file.
target_time = '2022-07-19T12:00:00.000000000'
ds_haduk_tasmax = ds_haduk['tasmax'].sel(time = target_time)

# Drop the scalar time and the latitude/longitude coordinates before the CRS
# is attached and the field is reprojected.
unwanted_coords = ['time', 'latitude', 'longitude']
ds_haduk_tasmax_drop_dims = ds_haduk_tasmax.drop_vars(unwanted_coords)

tasmax_19072022 = ds_haduk_tasmax_drop_dims.rio.write_crs("EPSG:27700")
ds_reprojected = tasmax_19072022.rio.reproject("EPSG:4326")
ds_reprojected

In [148]:
# Crop the reprojected HadUK-Grid field to the extent of `lat`/`lon` widened by
# 0.01 on every side, rename x/y to lon/lat, and save the result as NetCDF.
pad = 0.01
lat_lo, lat_hi = lat.min() - pad, lat.max() + pad
lon_lo, lon_hi = lon.min() - pad, lon.max() + pad
inside_domain = (
    (ds_reprojected.y >= lat_lo)
    & (ds_reprojected.y <= lat_hi)
    & (ds_reprojected.x >= lon_lo)
    & (ds_reprojected.x <= lon_hi)
)
# drop=True trims rows/columns that fall entirely outside the box.
ds_haduk_mcr_sel = ds_reprojected.where(inside_domain, drop=True).rename({'x': 'lon', 'y': 'lat'})

output_filename = home_path + '0_lcz_mcr/output_analysis/regional/urban_air_temperature/grid/haduk_19_07_tmax_mcr.nc'
# Delete any previous output before writing the new file.
if os.path.exists(output_filename):
    os.remove(output_filename)
ds_haduk_mcr_sel.to_netcdf(output_filename)
ds_haduk_mcr_sel

In [150]:
# do not use the latitude and longitude in the dataset
# The triple-quoted string below is disabled code kept for reference; as a bare
# string literal it does nothing when run. It would have replaced the
# projection_y/projection_x coordinates of ds_haduk with the first column of
# `latitude` and the first row of `longitude`.
'''
replaced_lat = ds_haduk.latitude[:,0].values
replaced_lon = ds_haduk.longitude[0].values
replaced_haduk = ds_haduk.assign_coords(projection_y_coordinate=replaced_lat, projection_x_coordinate=replaced_lon)
replaced_haduk
'''

# Echoed repr of the string literal above (looks like notebook cell output); it has no effect.
'\nreplaced_lat = ds_haduk.latitude[:,0].values\nreplaced_lon = ds_haduk.longitude[0].values\nreplaced_haduk = ds_haduk.assign_coords(projection_y_coordinate=replaced_lat, projection_x_coordinate=replaced_lon)\nreplaced_haduk\n'

# UKCP18

In [213]:
# Average one daily `tasmax` field across the listed UKCP18 ensemble members
# and save the ensemble mean as NetCDF.
member_list = ['01', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '15']
ukcp_dir = f'{home_path}dap.ceda.ac.uk/badc/ukcp18/data/land-cpm/uk/2.2km/rcp85/'
var = 'tasmax'
timestep = 'day'
tas_max_19072022_list = []
# Not used in this cell; kept in case other cells rely on it.
rotated_pole = ccrs.RotatedPole(pole_longitude=177.5, pole_latitude=37.5)
for member in member_list:
    member_filename = f'{ukcp_dir}{member}/{var}/{timestep}/v20210615/{var}_rcp85_land-cpm_uk_2.2km_{member}_{timestep}_20211201-20221130.nc'
    # Open each member file in a context manager so it is closed straight away
    # rather than leaving one open file per ensemble member.
    with xr.open_dataset(member_filename) as ds_member:
        # Not used below; kept in case other cells rely on them.
        rotated_lats = ds_member['grid_latitude']
        rotated_lons = ds_member['grid_longitude']
        # [0, 228] takes the first entry of the leading axis and index 228 of
        # the second. NOTE(review): assumes the axes are (ensemble_member,
        # time) and that index 228 is 19 July 2022 on a 360-day calendar
        # starting 1 December 2021 - confirm against the file.
        # load() reads the slice into memory before the file is closed.
        ds_var = ds_member[var][0, 228].load()
    tas_max_19072022_list.append(ds_var)
ensemble_mean = xr.concat(tas_max_19072022_list, dim='ensemble_member').mean(dim='ensemble_member')
output_filename = home_path + '0_lcz_mcr/output_analysis/regional/urban_air_temperature/grid/ukcp_19_07_tmax_mcr.nc'
# Delete any previous output before writing the new file.
if os.path.exists(output_filename):
    os.remove(output_filename)
ensemble_mean.to_netcdf(output_filename)
ensemble_mean 

In [198]:
# Reproject the HadUK-Grid field to EPSG:4326 again; the result is not
# assigned to a name, so this line only produces a value for display.
tasmax_19072022.rio.reproject("EPSG:4326")