# Preprocess ERA5

Preprocess ERA5 files that were downloaded from the Copernicus Climate Data Store and unzipped.

In [2]:
import xarray as xr
import xagg as xa
import numpy as np
import pandas as pd
import os
import re
import glob

from datetime import datetime
from tqdm import tqdm
from funcs_support import get_params, utility_save, get_filepaths
# Project path lookup from funcs_support; the cells below index it as dir_list['raw']
# and append 'ERA5/...' to build input and output paths.
dir_list = get_params()

# NOTE(review): df is not referenced in the cells visible here — confirm whether it is still needed.
df = get_filepaths()

## SPI / SPEI

In [12]:
# Every raw netCDF file in the ERA5 directory whose name starts with "SP"
sp_paths = glob.glob(dir_list['raw']+'ERA5/SP*.nc')

# Variable name = the part of each file name before its first underscore
# (np.unique also de-duplicates and sorts the names)
sp_vars = np.unique([re.split(r'\_',re.split(r'\/',path)[-1])[0] for path in sp_paths])

# Map each variable name to the paths that contain "/<variable>_"
fns = {}
for sp_var in sp_vars:
    fns[sp_var] = [path for path in sp_paths if re.search(r'\/'+sp_var+r'\_',path)]

In [48]:
import shutil

# For each SPI/SPEI variable: merge its raw files, standardize, save as zarr,
# then archive the raw inputs under ERA5/old_files/.
for var in fns:
    # Open all files belonging to this variable as a single dask-chunked dataset
    ds = xr.open_mfdataset(fns[var],chunks='auto')

    ds = xa.fix_ds(ds)
    # Lower-case every data variable name (separate loop name so `var` is not shadowed)
    ds = ds.rename({name:name.lower() for name in ds})

    ds.attrs['SOURCE'] = 'preprocess_ERA5.ipynb'
    ds.attrs['DESCRIPTION'] = 'ERA5 SPI/SPEI standardized to filesystem standards'

    # Time range for the file name: YYYYMMDD of the first time step, then
    # YYYYMM of the latest time step followed by the number of days in that
    # month. Both end-date pieces come from the same timestamp, so they stay
    # consistent even if the time axis is not in ascending order.
    t_first = ds.time.min()
    t_last = ds.time.max()
    timestr = (str(t_first.values)[0:10].replace('-','')+'-'+
               str(t_last.values)[0:8].replace('-','')+str(t_last.dt.daysinmonth.values))
    output_fn = dir_list['raw']+'ERA5/'+var.lower()+'_Amon_ERA5_historical_reanalysis_'+timestr+'_40to40.zarr'

    utility_save(ds,output_fn,save_kwargs = {'zarr_format':2})

    # Release the handles on the source files before relocating them
    ds.close()

    # Archive the raw inputs. shutil.move avoids building a shell command
    # (safe for paths with spaces/metacharacters) and raises on failure
    # instead of silently leaving the files in place.
    for fn in fns[var]:
        archived_fn = re.sub(r'\/ERA5\/','/ERA5/old_files/',fn)
        os.makedirs(os.path.dirname(archived_fn),exist_ok=True)
        shutil.move(fn,archived_fn)

/dx06/data/climate_raw/ERA5/spei1_Amon_ERA5_historical_reanalysis_19880101-20250831_40to40.zarr saved!
/dx06/data/climate_raw/ERA5/spei12_Amon_ERA5_historical_reanalysis_19880101-20250831_40to40.zarr saved!
/dx06/data/climate_raw/ERA5/spei3_Amon_ERA5_historical_reanalysis_19880101-20250831_40to40.zarr saved!
/dx06/data/climate_raw/ERA5/spei6_Amon_ERA5_historical_reanalysis_19880101-20250831_40to40.zarr saved!
/dx06/data/climate_raw/ERA5/spi1_Amon_ERA5_historical_reanalysis_19880101-20250831_40to40.zarr saved!
/dx06/data/climate_raw/ERA5/spi12_Amon_ERA5_historical_reanalysis_19880101-20250831_40to40.zarr saved!
/dx06/data/climate_raw/ERA5/spi3_Amon_ERA5_historical_reanalysis_19880101-20250831_40to40.zarr saved!
/dx06/data/climate_raw/ERA5/spi6_Amon_ERA5_historical_reanalysis_19880101-20250831_40to40.zarr saved!


## Split ERA5 monthly data into one file per variable

In [3]:
# Open the netCDF file holding the monthly fields.
# NOTE(review): the file name looks like the default name from the unzipped download — confirm.
ds = xr.open_dataset(dir_list['raw']+'ERA5/data_stream-moda_stepType-avgua.nc')

In [4]:
# Standardize the dataset with xagg's fix_ds helper.
# NOTE(review): the exact changes (coordinate names/ordering) are defined by xagg — see its docs.
ds = xa.fix_ds(ds)
# Rename the time coordinate to 'time'; later cells access it as ds.time
ds = ds.rename({'valid_time':'time'})

In [5]:
# ERA5 short names -> the variable names used in the output file names
rename_dict = dict(u10='uas', v10='vas', t2m='tas')
ds = ds.rename(rename_dict)

In [6]:
# Remove 'number' and 'expver' so they are not written into the per-variable files.
# With xarray's default errors='raise', this fails if either name is absent from ds.
ds = ds.drop_vars(['number','expver'])

In [8]:
# Time range for the file names: YYYYMMDD of the first time step, then YYYYMM
# of the latest time step followed by the number of days in that month.
# Both end-date pieces are derived from the same timestamp, so they cannot
# disagree if the time axis is not in ascending order.
t_first = ds.time.min()
t_last = ds.time.max()
timestr = (str(t_first.values)[0:10].replace('-','')+'-'+
           str(t_last.values)[0:8].replace('-','')+str(t_last.dt.daysinmonth.values))

# Write each data variable to its own netCDF file
for var in ds:
    output_fn = dir_list['raw']+'ERA5/'+var+'_Amon_ERA5_historical_reanalysis_'+timestr+'_WAfr.nc'

    # Single-variable dataset; provenance attributes are set on this subset only
    ds_tmp = ds[[var]]
    ds_tmp.attrs['SOURCE'] = 'preprocess_ERA5.ipynb'
    ds_tmp.attrs['DESCRIPTION'] = 'monthly fields downloaded from the Copernicus Datastore and put into filesystem standards.'
    utility_save(ds_tmp,output_fn)

/dx06/data/climate_raw/ERA5/uas_Amon_ERA5_historical_reanalysis_19810101-20251031_WAfr.nc saved!
/dx06/data/climate_raw/ERA5/vas_Amon_ERA5_historical_reanalysis_19810101-20251031_WAfr.nc saved!
/dx06/data/climate_raw/ERA5/tas_Amon_ERA5_historical_reanalysis_19810101-20251031_WAfr.nc saved!
