# **Additional scripts used to acquire & pre-process FWI data**
Note: these scripts were run either on JASMIN or on the Imperial HPC, depending on the data source.  
Data sources and any further instructions are given in each section below.


---
# **ERA5**

## **Pre-processing**

In [2]:
# Open the raw ERA5 downloads
era5_dir = "../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/era5/"

# the two main downloads have an "expver" dimension, which is summed out (attributes are kept)
tds, uv = [
    xr.open_dataset(era5_dir + "downloaded_era5_" + stem + ".nc").sum("expver", keep_attrs = True)
    for stem in ["t2m-d2m-snd", "u10-v10"]
]

# the 31 July file is opened as-is
j31 = xr.open_dataset(era5_dir + "downloaded_era5_July-31st.nc")

In [5]:
# Append the 31 July file to each main download along "time" and save one file per variable
era5_out = "../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/era5/"

for era5_var in ["t2m", "d2m", "sd"]:
    joined = xr.concat([tds[era5_var], j31[era5_var]], "time")
    joined.to_netcdf(era5_out + "era5_" + era5_var + "_19400101-20230731.nc")

# the two wind components are kept together in a single file
joined_uv = xr.concat([uv, j31[["u10","v10"]]], "time")
joined_uv.to_netcdf(era5_out + "era5_uv_19400101-20230731.nc")

## **Calculate FWI for corrected ERA5 data**

In [2]:
# Load the ERA5 inputs: one file per variable, plus the corrected precipitation file
tas, sfcWind, snw, hurs = [xr.open_dataset("../00_WWA_project_folder/ephemeral/canada_fwi/fwi/era5/"+varnm+"_era5_19400101_20230731.nc")[varnm] for varnm in ["tas", "sfcWind", "sd", "hurs"]]
pr = xr.open_dataset("era5_tp_19400101-20230731_corrected.nc").tp

# calendar month & day of each timestep, passed to calculate_fwi alongside the data
months = tas.time.dt.month.to_numpy()
days = tas.time.dt.day.to_numpy()

# run calculate_fwi on each time series in turn (vectorize = True loops over the non-time dimensions);
# timestamps are printed before and after to show how long the calculation took
print("  ",datetime.now())
ffmc, dmc, dc, isi, bui, fwi = xr.apply_ufunc(lambda t, p, w, h, s : calculate_fwi(months, days, t, p, w, h, s), tas, pr, sfcWind, hurs, snw, 
                                              input_core_dims = [["time"],["time"],["time"],["time"],["time"]], 
                                              output_core_dims = [["time"],["time"],["time"],["time"],["time"],["time"]], vectorize = True)
print("  ",datetime.now())

# name each output explicitly (rather than looking variables up by name with eval) and merge into one dataset
fwi_components = {"ffmc" : ffmc, "dmc" : dmc, "dc" : dc, "isi" : isi, "bui" : bui, "fwi" : fwi}
da = xr.merge([arr.rename(nm) for nm, arr in fwi_components.items()])
da.to_netcdf("model_fwi/fwi_era5.nc")

---
# **CanESM2-CanRCM4 large ensemble**
_All processing carried out on Imperial's HPC_

## **Define coordinate reference system**

In [None]:
# rotated-pole coordinate reference system used for the large ensemble in this notebook
lens_proj = cartopy.crs.RotatedPole(pole_longitude = 83, pole_latitude = 43)

# load the study-region shapefile and reproject it onto the model grid's CRS
# (geopandas has no read_csv; read_file is its reader for vector data)
sf = gpd.read_file("sf_ejb/").to_crs(lens_proj)

In [None]:
# check that CRS is correct: plot one timestep of model data with the region boundary drawn on top
lens_sample_fnm = "../00_WWA_project_folder/ephemeral/canada_fwi/can_lens/r1_r1/tas_NAM-44_CCCma-CanESM2_historical-r1_r1i1p1_CCCma-CanRCM4_r2_3hr_1950010103-1951010100.nc"
da = xr.open_dataset(lens_sample_fnm)["tas"]

# region mask on the rotated grid; size-1 dimensions are dropped
rm = regionmask.mask_3D_geopandas(sf, da.rlon, da.rlat)
rm = rm.squeeze(drop = True)

# last timestep in the file, with the shapefile outline in black on the same axes
last_step = da.isel(time = -1)
last_step.plot()
sf.boundary.plot(ax = plt.gca(), color = "k")

## **Download raw data**

In [2]:
from IPython.display import clear_output
import os, glob

In [None]:
# loop over all GCM & RCM realisations and use wget to download the data into the current directory

fpath = "https://crd-data-donnees-rdc.ec.gc.ca/CCCMA/products/CanSISE/output/CCCma/CanRCM4/"
for gcm in ["r"+str(i) for i in range(1,6)]:
    for rcm in ["r"+str(i) for i in range(1,8)]:
        
        newdir = gcm+"_"+rcm
        if not os.path.exists(newdir):
            ! mkdir -p ${newdir} 
                
        # 3-hourly data
        for varbl in ["hurs", "tas", "pr", "sfcWind"]:
            for y in range(1950, 2051):
                
                fnm = "NAM-44_CCCma-CanESM2_historical-"+gcm+"/3hr/atmos/"+varbl+"/"+rcm+"i1p1/"+varbl+"_NAM-44_CCCma-CanESM2_historical-"+gcm+"_"+rcm+"i1p1_CCCma-CanRCM4_r2_3hr_"+str(y)+"010103-"+str(y+1)+"010100.nc"
                if not os.path.exists(newdir+"/"+fnm.split("/")[-1]):
                    fnm = fpath + fnm+" -P "+newdir
                    ! wget $fnm
                    clear_output(wait = False) 
            
        # daily data
        for varbl in ["snd"]:
            
            fnm = fpath+"NAM-44_CCCma-CanESM2_historical-"+gcm+"/day/atmos/"+varbl+"/"+rcm+"i1p1/"+varbl+"_NAM-44_CCCma-CanESM2_historical-"+gcm+"_"+rcm+"i1p1_CCCma-CanRCM4_r2_day_19500101-19501231.nc -P "+gcm+"_"+rcm
            
            for y in range(1951, 2051, 5):
                fnm = "NAM-44_CCCma-CanESM2_historical-"+gcm+"/day/atmos/"+varbl+"/"+rcm+"i1p1/"+varbl+"_NAM-44_CCCma-CanESM2_historical-"+gcm+"_"+rcm+"i1p1_CCCma-CanRCM4_r2_day_"+str(y)+"0101-"+str(y+4)+"1231.nc"
                if not os.path.exists(gcm+"_"+rcm+"/"+fnm.split("/")[-1]):
                    fnm = fpath+fnm+" -P "+gcm+"_"+rcm
                    ! wget $fnm
                    clear_output(wait = False)
        
print("Done.")

In [None]:
# check that all files are successfully downloaded, no partial data:
# for each of the seven r1_r* directories, list "<size in MB> <path>" for the first five entries (sorted as strings)
size_report = []
for r in range(7):
    entries = []
    for f in glob.glob("r1_r"+str(r+1)+"/*.nc"):
        size_mb = round(os.stat(f).st_size/1024/1024)
        entries.append(str(size_mb)+" "+f)
    size_report.append(sorted(entries)[:5])
size_report

## **Pre-processing**
Initial subsetting carried out using CDO  
Concatenation into single file also done using CDO for tas, hurs, sfcWind, snd - only precip aggregated manually

### **Subset data**

```
#!/bin/bash

# script to subset each subfile of the large ensemble (concatenation done separately)
module load cdo

dir_in=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/can_lens
dir_out=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/canesm-canrcm/subfiles

# subset_var VARNM CDO_OPERATORS...
#   Apply the given CDO operator chain to every $dir_in/*/VARNM_*.nc file and write the
#   result to $dir_out under the same file name. Paths are quoted and the glob is
#   expanded directly, so no parsing of `ls` output is needed.
subset_var() {
    local varnm=$1; shift
    local file_in
    for file_in in "$dir_in"/*/"${varnm}"_*.nc; do
        [ -e "$file_in" ] || continue    # pattern matched no files
        cdo -s "$@" "$file_in" "$dir_out/${file_in##*/}"
    done
    echo "$varnm complete"
}

# temperature, relative humidity, wind speed (only 18:00), cut to the lon-lat box
for varnm in tas hurs sfcWind; do
    subset_var "$varnm" selhour,18 -sellonlatbox,280,297,47,59
done

# precip, snow depth (keep all timesteps), cut to the same lon-lat box
for varnm in pr snd; do
    subset_var "$varnm" sellonlatbox,280,297,47,59
done
```

### **Concatenate subsetted data (excluding precip)**

```
#!/bin/bash

# Join the subsetted files into a single file per variable and ensemble member
# (the subsetting itself is done by a separate script)
module load cdo

subdir=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/canesm-canrcm/subfiles

for varnm in tas hurs sfcWind snd; do
    for gcm in {1..5}; do
        for rcm in {1..7}; do

            # file name stem shared by all subfiles of this variable & member
            member=${varnm}_NAM-44_CCCma-CanESM2_historical-r${gcm}_r${rcm}i1p1_CCCma-CanRCM4_r2

            # $fl is deliberately left unquoted below so that each file becomes its own argument
            fl=$(ls ${subdir}/${member}_*.nc)
            cdo cat $fl canesm-canrcm/${member}.nc

        done
    done
done
```

### **Concatenate & aggregate precipitation data**

In [2]:
# aggregated files are written to path_out; the subsetted input files sit in its "subfiles/" subdirectory
path_out = "../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/canesm-canrcm/"
fpath = path_out + "subfiles/"

In [8]:
# Accumulate 3-hourly precipitation over 8 timesteps and keep the value labelled 18:00,
# writing one file per ensemble member.
# Member numbering starts at 1 (GCM r1-r5, RCM r1-r7): starting the ranges at 0 finds no
# files for "r0" and fails on fl[0].
for gcm in range(1, 6): 
    for rcm in range(1, 8):  
        
        fl = sorted(glob.glob(fpath+"pr_*r"+str(gcm)+"_r"+str(rcm)+"*.nc"))
        if not fl:
            print("! no precip files found for r"+str(gcm)+"_r"+str(rcm)+" - skipping")
            continue
        
        # strip the trailing "3hr_<start>-<end>.nc" (28 characters) from the first file name
        new_fnm = path_out+fl[0].split("/")[-1][:-28]+"24hr-1800.nc"
        
        da = xr.open_mfdataset(fl).pr
        da = da.rolling(time = 8).sum().groupby("time.hour")[18]
        da.to_netcdf(new_fnm)

## **Model evaluation**

### **Seasonal cycle**

In [4]:
# Seasonal cycle (mean by day of year, 1980-2020) of the regional mean, per ensemble member,
# for temperature and precip
# NOTE(review): the mask here is built from the lon/lat coordinates, whereas other cells in this
# section pass rlon/rlat together with the shapefile reprojected to lens_proj - confirm which
# CRS sf is in when this cell is run
for varnm, units in [("tas", "degC"), ("pr", "mm/day")]:

    lens = xr.concat([xr.open_dataset(fnm).sel(time = slice("1980", "2020"))[varnm] for fnm in sorted(glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/canesm-canrcm/"+varnm+"_*"))], "member")
    lens_sc = convert_units_to(lens.where(regionmask.mask_3D_geopandas(sf, lens.lon, lens.lat).squeeze(drop = True) == 1).mean(["rlat", "rlon"]).groupby("time.dayofyear").mean(), units)

    # xarray DataArrays have no to_csv method, so convert to pandas before writing
    lens_sc.to_pandas().to_csv("eval/sc-"+varnm+"_canesm-canrcm.csv")

### **Spatial pattern**


In [148]:
# lat-lon bounds for Canada
xn, xx = 360-145, 360-50
yn, yx = 40, 80

tmplt = xr.open_dataset("../00_WWA_project_folder/ephemeral/canada_fwi/can_lens/r1_r1/tas_NAM-44_CCCma-CanESM2_historical-r1_r1i1p1_CCCma-CanRCM4_r2_3hr_1950010103-1951010100.nc")

# grid cells whose lon & lat both fall inside the bounds
in_lon = np.logical_and(tmplt.lon >= xn, tmplt.lon <= xx)
in_lat = np.logical_and(tmplt.lat >= yn, tmplt.lat <= yx)
rm = np.logical_and(in_lon, in_lat)

# blank out cells outside the box, then drop rlon columns / rlat rows that are entirely outside it
rm = rm.where(rm == 1)
rm = rm.dropna("rlon", "all")
rm = rm.dropna("rlat", "all")

In [164]:
# JJA-mean temperature over the Canada box for each file of each ensemble member,
# then averaged over members and saved
member_dirs = sorted(glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/can_lens/r[0-9]_r[0-9]"))

ens_tas = []
for fp in member_dirs:
    print(fp, end = "")

    # keep files whose end year is >= 1980 and whose start year is <= 2010
    fl = []
    for fnm in glob.glob(fp+"/tas_*.nc"):
        end_year, start_year = fnm[-13:-9], fnm[-24:-20]
        if end_year >= "1980" and start_year <= "2010":
            fl.append(fnm)
    
    # one JJA mean per file (a dot is printed per file as a progress marker)
    em = []
    for fnm in fl:
        print(".", end = "")
        tas_box = xr.open_dataset(fnm).tas.sel(rlon = rm.rlon, rlat = rm.rlat)
        em.append(tas_box.groupby("time.season")["JJA"].mean("time"))
    ens_tas.append(xr.concat(em, "time"))
    print("")

member_mean = xr.concat(ens_tas, "member").mean("member")
lens_tas = convert_units_to(member_mean, "degC")
lens_tas.to_netcdf("sp-tas_NAM-44_CCCma-CanESM2_CCCma-CanRCM4_r2.nc")

In [174]:
# Mean March-July precipitation over the Canada box for each ensemble member,
# then averaged over members and saved
ens_pr = []
for fp in sorted(glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/can_lens/r[0-9]_r[0-9]")):
    print(fp, end = "")
    
    # keep files whose end year is >= 1980 and whose start year is <= 2010
    fl = glob.glob(fp+"/pr_*.nc")
    fl = [fnm for fnm in fl if (fnm[-13:-9] >= "1980") and (fnm[-24:-20] <= "2010")]
    
    em = []
    for fnm in fl:
        print(".", end = "")
        da = xr.open_dataset(fnm).pr.sel(rlon = rm.rlon, rlat = rm.rlat)
        # keep only timesteps in months 3-7
        em.append(da.sel(time = da.time.dt.month.isin([3,4,5,6,7])))
    
    # time-mean over all selected timesteps: this is already the member's single field,
    # so it is appended directly rather than being concatenated a second time
    em = xr.concat(em, "time").mean("time")
    ens_pr.append(em)
    print("")

# average over the list built above (the original referred to an undefined name here)
lens_pr = convert_units_to(xr.concat(ens_pr, "member").mean("member"), "mm/day")
lens_pr.to_netcdf("sp-pr_NAM-44_CCCma-CanESM2_CCCma-CanRCM4_r2.nc")

## **Calculate FWI**

In [30]:
# directory holding the concatenated per-member input files
fpath = "../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/canesm-canrcm//"

# one identifier per member, built from four "_"-separated fields of each tas file name
tas_files = glob.glob(fpath+"tas_*.nc")
fl = sorted("_".join(fnm.split("_")[-6:-2]) for fnm in tas_files)

In [None]:
# Compute the FWI components for each ensemble member and save one file per member
for fnm in fl:
    
    print(fnm)
        
    # load data; tas is cut to the masked grid and to 1951 onwards, and defines the target grid & period
    tas = convert_units_to(xr.open_mfdataset(fpath+"tas_*"+fnm+"*.nc").tas.load(), "degC").sel(rlat = rm.rlat, rlon = rm.rlon, time = slice("1951", None))
    pr = convert_units_to(xr.open_mfdataset(fpath+"pr_*"+fnm+"*.nc").pr.load(), "mm/day")
    sfcWind = convert_units_to(xr.open_mfdataset(fpath+"sfcWind_*"+fnm+"*.nc").sfcWind.load(), "km/hr")
    hurs = xr.open_mfdataset(fpath+"hurs_*"+fnm+"*.nc").hurs.load()
    snw = convert_units_to(xr.open_mfdataset(fpath+"snd_*"+fnm+"*.nc").snd.load(), "m")
    
    # cut the other variables to the same grid cells & period as tas BEFORE comparing lengths:
    # comparing the uncut series against the already-cut tas would flag a mismatch
    # whenever any of them contains timesteps before 1951
    P, W, H, S = [da.sel(rlon = tas.rlon, rlat = tas.rlat, time = slice("1951", None)) for da in [pr, sfcWind, hurs, snw]]
    
    # check that all time series are the same length
    if not all([len(da.time) == len(tas.time) for da in [P, W, H, S]]):
        print("   ! length mismatch - skipping")
        continue
        
    # reassign coords to ensure that data will align correctly
    P, W, H, S = [da.assign_coords(time = tas.time) for da in [P, W, H, S]]

    months = tas.time.dt.month.to_numpy()
    days = tas.time.dt.day.to_numpy()
    
    # run calculate_fwi on each grid cell's time series; timestamps show how long it took
    print("  ",datetime.now())
    ffmc, dmc, dc, isi, bui, fwi = xr.apply_ufunc(lambda t, p, w, h, s : calculate_fwi(months, days, t, p, w, h, s), tas, P, W, H, S, 
                                                  input_core_dims = [["time"],["time"],["time"],["time"],["time"]], 
                                                  output_core_dims = [["time"],["time"],["time"],["time"],["time"],["time"]], vectorize = True)
    print("  ",datetime.now())
    
    # name each output explicitly (rather than looking variables up by name with eval) and merge
    fwi_components = {"ffmc" : ffmc, "dmc" : dmc, "dc" : dc, "isi" : isi, "bui" : bui, "fwi" : fwi}
    da = xr.merge([arr.rename(nm) for nm, arr in fwi_components.items()])
    
    # remove the first year as a spin-up period
    da = da.sel(time = slice("1952", None))
    da.to_netcdf("model_fwi/fwi_"+fnm+".nc")

## **Time series for climate explorer**

In [7]:
# put the shapefile on the model's rotated-pole CRS
sf = sf.to_crs(lens_proj)

# build the region mask on the rotated grid of one raw model file; size-1 dimensions are dropped
lens_template_fnm = "../00_WWA_project_folder/ephemeral/canada_fwi/can_lens/r1_r1/tas_NAM-44_CCCma-CanESM2_historical-r1_r1i1p1_CCCma-CanRCM4_r2_3hr_1950010103-1951010100.nc"
da = xr.open_dataset(lens_template_fnm)["tas"]
rm = regionmask.mask_3D_geopandas(sf, da.rlon, da.rlat)
rm = rm.squeeze(drop = True)

# FWI files to be turned into time series
fl = glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/fwi/cordex/*.nc")


In [None]:

def _write_dat(ts, csv_fnm, header_lines):
    """
    Write a yearly time series to a space-separated .dat file for upload to Climate Explorer.

    ts           : named DataArray indexed by "year"
    csv_fnm      : path of the file to (over)write
    header_lines : comment lines written, in order, above the data
    
    A warning is printed if the file name (without directory) is longer than 65 characters.
    """
    if len(csv_fnm.split("/")[-1]) > 65: print("! Filename too long: ", csv_fnm)
    
    # header first (overwriting any existing file), then the data appended without column names
    with open(csv_fnm, "w") as f:
        f.write("\n".join(header_lines)+"\n")
    ts.to_dataframe().to_csv(csv_fnm, sep = " ", mode = "a", header = False)


for fnm in fl:
    mdl = "_".join(fnm.split("/")[-1].split("_")[1:5])
    fwi = xr.open_dataset(fnm).fwi.where(rm == 1).squeeze(drop = True)
    xydims = ["rlat", "rlon"]

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ## FWI7x FOR CLIMATE EXPLORER
    
    # annual maximum of the 7-day rolling mean, cutting off the first year, then averaged over the study region
    fwi7x_ts = fwi.rolling(time = 7).mean().resample(time = "AS-JAN").max().isel(time = slice(1, None)).where(rm == 1).mean(xydims).dropna("time", "any")
    fwi7x_ts = fwi7x_ts.assign_coords(time = fwi7x_ts.time.dt.year).rename(time = "year")
    
    # specify filename
    csv_fnm = "ts/canada-wildfires_fwi7x_"+mdl+".dat"

    # create extra header lines for upload to Climate Explorer
    # (the ".dat" extension is stripped with an escaped, end-anchored pattern so that only the suffix can match;
    #  the first header line keeps the trailing space of the original output)
    str1 = "# contact :: "+mdl+" fwi7x - Canada wildfires 2023, c.barnes22@imperial.ac.uk"
    str2 = "# fwi7x [] spatial mean of maximum of 7-day average of FWI over Eastern James Bay region at "+re.sub(r"\.dat$", "", csv_fnm.split("/")[-1])
    head = "# year fwi7x"

    # make .dat file
    _write_dat(fwi7x_ts, csv_fnm, [str1+" ", str2, head])
    
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ## cumDSR FOR CLIMATE EXPLORER
    
    # compute DSR, set missing values to zero and mask off study region
    dsr = (fwi ** 1.71) * 0.0272
    dsr = dsr.where(~np.isnan(dsr), 0).where(rm == 1)
    
    # sum of DSR over January-July of each year, averaged over the study region
    cumdsr_ts = dsr.sel(time = [m <= 7 for m in dsr.time.dt.month]).resample(time = "AS-JAN").sum().rename("cumdsr").where(rm == 1).mean(xydims)
    cumdsr_ts = cumdsr_ts.assign_coords(time = cumdsr_ts.time.dt.year).rename(time = "year")
    
    # specify filename
    csv_fnm = "ts/canada-wildfires_cumDSR_"+mdl+".dat"

    # create extra header lines for upload to Climate Explorer 
    str1 = "# contact :: "+mdl+" cumulative DSR - Canada wildfires 2023, c.barnes22@imperial.ac.uk"
    str2 = "# cumDSR [] spatial mean of cumulative Daily Severity Rating from Jan-July each year over Eastern James Bay region at "+re.sub(r"\.dat$", "", csv_fnm.split("/")[-1])
    head = "# year cumDSR"

    # make .dat file
    _write_dat(cumdsr_ts, csv_fnm, [str1+" ", str2, head])

---
# **CMIP6**
Data retrieved from JASMIN

## **Pre-processing**

In [None]:
# first, check data availability - don't waste time downloading if some variables are missing
# has_data collects the (3hr) ssp585 directory of every run that has all six variables,
# starting no later than 1950 and ending no earlier than 2050
has_data = []
for fp in glob.glob("/badc/cmip6/data/CMIP6/ScenarioMIP/*/*/ssp585/r*i1p1f*/day/snw/*/latest"):
    
    # matching historical directory: same path with ScenarioMIP -> CMIP and ssp585 -> historical
    fp_hist = "/".join(fp.split("/")[:5])+"/CMIP/"+"/".join(fp.split("/")[6:8])+"/historical/"+"/".join(fp.split("/")[9:])

    # daily variables (historical + scenario files)
    fl_snw = sorted(glob.glob(fp_hist+"/*.nc") + glob.glob(fp+"/*.nc"))    
    fl_sfcWind = sorted(glob.glob(re.sub("snw", "sfcWind", fp_hist)+"/*.nc") + glob.glob(re.sub("snw", "sfcWind", fp)+"/*.nc"))

    # 3-hourly variables: same directory layout with "3hr" in place of "day"
    fp = re.sub("day", "3hr", fp)
    fp_hist = re.sub("day", "3hr", fp_hist)
                
    # (the scenario part of fl_huss previously globbed "tas" instead of "huss", so the check
    #  could pass for runs with no ssp585 huss files)
    fl_huss = sorted(glob.glob(re.sub("snw", "huss", fp_hist)+"/*.nc") + glob.glob(re.sub("snw", "huss", fp)+"/*.nc"))
    fl_tas = sorted(glob.glob(re.sub("snw", "tas", fp_hist)+"/*.nc") + glob.glob(re.sub("snw", "tas", fp)+"/*.nc"))
    fl_pr = sorted(glob.glob(re.sub("snw", "pr", fp_hist)+"/*.nc") + glob.glob(re.sub("snw", "pr", fp)+"/*.nc"))
    fl_ps = sorted(glob.glob(re.sub("snw", "ps", fp_hist)+"/*.nc") + glob.glob(re.sub("snw", "ps", fp)+"/*.nc"))

    # group the file lists explicitly rather than looking them up by name with eval
    daily = [fl_snw, fl_sfcWind]
    subdaily = [fl_huss, fl_tas, fl_pr, fl_ps]

    # skip the run if any variable is missing altogether...
    if any(len(fl_v) == 0 for fl_v in daily + subdaily): continue
    
    # ...or if the first file starts after 1950 / the last file ends before 2050
    # (the year is read from the date stamp at the end of the file name; daily and 3-hourly
    #  stamps have different lengths, hence the different slices)
    if any(fl_v[0][-20:-16] > "1950" for fl_v in daily) or any(fl_v[0][-28:-24] > "1950" for fl_v in subdaily): continue
    if any(fl_v[-1][-11:-7] < "2050" for fl_v in daily) or any(fl_v[-1][-15:-11] < "2050" for fl_v in subdaily): continue
        
    has_data.append(fp)

In [None]:
def _cut_to_raw(fnm, v, hour = None):
    """
    Cut one CMIP6 file down to the Canada lon-lat box and to timesteps up to 2050, and save it
    under cmip6_raw/ with its original file name. Nothing is done if the output already exists.

    fnm  : path of the source file
    v    : name of the variable to extract
    hour : if given, keep only the timesteps at this hour of the day
    """
    new_fnm = "cmip6_raw/"+fnm.split("/")[-1]
    if path.exists(new_fnm):
        return
    
    da = xr.open_dataset(fnm)[v].squeeze(drop = True).reset_coords(drop = True).sel(lon = slice(xn, xx), lat = slice(yn,yx), time = slice(None, "2050"))
    if hour is not None:
        da = da.sel(time = da.time.dt.hour == hour)
    da.to_netcdf(new_fnm)


# now loop over models that do have all required variables, and cut out the bits we need
for fp in has_data:

    # matching historical directory: same path with ScenarioMIP -> CMIP and ssp585 -> historical
    fp_hist = "/".join(fp.split("/")[:5])+"/CMIP/"+"/".join(fp.split("/")[6:8])+"/historical/"+"/".join(fp.split("/")[9:])
    print(fp_hist)

    # daily variables: has_data holds the 3hr paths, so switch back to "day"; keep files starting by 2050
    for v in ["snw", "sfcWind"]:

        fl = sorted(glob.glob(re.sub("3hr", "day", re.sub("snw", v, fp_hist))+"/*.nc") + glob.glob(re.sub("3hr", "day", re.sub("snw", v, fp))+"/*.nc"))
        fl = [fnm for fnm in fl if fnm[-20:-16] <= "2050"]
        
        for fnm in fl:
            _cut_to_raw(fnm, v)

    # 3-hourly variables: keep only the 12:00 timestep
    for v in ["huss","tas","ps"]:

        fl = sorted(glob.glob(re.sub("snw", v, fp_hist)+"/*.nc") + glob.glob(re.sub("snw", v, fp)+"/*.nc"))
        fl = [fnm for fnm in fl if fnm[-28:-24] <= "2050"]

        for fnm in fl:
            _cut_to_raw(fnm, v, hour = 12)

    # 3-hourly precip: keep all timesteps
    for v in ["pr"]:

        fl = sorted(glob.glob(re.sub("snw", v, fp_hist)+"/*.nc") + glob.glob(re.sub("snw", v, fp)+"/*.nc"))
        fl = [fnm for fnm in fl if fnm[-28:-24] <= "2050"]

        for fnm in fl:
            _cut_to_raw(fnm, v)

### **Aggregate precip**

In [10]:
# one entry per run: file name with ssp585 -> historical and the trailing 29-character date stamp removed
raw_pr_files = glob.glob("cmip6_raw/pr*")
pr_list = sorted({re.sub("ssp585", "historical", fnm)[:-29] for fnm in raw_pr_files})

for mdl in pr_list:
    
    print(datetime.now(), mdl)
    
    new_fnm = "cmip6/"+re.sub("3hr","24hr",mdl.split("/")[-1])+".nc"
    if path.exists(new_fnm):
        continue

    # open historical & scenario files together
    pr_pattern = re.sub("historical", "*", mdl)+"*.nc"
    da = xr.open_mfdataset(pr_pattern).pr

    # hour label to keep:
    #   12 - assuming that label is given to end of 3hr time slice, so 12:00 corresponds to 9-12
    #   10 - assuming that label is given to midpoint of 3hr time slice, so 10:30 corresponds to 9-12
    h = 12 if 12 in da.time.dt.hour else 10
    
    # sum over 8 timesteps, then keep the sum at the chosen hour
    pr_sum = da.rolling(time = 8).sum().groupby("time.hour")[h]
    pr24 = convert_units_to(pr_sum, "mm/day")
    pr24.to_netcdf(new_fnm)

### **Relative humidity from huss, tas, ps**

In [10]:
# one entry per run: file name with ssp585 -> historical and the trailing 29-character date stamp removed
raw_huss_files = glob.glob("cmip6_raw/huss*")
huss_list = sorted({re.sub("ssp585", "historical", fnm)[:-29] for fnm in raw_huss_files})

for mdl in huss_list:
        
    new_fnm = "cmip6/"+re.sub("huss","hurs",mdl.split("/")[-1])+".nc"
    print(datetime.now(), new_fnm)
    
    if path.exists(new_fnm):
        continue
    
    # pattern matching both the historical and the scenario huss files;
    # the tas and ps patterns are the same with the variable name swapped
    huss_pattern = re.sub("historical", "*", mdl)+"*.nc"
    huss = xr.open_mfdataset(huss_pattern).huss
    tas = xr.open_mfdataset(re.sub("huss", "tas", huss_pattern)).tas
    ps = xr.open_mfdataset(re.sub("huss", "ps", huss_pattern)).ps
    
    # relative humidity from specific humidity, temperature and surface pressure
    hurs = relative_humidity(tas = tas, huss = huss, ps = ps)
    hurs.rename("hurs").to_netcdf(new_fnm)

### **Compile everything else**

In [10]:
# 3hrly variables: merge the historical & scenario files of each run into one file, in degC
for varnm in ["tas"]:
    
    # one entry per run (ssp585 -> historical, trailing 29-character date stamp removed)
    raw_files = glob.glob("cmip6_raw/"+varnm+"*")
    mdl_list = sorted({re.sub("ssp585", "historical",fnm)[:-29] for fnm in raw_files})

    for mdl in mdl_list:
        
        new_fnm = "cmip6/"+mdl.split("/")[-1]+".nc"
        print(datetime.now(), mdl)

        if path.exists(new_fnm):
            continue
        
        merged = xr.open_mfdataset(re.sub("historical", "*", mdl)+"*.nc")[varnm]
        da = convert_units_to(merged, "degC")
        da.to_netcdf(new_fnm)

In [10]:
# daily variables: merge the historical & scenario files of each run into one file
for varnm in ["sfcWind", "snw"]:
    
    # one entry per run (ssp585 -> historical, trailing 21-character date stamp removed)
    raw_files = glob.glob("cmip6_raw/"+varnm+"*")
    mdl_list = sorted({re.sub("ssp585", "historical",fnm)[:-21] for fnm in raw_files})

    for mdl in mdl_list:
        
        new_fnm = "cmip6/"+mdl.split("/")[-1]+".nc"
        print(datetime.now(), new_fnm)

        if path.exists(new_fnm):
            continue
        
        all_files = re.sub("historical", "*", mdl)+"*.nc"
        da = xr.open_mfdataset(all_files)[varnm]
        da.to_netcdf(new_fnm)

## **Model evaluation**

### **Seasonal cycle**

In [10]:
# Seasonal cycle (mean by day of year, 1980-2020) of the regional mean for each CMIP6 run
for varnm in ["tas", "pr"]:
    
    units = {"tas" : "degC", "pr" : "mm/day"}[varnm]
    
    # files for this variable (the pattern previously read varnm+"tas_*", which matches nothing)
    cmip6_fl = sorted(glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/cmip6/"+varnm+"_*"))
    for fnm in cmip6_fl:
        da = xr.open_dataset(fnm)[varnm].sel(time = slice("1980", "2020"))
        sc = convert_units_to(da.where(regionmask.mask_3D_geopandas(sf, da.lon, da.lat).squeeze(drop = True) == 1).mean(["lat", "lon"]).groupby("time.dayofyear").mean(), units)
        
        # name the output after the file being processed (not a stale fnm from an earlier cell);
        # the extension is swapped with an escaped, end-anchored pattern so only the suffix can match;
        # xarray DataArrays have no to_csv method, so convert to pandas before writing
        sc.to_pandas().to_csv("eval/sc-"+re.sub(r"\.nc$", ".csv", fnm.split("/")[-1]))

### **Spatial pattern**

In [10]:
# lat-lon bounds for Canada
xn, xx = 360-145, 360-50
yn, yx = 40, 80

# loop over models that do have all required variables, and cut out the bits we need (save everything separately as otherwise it crunches to a halt)
for varnm in ["tas", "pr"]:    
    for fp in has_data:

        # directory of daily historical files for this variable (want 1980-2010 climatology only)
        hist_dir = re.sub("ScenarioMIP", "CMIP", fp)
        hist_dir = re.sub("ssp585", "historical", hist_dir)
        hist_dir = re.sub("3hr", "day", hist_dir)
        hist_dir = re.sub("snw", varnm, hist_dir)

        # keep files whose end year is >= 1980 and whose start year is <= 2020
        fl = [fnm for fnm in glob.glob(hist_dir+"/*.nc") if (fnm[-11:-7] >= "1980") and (fnm[-20:-16] <= "2020")]

        # monthly climatology over the Canada box
        box = xr.open_mfdataset(fl)[varnm].sel(lon = slice(xn, xx), lat = slice(yn,yx), time = slice("1980", "2010"))
        da = box.groupby("time.month").mean().reset_coords(drop = True)
        
        # output name: first file name with the date stamp removed
        out_fnm = re.sub("_[0-9]{8}-[0-9]{8}", "", fl[0].split("/")[-1])
        da.to_netcdf("spatial/sp-"+out_fnm)

---
## **Manual calculation of FWI**
_About 5-10 minutes per run_

In [13]:
# directory holding the compiled CMIP6 input files
fpath = "../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/cmip6/"

# one identifier per run: the last four "_"-separated fields of each tas file name (extension included)
fl = []
for fnm in glob.glob(fpath+"tas_*.nc"):
    fl.append("_".join(fnm.split("_")[-4:]))

In [None]:
# Compute the FWI components for each CMIP6 run and save one file per run
for fnm in fl:
    
    print(fnm)
        
    # load data; tas is cut to 1850-2049 and defines the target grid & period
    tas = convert_units_to(xr.open_mfdataset(fpath+"tas_*"+fnm).tas.sel(time = slice("1850", "2049")).load(), "degC")
    pr = convert_units_to(xr.open_mfdataset(fpath+"pr_*"+fnm).pr.load(), "mm/day")
    sfcWind = convert_units_to(xr.open_mfdataset(fpath+"sfcWind_*"+fnm).sfcWind.load(), "km/hr")
    hurs = xr.open_mfdataset(fpath+"hurs_*"+fnm).hurs.load()
    snw = convert_units_to(xr.open_mfdataset(fpath+"snw_*"+fnm).snw.load(), "m")
    
    # cut the other variables to the same grid cells & period as tas BEFORE comparing lengths:
    # comparing the uncut series against the already-cut tas would flag a mismatch
    # whenever any of them contains timesteps outside 1850-2049
    P, W, H, S = [da.sel(lon = tas.lon, lat = tas.lat, time = slice("1850", "2049")) for da in [pr, sfcWind, hurs, snw]]
    
    # check that all time series are the same length
    if not all([len(da.time) == len(tas.time) for da in [P, W, H, S]]):
        print("   ! length mismatch - skipping")
        continue
    
    # reassign coords to ensure that data will align correctly
    P, W, H, S = [da.assign_coords(time = tas.time) for da in [P, W, H, S]]

    months = tas.time.dt.month.to_numpy()
    days = tas.time.dt.day.to_numpy()
    
    # run calculate_fwi on each grid cell's time series; timestamps show how long it took
    print("  ",datetime.now())
    ffmc, dmc, dc, isi, bui, fwi = xr.apply_ufunc(lambda t, p, w, h, s : calculate_fwi(months, days, t, p, w, h, s), tas, P, W, H, S, 
                                                  input_core_dims = [["time"],["time"],["time"],["time"],["time"]], 
                                                  output_core_dims = [["time"],["time"],["time"],["time"],["time"],["time"]], vectorize = True)
    print("  ",datetime.now())
    
    # name each output explicitly (rather than looking variables up by name with eval) and merge
    fwi_components = {"ffmc" : ffmc, "dmc" : dmc, "dc" : dc, "isi" : isi, "bui" : bui, "fwi" : fwi}
    da = xr.merge([arr.rename(nm) for nm, arr in fwi_components.items()])
    da.to_netcdf("model_fwi/fwi_"+fnm)

---
# **CORDEX**
Raw data downloaded using synda with `local_path_drs_template=%(domain)s%(driving_model)s/%(ensemble)s/%(rcm_name)s/%(variable)s`


## **Pre-processing**

### **Subset data**

```
#!/bin/bash

# script to subset each subfile of the CORDEX runs (concatenation done separately)
# Chaining fails for some reason, so using an intermediate temporary file
module load cdo

dir_in=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/synda/data
dir_out=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/99_processing/cordex
tmp_file=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/99_processing/tmp.nc

# subset_var VARNM [HOUR]
#   Cut every $dir_in/*-*/*/*/VARNM/VARNM_*.nc file to the lon-lat box and write it to
#   $dir_out under the same file name. If HOUR is given, only timesteps in that hour are
#   kept (done as a second cdo call via $tmp_file). Paths are quoted and the glob is
#   expanded directly, so no parsing of `ls` output is needed.
subset_var() {
    local varnm=$1 hour=$2
    local file_in file_out
    for file_in in "$dir_in"/*-*/*/*/"$varnm"/"$varnm"_*.nc; do
        [ -e "$file_in" ] || continue    # pattern matched no files
        file_out=$dir_out/${file_in##*/}
        if [ -n "$hour" ]; then
            cdo -s sellonlatbox,280,297,47,59 "$file_in" "$tmp_file"
            cdo -s selhour,"$hour" "$tmp_file" "$file_out"
        else
            cdo -s sellonlatbox,280,297,47,59 "$file_in" "$file_out"
        fi
    done
    echo "$varnm complete"
}

# temperature, relative humidity, wind speed (only 16:30)
for varnm in tas hurs sfcWind; do
    subset_var "$varnm" 16
done

# precip, snow (all)
for varnm in pr snw; do
    subset_var "$varnm"
done

### **Concatenate subsetted data (excluding precip)**

```
#!/bin/bash

# Join the subsetted CORDEX files into a single file per variable and driving model
module load cdo

dir_sub=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/99_processing/cordex
dir_out=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/cordex

for varnm in tas sfcWind snw hurs; do
    for mdl in HadGEM2 MPI-ESM NorESM; do

        # all subfiles for this variable & model (one path per line)
        fl=$(ls ${dir_sub}/${varnm}_*${mdl}*.nc)

        # output name: base name of the first subfile, with everything from "_1970" onwards replaced by ".nc"
        fnm_root=$(printf '%s\n' "$fl" | head -1)
        fnm_root=${fnm_root##*/}
        new_fnm=${dir_out}/${fnm_root/_1970*/.nc}

        # $fl is deliberately left unquoted so that each file becomes its own argument
        cdo cat $fl $new_fnm
    done
done
```

### **Concatenate & aggregate precipitation data**

In [4]:
# directory searched for the subsetted CORDEX precipitation files
fpath = "../00_WWA_project_folder/ephemeral/canada_fwi/99_processing/cordex"
# substrings used to pick out the files belonging to each driving model
cordex_models = ["HadGEM2", "MPI-ESM", "NorESM"]
# directory that the aggregated precipitation files are written to
path_out = "../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/cordex/"

In [5]:
# Concatenate the precipitation files of each driving model and accumulate them over 8 time steps
for mdl in cordex_models:

    pr_files = glob.glob(fpath+"/pr_*"+mdl+"*.nc")

    # output name: first input file name minus its last 32 characters, plus the new suffix
    stem = pr_files[0].split("/")[-1][:-32]

    pr_all = xr.open_mfdataset(pr_files).pr

    # trailing 8-step sum, then keep only the time steps falling in hour 16
    pr_acc = pr_all.rolling(time = 8).sum()
    pr_acc = pr_acc.groupby("time.hour")[16]

    pr_acc.to_netcdf(path_out+stem+"24hr-1630.nc")

## **Model evaluation**

### **Seasonal cycle**

In [None]:
# Day-of-year climatology (1980-2020) of each CORDEX run, averaged over the shapefile region
for varnm in ["tas", "pr"]:

    units = {"tas" : "degC", "pr" : "mm/day"}[varnm]

    cordex_fl = sorted(glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/cordex/"+varnm+"_*"))
    for fnm in cordex_fl:

        # read the variable currently being processed (not always `tas`)
        da = xr.open_dataset(fnm)[varnm].sel(time = slice("1980", "2020"))

        # mask to the region in `sf`, average over the grid, then average by day of year
        sf_mask = regionmask.mask_3D_geopandas(sf, da.lon, da.lat).squeeze(drop = True)
        sc = da.where(sf_mask == 1).mean(["rlat", "rlon"]).groupby("time.dayofyear").mean()
        sc = convert_units_to(sc, units)

        # DataArray has no to_csv method, so write via a pandas DataFrame;
        # only the trailing ".nc" extension is replaced
        csv_fnm = "eval/sc-"+re.sub(r"\.nc$", ".csv", fnm.split("/")[-1])
        sc.to_dataframe(name = varnm).to_csv(csv_fnm)


### **Spatial pattern**

In [22]:
# lat-lon bounds for Canada (all runs use same grid in this case)
xn = 360-145; xx = 360-50; yn = 40; yx = 80

tmplt = xr.open_dataset("../00_WWA_project_folder/ephemeral/canada_fwi/synda/data/MOHC-HadGEM2-ES/r1i1p1/REMO2015/tas/tas_NAM-22_MOHC-HadGEM2-ES_historical_r1i1p1_GERICS-REMO2015_v1_3hr_197001010100-197012302200.nc")
rm = np.logical_and(np.logical_and(tmplt.lon >= xn, tmplt.lon <= xx), np.logical_and(tmplt.lat >= yn, tmplt.lat <= yx))
rm = rm.where(rm == 1).dropna("rlon", "all").dropna("rlat", "all")

In [None]:
# Mean JJA map per run, built from the raw files overlapping 1980-2010
for varnm in ["tas", "pr"]:

    units = "degC" if varnm == "tas" else "mm/day"

    for fp in glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/synda/data/*/r1i1p1/REMO2015/"+varnm):

        print(fp, end = "")

        # keep files whose end year is >= 1980 and whose start year is <= 2010
        # (years are read from fixed positions at the end of the file name)
        fl = []
        for fnm in glob.glob(fp+"/*.nc"):
            end_year, start_year = fnm[-15:-11], fnm[-28:-24]
            if end_year >= "1980" and start_year <= "2010":
                fl.append(fnm)

        # one JJA-mean map per file, cut to the grid cells retained in `rm`
        jja_maps = []
        for fnm in fl:
            print(".", end = "")
            sub = xr.open_dataset(fnm)[varnm].sel(rlon = rm.rlon, rlat = rm.rlat)
            jja_maps.append(sub.groupby("time.season")["JJA"].mean("time"))

        # average the per-file maps & convert to the target units
        em = xr.concat(jja_maps, "time").mean("time")
        em = convert_units_to(em, units)

        # output name: first file name without its last two underscore-separated fields
        out_stem = "_".join(fl[0].split("/")[-1].split("_")[:-2])
        em.to_netcdf("sp-"+out_stem+".nc")
        print("")
    clear_output(wait = False)
print("Done.")

## **Calculate FWI**

In [26]:
# directory holding the pre-processed CORDEX variables
fpath = "../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/cordex/"
# run identifiers: underscore-separated fields [-7:-2] of each tas file path
# (used below as a wildcard pattern to find the matching files of every variable)
fl = ["_".join(fnm.split("_")[-7:-2]) for fnm in glob.glob(fpath+"tas_*.nc")]

In [None]:
# Compute the FWI components for every CORDEX run identifier in `fl`
for fnm in fl:

    print(fnm)

    # load data & convert to the units passed to calculate_fwi
    tas = convert_units_to(xr.open_mfdataset(fpath+"tas_*"+fnm+"*.nc").tas.load(), "degC")
    pr = convert_units_to(xr.open_mfdataset(fpath+"pr_*"+fnm+"*.nc").pr.load(), "mm/day")
    sfcWind = convert_units_to(xr.open_mfdataset(fpath+"sfcWind_*"+fnm+"*.nc").sfcWind.load(), "km/hr")
    hurs = xr.open_mfdataset(fpath+"hurs_*"+fnm+"*.nc").hurs.load()
    snw = convert_units_to(xr.open_mfdataset(fpath+"snw_*"+fnm+"*.nc").snw.load(), "m")

    # check that all time series are the same length
    if not all(len(da.time) == len(tas.time) for da in [pr, sfcWind, hurs, snw]):
        print("   ! length mismatch - skipping")
        continue

    # put every variable on the tas grid & overwrite its timestamps with those of tas
    P, W, H, S = [da.sel(rlon = tas.rlon, rlat = tas.rlat).assign_coords(time = tas.time) for da in [pr, sfcWind, hurs, snw]]

    months = tas.time.dt.month.to_numpy()
    days = tas.time.dt.day.to_numpy()

    # apply calculate_fwi to the full time series of each grid cell
    print("  ",datetime.now())
    ffmc, dmc, dc, isi, bui, fwi = xr.apply_ufunc(lambda t, p, w, h, s : calculate_fwi(months, days, t, p, w, h, s), tas, P, W, H, S, 
                                                  input_core_dims = [["time"],["time"],["time"],["time"],["time"]], 
                                                  output_core_dims = [["time"],["time"],["time"],["time"],["time"],["time"]], vectorize = True)
    print("  ",datetime.now())

    # name each output explicitly (no eval of variable names) & merge into one dataset
    da = xr.merge([idx.rename(nm) for nm, idx in zip(["ffmc", "dmc", "dc", "isi", "bui", "fwi"], [ffmc, dmc, dc, isi, bui, fwi])])
    da.to_netcdf("model_fwi/fwi_"+fnm+".nc")

---
# **HighresMIP**
Data retrieved from JASMIN

## **Extract variables from archived data**

### **Noon temperatures**

In [None]:
# Subset the 3-hourly tas files of each EC-Earth run (excluding r1i1p1f1) to the bounding box & hour 18
tas_list = [fp for fp in glob.glob("/badc/cmip6/data/CMIP6/HighResMIP/*/EC-Earth*/highresSST-future/*/3hr/tas/*/latest/") if "r1i1p1f1" not in fp]
for fpath in tas_list:

    # files of the matching "present" run followed by those of the "future" run
    fl = glob.glob(re.sub("future", "present", fpath)+"/*") + glob.glob(fpath+"/*")

    # list the existing outputs once (instead of re-globbing for every file) & skip those already done
    existing = set(glob.glob("highresmip_raw/tas_*.nc"))
    fl = [fnm for fnm in fl if "highresmip_raw/"+re.sub("3hr", "1800", fnm).split("/")[-1] not in existing]

    for fnm in fl:
        new_fnm = "highresmip_raw/"+re.sub("3hr", "1800", fnm).split("/")[-1]
        print(new_fnm)
        da = xr.open_dataset(fnm).sel(lon = slice(xn, xx), lat = slice(yn,yx)).reset_coords(drop = True).tas.groupby("time.hour")[18]
        da.to_netcdf(new_fnm)
    clear_output(wait = False)

In [None]:
# one stem per run: raw file name with "future" mapped to "present" and the last 29 characters removed
tas_stems = {re.sub("future", "present", fnm)[:-29] for fnm in glob.glob("highresmip_raw/tas_*")}

for mdl in tas_stems:
    # wildcard in place of "present" so that all files sharing the stem are combined
    pattern = re.sub("present", "*", mdl)+"*.nc"
    out_fnm = "highresmip/"+mdl.split("/")[-1]+".nc"
    xr.open_mfdataset(pattern).to_netcdf(out_fnm)

### **Noon relative humidity**

In [None]:
# cut out relevant area & timestamps
huss_list = [fp for fp in glob.glob("/badc/cmip6/data/CMIP6/HighResMIP/*/EC-Earth*/highresSST-future/*/3hr/huss/*/latest/") if not "r1i1p1f1" in fp]
for fpath in huss_list:

    fl = glob.glob(re.sub("future", "present", fpath)+"/*") + glob.glob(fpath+"/*")
    fl = [fnm for fnm in fl if not "highresmip_raw/"+re.sub("huss", "hurs",re.sub("3hr", "1800", fnm)).split("/")[-1] in glob.glob("highresmip_raw/hurs_*.nc")]

    for fnm in fl:
        new_fnm = "highresmip_raw/"+re.sub("huss", "hurs",re.sub("3hr", "1800", fnm)).split("/")[-1]
        print(new_fnm)

        huss, tas, ps = [xr.open_dataset(re.sub("huss", varnm, fnm)).sel(lon = slice(xn, xx), lat = slice(yn,yx), 
                                                                         time = slice(None, "2050")).reset_coords(drop = True)[varnm].groupby("time.hour")[18] 
                         for varnm in ["huss", "tas", "ps"]]

        hurs = relative_humidity(tas = tas, ps = ps, huss = huss).rename("hurs")
        hurs.to_netcdf(new_fnm)
    clear_output(wait = False)

In [None]:
# one stem per run: raw file name with "future" mapped to "present" and the last 29 characters removed
hurs_stems = {re.sub("future", "present", fnm)[:-29] for fnm in glob.glob("highresmip_raw/hurs_*")}

for mdl in hurs_stems:
    # wildcard in place of "present" so that all files sharing the stem are combined
    pattern = re.sub("present", "*", mdl)+"*.nc"
    out_fnm = "highresmip/"+mdl.split("/")[-1]+".nc"
    xr.open_mfdataset(pattern).to_netcdf(out_fnm)

### **3hourly precip**

In [None]:
# cut out relevant area - can't aggregate until all files are subsetted
pr_list = [fp for fp in glob.glob("/badc/cmip6/data/CMIP6/HighResMIP/*/EC-Earth*/highresSST-future/*/3hr/pr/*/latest/") if not "r1i1p1f1" in fp]
for fpath in pr_list:

    fl = glob.glob(re.sub("future", "present", fpath)+"/*") + glob.glob(fpath+"/*")
    fl = [fnm for fnm in fl if not "highresmip_raw/"+fnm.split("/")[-1] in glob.glob("highresmip_raw/pr_*.nc")]

    for fnm in fl:
        new_fnm = "highresmip_raw/"+fnm.split("/")[-1]
        print(new_fnm)
        da = xr.open_dataset(fnm).sel(lon = slice(xn, xx), lat = slice(yn,yx)).reset_coords(drop = True).pr
        da.to_netcdf(new_fnm)
    clear_output(wait = False)

In [None]:
# one stem per run: raw file name with "future" mapped to "present" and the last 29 characters removed
pr_stems = {re.sub("future", "present", fnm)[:-29] for fnm in glob.glob("highresmip_raw/pr_*")}

for mdl in pr_stems:
    pr_all = xr.open_mfdataset(re.sub("present", "*", mdl)+"*.nc").pr

    # trailing 8-step sum, then keep only the time steps falling in hour 18
    pr_acc = pr_all.rolling(time = 8).sum()
    pr_acc = pr_acc.groupby("time.hour")[18]

    out_stem = re.sub("3hr", "24hr-1800", mdl).split("/")[-1]
    pr_acc.to_netcdf("highresmip/"+out_stem+".nc")

### **Daily snow depth**

In [None]:
# Subset the daily snw files of every run to the bounding box, up to 2050
snw_list = glob.glob("/badc/cmip6/data/CMIP6/HighResMIP/*/*/highresSST-future/*/day/snw/*/latest/")
for fpath in snw_list:

    # files of the matching "present" run, then those of the "future" run
    fl_hist = glob.glob(re.sub("future", "present", fpath)+"/*")
    fl_fut = glob.glob(fpath+"/*")

    for fnm in (fl_hist + fl_fut):
        new_fnm = "highresmip_raw/"+fnm.split("/")[-1]

        # nothing to do if this file has already been written
        if path.exists(new_fnm):
            continue

        print(new_fnm)
        ds = xr.open_dataset(fnm).sel(lon = slice(xn, xx), lat = slice(yn,yx), time = slice(None, "2050"))
        da = ds.squeeze(drop = True).reset_coords(drop = True).snw
        da.to_netcdf(new_fnm)

### **Daily sfcWind**

In [None]:
# Subset the daily sfcWind files of every run to the bounding box, up to 2050
sfcWind_list = glob.glob("/badc/cmip6/data/CMIP6/HighResMIP/*/*/highresSST-future/*/day/sfcWind/*/latest/")
for fpath in sfcWind_list:

    # files of the matching "present" run, then those of the "future" run
    fl_hist = glob.glob(re.sub("future", "present", fpath)+"/*")
    fl_fut = glob.glob(fpath+"/*")

    for fnm in (fl_hist + fl_fut):
        new_fnm = "highresmip_raw/"+fnm.split("/")[-1]

        # only process files that have not been written yet (as in the snw cell);
        # previously the subsetting ran outside the existence check, so every
        # file was recomputed and overwritten on each run
        if path.exists(new_fnm):
            continue

        print(new_fnm)
        da = xr.open_dataset(fnm)
        da = da.sel(lon = slice(xn, xx), lat = slice(yn,yx), time = slice(None, "2050")).squeeze(drop = True).reset_coords(drop = True).sfcWind
        da.to_netcdf(new_fnm)

## **Model evaluation**

### **Seasonal cycles**

In [None]:
# Day-of-year climatology (1980-2020) of each HighResMIP run, averaged over the shapefile region
for varnm in ["tas", "pr"]:

    units = {"tas" : "degC", "pr" : "mm/day"}[varnm]

    highresmip_fl = sorted(glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/highresmip/"+varnm+"/"+varnm+"_*"))
    for fnm in highresmip_fl:

        # read the variable currently being processed (not always `tas`)
        da = xr.open_dataset(fnm)[varnm].sel(time = slice("1980", "2020"))

        # mask to the region in `sf`, average over the grid, then average by day of year
        sf_mask = regionmask.mask_3D_geopandas(sf, da.lon, da.lat).squeeze(drop = True)
        sc = da.where(sf_mask == 1).mean(["lat", "lon"]).groupby("time.dayofyear").mean()
        sc = convert_units_to(sc, units)

        # DataArray has no to_csv method, so write via a pandas DataFrame;
        # only the trailing ".nc" extension is replaced
        csv_fnm = "eval/sc-"+re.sub(r"\.nc$", ".csv", fnm.split("/")[-1])
        sc.to_dataframe(name = varnm).to_csv(csv_fnm)

### **Spatial pattern**

In [None]:
# lat-lon bounds for Canada
xn = 360-145; xx = 360-50; yn = 40; yx = 80

mdl_list = [fp for fp in glob.glob("/badc/cmip6/data/CMIP6/HighResMIP/*/EC-Earth*/highresSST-future/*/3hr/tas/*/latest/") if not "r1i1p1f1" in fp]

# temperatures
for fp in mdl_list:

    # list historical files (want 1980-2010 climatology only)
    fl = glob.glob(re.sub("3hr", "day", re.sub("future", "present", fp))+"*.nc")
    fl = [fnm for fnm in fl if (fnm[-11:-7] >= "1980") and (fnm[-20:-16] <= "2010")]

    da = xr.open_mfdataset(fl).tas.sel(lon = slice(xn, xx), lat = slice(yn,yx), time = slice("1980", "2010")).groupby("time.month").mean().reset_coords(drop = True)
    da.to_netcdf("spatial/sp-"+re.sub("_[0-9]{8}-[0-9]{8}", "", fl[0].split("/")[-1]))
    
# precip
for fp in mdl_list:

    # list historical files (want 1980-2010 climatology only)
    fl = glob.glob(re.sub("tas", "pr", re.sub("3hr", "day", re.sub("future", "present", fp)))+"*.nc")
    fl = [fnm for fnm in fl if (fnm[-11:-7] >= "1980") and (fnm[-20:-16] <= "2010")]

    da = xr.open_mfdataset(fl).pr.sel(lon = slice(xn, xx), lat = slice(yn,yx), time = slice("1980", "2010")).groupby("time.month").mean().reset_coords(drop = True)
    da.to_netcdf("spatial/sp-"+re.sub("_[0-9]{8}-[0-9]{8}", "", fl[0].split("/")[-1]))

## **Manual calculation of FWI**
Carried out after transferring raw variables to HPC, but could be done anywhere  
_About 15 minutes per run_

In [None]:
# directory holding one sub-folder of pre-processed files per variable
fpath = "../00_WWA_project_folder/ephemeral/canada_fwi/highresmip/"
# run identifiers: last four underscore-separated fields of each tas file name (including ".nc")
fl = ["_".join(fnm.split("_")[-4:]) for fnm in glob.glob(fpath+"tas/*.nc")]

for fnm in fl:

    print(fnm)

    # load data & convert to the units passed to calculate_fwi
    tas = convert_units_to(xr.open_mfdataset(fpath+"tas/*"+fnm).tas.load(), "degC")
    pr = convert_units_to(xr.open_mfdataset(fpath+"pr_24h/*"+fnm).pr.load(), "mm/day")
    sfcWind = convert_units_to(xr.open_mfdataset(fpath+"sfcWind/*"+fnm).sfcWind.load(), "km/hr")
    hurs = xr.open_mfdataset(fpath+"hurs/*"+fnm).hurs.load()
    snw = convert_units_to(xr.open_mfdataset(fpath+"snw/*"+fnm).snw.load(), "m")

    # check that all time series are the same length
    if not all(len(da.time) == len(tas.time) for da in [pr, sfcWind, hurs, snw]):
        print("   ! length mismatch - skipping")
        continue

    # put every variable on the tas grid & overwrite its timestamps with those of tas
    P, W, H, S = [da.sel(lon = tas.lon, lat = tas.lat).assign_coords(time = tas.time) for da in [pr, sfcWind, hurs, snw]]

    months = tas.time.dt.month.to_numpy()
    days = tas.time.dt.day.to_numpy()

    # apply calculate_fwi to the full time series of each grid cell
    print("  ",datetime.now())
    ffmc, dmc, dc, isi, bui, fwi = xr.apply_ufunc(lambda t, p, w, h, s : calculate_fwi(months, days, t, p, w, h, s), tas, P, W, H, S, 
                                                  input_core_dims = [["time"],["time"],["time"],["time"],["time"]], 
                                                  output_core_dims = [["time"],["time"],["time"],["time"],["time"],["time"]], vectorize = True)
    print("  ",datetime.now())

    # name each output explicitly (no eval of variable names) & merge into one dataset
    da = xr.merge([idx.rename(nm) for nm, idx in zip(["ffmc", "dmc", "dc", "isi", "bui", "fwi"], [ffmc, dmc, dc, isi, bui, fwi])])
    da.to_netcdf("model_fwi/fwi_"+fnm)

EC-Earth3P_highresSST-present_r2i1p1f1_gr.nc


### **FWI time series over study region**

In [29]:
# Regional-mean FWI time series for each HighResMIP run
for fnm in glob.glob("../00_WWA_project_folder/ephemeral/canada_fwi/fwi/highresmip/fwi_*.nc"):

    # missing values are set to 0 before averaging (otherwise averaging comes out with wrong numbers)
    da = xr.open_dataset(fnm).fwi.fillna(0)

    # mask to the study region & average over both horizontal dimensions
    rm_ejb = regionmask.mask_3D_geopandas(sf, da.lon, da.lat).squeeze(drop = True)
    in_region = rm_ejb == 1
    ts = da.where(in_region).mean(["lon", "lat"])

    out_fnm = "data/fwi-ejb_highresmip_"+fnm.split("/")[-1]
    ts.to_netcdf(out_fnm)

### **Time series of FWI indices**

In [10]:
# Monthly index time series for each HighResMIP run, collected into one array per index
fwi7x_ens = []
cumDSR_ens = []

for fnm in glob.glob("fwi/highresmip/fwi_*.nc"):

    fwi = xr.open_dataset(fnm).fwi
    rm = regionmask.mask_3D_geopandas(sf, fwi.lon, fwi.lat).squeeze(drop = True)
    in_region = rm == 1

    # get monthly time series of indices for now - then can easily modify seasonal scope if need be (eg if updating for full fire season)

    # monthly maximum of the 7-day rolling mean, averaged over the region
    fwi7x = fwi.rolling(time = 7).mean().resample(time = "MS").max()
    fwi7x = convert_calendar(fwi7x, "default", align_on = "date").rename("fwi7x")
    fwi7x_ens.append(fwi7x.where(in_region).mean(["lat", "lon"]))

    # monthly sum, averaged over the region
    # NOTE(review): this sums the raw FWI values although the result is stored as "msr"/cumDSR -
    # confirm whether a severity-rating transform should be applied first
    msr = fwi.resample(time = "MS").sum()
    msr = convert_calendar(msr, "default", align_on = "date").rename("msr")
    cumDSR_ens.append(msr.where(in_region).mean(["lat", "lon"]))

# stack the runs along a new "member" dimension
fwi7x_ens = xr.concat(fwi7x_ens, "member")
fwi7x_ens = convert_calendar(fwi7x_ens, "default", align_on = "date").sortby("member")

cumDSR_ens = xr.concat(cumDSR_ens, "member")
cumDSR_ens = convert_calendar(cumDSR_ens, "default", align_on = "date").sortby("member")

fwi7x_ens.to_netcdf("00_model-data/fwi7x_highresMIP_EC-Earth.nc")
cumDSR_ens.to_netcdf("00_model-data/msr_highresMIP_EC-Earth.nc")

---
# **UQAM-CRCM5**
Data provided by UQAM and downloaded to HPC

## **Define projection**

In [None]:
# rotated-pole projection used for the CRCM5 data
proj_crcm5 = cartopy.crs.RotatedPole(pole_longitude = 83, pole_latitude = 42.5)

# read the shapefile, then reproject it onto the rotated-pole grid
sf = gpd.read_file("sf_ejb/")
sf = sf.to_crs(proj_crcm5)

In [None]:
# check that CRS is correct
da = xr.open_dataset("../00_WWA_project_folder/ephemeral/canada_fwi/99_processing/crcm5/var_HR_194901.nc4")
rm = regionmask.mask_3D_geopandas(sf, da.rlon, da.rlat).squeeze(drop = True)

da.isel(time = -1).plot()
sf.boundary.plot(color = "k", ax = plt.gca())

## **Pre-processing**
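Variable codes used in the CRCM5 file names: HR = hurs; I5 = snw; PR = pr; TT = tas (the UU and VV wind components are combined into WS = sfcWind below)   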

### **Convert u- and v-vectors to windspeed using CDO**

```
#!/bin/bash

# script to convert u and v wind vectors to windspeed: sfcWind = sqrt(uas^2 + vas^2)
# NOTE(review): an earlier comment here said that chaining fails and that an
# intermediate temporary file is used, but the command below chains all operators
# in a single cdo call - confirm that this version runs as intended

# every file with "UU" in its name is paired with the file that has "VV" in the same position
fl=`ls crcm5/*/*UU*.nc4`
for u_file in $fl; do
    v_file=${u_file/UU/VV};
    # output has the same file name with "UU" replaced by "WS", written to crcm5_wind/
    outfile=${u_file/UU/WS};
    file_out=crcm5_wind/${outfile##*/};
    echo $file_out;
    # square uas & vas, add, take the square root, then rename the result from uas to sfcWind
    cdo -s chname,uas,sfcWind -sqrt -add -sqr -selname,uas $u_file -sqr -selname,vas $v_file $file_out
done
echo "sfcWind complete"
```

### **Concatenate files using CDO**

```
#!/bin/bash

# Concatenate the sub-files of the CRCM5 run into one file per variable
module load cdo

in_dir=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/99_processing/crcm5
out_dir=/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/canada_fwi/99_processing/crcm5_catted

# let unmatched globs expand to an empty list so missing variables can be detected
shopt -s nullglob

for varnm in TT I5 HR WS PR; do

    # expand the file list directly (globs are sorted, as the `ls` output was)
    fl=("$in_dir"/var_${varnm}_*.nc4)
    if [ ${#fl[@]} -eq 0 ]; then
        echo "no files found for ${varnm} - skipping" >&2
        continue
    fi

    new_fnm=$out_dir/crcm5_${varnm}_catted.nc

    # NOTE(review): cdo cat appends to an existing output file, so remove
    # any previous output before re-running this script
    cdo cat "${fl[@]}" "$new_fnm"
done
```

### **Aggregate precip**

In [2]:
# directory containing the concatenated CRCM5 files (crcm5_<code>_catted.nc)
path_in = "../00_WWA_project_folder/ephemeral/canada_fwi/99_processing/crcm5_catted/"
# directory that the converted CRCM5 variables are written to
path_out = "../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/crcm5/"

In [None]:
# aggregate precip to daily
pr = xr.open_dataset(cat_path+"crcm5_PR_catted.nc").pr
pr24 = pr.rolling(time = 24, center = False).sum()
pr24h = pr24.groupby("time.hour")[18]
convert_units_to(pr24h, "mm/day").to_netcdf(path_out+"pr_1800_CRCM5.nc")

### **Convert all units**

In [3]:
# convert all other variables to the required units
convert_units_to(xr.open_dataset(path_in+"crcm5_TT_catted.nc").tas, "degC").to_netcdf(path_out+"tas_1800_CRCM5.nc")
convert_units_to(xr.open_dataset(path_in+"crcm5_HR_catted.nc").hurs, "%").to_netcdf(path_out+"hurs_1800_CRCM5.nc")
convert_units_to(xr.open_dataset(path_in+"crcm5_WS_catted.nc").sfcWind, "km/hr").to_netcdf(path_out+"sfcWind_1800_CRCM5.nc")
convert_units_to(xr.open_dataset(path_in+"crcm5_I5_catted.nc").snw, "m").to_netcdf(path_out+"snw_1800_CRCM5.nc")

## **Calculate FWI**

In [None]:
# load the pre-processed CRCM5 variables (one file per variable)
tas, pr, sfcWind, snw, hurs = [xr.open_dataset("../00_WWA_project_folder/ephemeral/canada_fwi/00_data-for-fwi/crcm5/"+varnm+"_1800_CRCM5.nc")[varnm] for varnm in ["tas", "pr", "sfcWind", "snw", "hurs"]]

months = tas.time.dt.month.to_numpy()
days = tas.time.dt.day.to_numpy()

# apply calculate_fwi to the full time series of each grid cell
print("  ",datetime.now())
ffmc, dmc, dc, isi, bui, fwi = xr.apply_ufunc(lambda t, p, w, h, s : calculate_fwi(months, days, t, p, w, h, s), tas, pr, sfcWind, hurs, snw, 
                                              input_core_dims = [["time"],["time"],["time"],["time"],["time"]], 
                                              output_core_dims = [["time"],["time"],["time"],["time"],["time"],["time"]], vectorize = True)
print("  ",datetime.now())

# name each output explicitly (no eval of variable names) & merge into one dataset
da = xr.merge([idx.rename(nm) for nm, idx in zip(["ffmc", "dmc", "dc", "isi", "bui", "fwi"], [ffmc, dmc, dc, isi, bui, fwi])])
da.to_netcdf("model_fwi/fwi_CRCM5.nc")

### **Time series for climate explorer**  

In [15]:
fwi = xr.open_dataset("model_fwi/fwi_CRCM5.nc").fwi

mdl = "CRCM5"
xydims = ["rlat", "rlon"]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## FWI7x FOR CLIMATE EXPLORER

# calculate indices & cut off first year
# ("YS-JAN" replaces the deprecated "AS-JAN" alias; `how` is passed by keyword)
fwi7x_ts = fwi.rolling(time = 7).mean().resample(time = "YS-JAN").max().isel(time = slice(1, None)).where(rm == 1).mean(xydims).dropna("time", how = "any")
fwi7x_ts = fwi7x_ts.assign_coords(time = fwi7x_ts.time.dt.year).rename(time = "year")

# specify filename
csv_fnm = "ts/canada-wildfires_fwi7x_"+mdl+".dat"
if len(csv_fnm.split("/")[-1]) > 65: print("! Filename too long: ", csv_fnm)

# create extra header lines for upload to Climate Explorer 
str1 = "# contact :: "+mdl+" fwi7x - Canada wildfires 2023, c.barnes22@imperial.ac.uk"
str2 = "# fwi7x [] spatial mean of maximum of 7-day average of FWI over Eastern James Bay region at "+re.sub(r"\.dat$", "", csv_fnm.split("/")[-1])
head = "# year fwi7x"

# make .dat file: write the three header lines (the first keeps its trailing space),
# then append the data without a column header
with open(csv_fnm, "w") as dat_file:
    dat_file.write(str1+" \n"+str2+"\n"+head+"\n")
fwi7x_ts.to_dataframe().to_csv(csv_fnm, sep = " ", mode = "a", header = False)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## cumDSR FOR CLIMATE EXPLORER

# compute DSR, set missing values to zero and mask off study region
dsr = (fwi ** 1.71) * 0.0272
dsr = dsr.where(~np.isnan(dsr), 0).where(rm == 1)

# keep January-July only (vectorised month test), then sum within each year;
# the sum turns fully-masked cells into 0, so the mask is re-applied before the spatial mean
# ("YS-JAN" replaces the deprecated "AS-JAN" alias)
cumdsr_ts = dsr.sel(time = dsr.time.dt.month <= 7).resample(time = "YS-JAN").sum().rename("cumdsr").where(rm == 1).mean(xydims)
cumdsr_ts = cumdsr_ts.assign_coords(time = cumdsr_ts.time.dt.year).rename(time = "year")

# specify filename
csv_fnm = "ts/canada-wildfires_cumDSR_"+mdl+".dat"
if len(csv_fnm.split("/")[-1]) > 65: print("! Filename too long: ", csv_fnm)

# create extra header lines for upload to Climate Explorer 
str1 = "# contact :: "+mdl+" cumulative DSR - Canada wildfires 2023, c.barnes22@imperial.ac.uk"
str2 = "# cumDSR [] spatial mean of cumulative Daily Severity Rating from Jan-July each year over Eastern James Bay region at "+re.sub(r"\.dat$", "", csv_fnm.split("/")[-1])
head = "# year cumDSR"

# make .dat file: write the three header lines (the first keeps its trailing space),
# then append the data without a column header
with open(csv_fnm, "w") as dat_file:
    dat_file.write(str1+" \n"+str2+"\n"+head+"\n")
cumdsr_ts.to_dataframe().to_csv(csv_fnm, sep = " ", mode = "a", header = False)