## **Pre-processing of commonly used data files**

In [4]:
import sys; sys.path.append('/home/clair/wwa'); from wwa import *

### **Global elevation**

! wget http://research.jisao.washington.edu/data_sets/elevation/elev.0.25-deg.nc

ds = wrap_lon(xr.open_dataset("../90_data/elev.0.25-deg.nc", decode_times = False).data.squeeze(drop = True))
ds = ds.assign_attrs(long_name = "elevation", units = "m above sea level",                    
                     source = "http://research.jisao.washington.edu/data_sets/elevation/").rename("elev")
ds.to_netcdf("../90_data/elevation-0.25deg-global.nc")

### **GSATs**

#### **UKCP global**

In [34]:
# Load the area-weighted GSAT (variable tas_aw) for ensemble members with labels up to 15
# and drop every time step at which any of those members is missing.
ukcp_da = xr.open_dataset("../90_data/ukcp-land-gcm_gsat.nc").sel(ensemble_member = slice(None, 15)).tas_aw.dropna("time", how = "any")

# Label each step by its calendar year, then convert to a pandas table and transpose it
# so that each ensemble member can be selected as a column below.
ukcp_gsat = ukcp_da.assign_coords(time = ukcp_da.time.dt.year).to_pandas().transpose()

for i in range(15):

    member = str(i+1).rjust(2, "0")
    csv_fnm = "../00_gsat/ukcp-land-gcm_gsat-aw_sm_"+member+".dat"

    # series name = file name without its extension (escaped and anchored, so that only a
    # literal trailing ".dat" is removed)
    series_name = re.sub(r"\.dat$", "", csv_fnm.split("/")[-1])

    # create extra header lines for upload to Climate Explorer
    # (the trailing space on the first line is kept from the original output format)
    header_lines = [
        "# contact :: UKCP-land-gcm GSAT (member "+member+"), c.barnes22@imperial.ac.uk ",
        "# temperature [degC] area-weighted global surface air temperature at "+series_name,
        "# year tas_aw",
    ]

    # write the header from Python rather than via shell echo: no quoting or
    # interpolation pitfalls, and the file is always truncated before the data is added
    with open(csv_fnm, "w") as f:
        f.write("\n".join(header_lines) + "\n")

    # append "year value" rows for this member (columns are labelled 1..15)
    ukcp_gsat[[i+1]].to_csv(csv_fnm, sep = " ", mode = "a", header = False)

#### **HighResMIP**

Originally calculated on JASMIN; the script below is kept for reference and is not executed in this notebook:

```
# Loop over all available highresSST-future runs (monthly "Amon" tas, "latest" version) and,
# for each run and its matching highresSST-present run, compute the area-weighted mean of tas
# over lat/lon, aggregate it to an annual time series and save it as a netcdf subfile.

# select models based on existence of SST-future runs: fewer of these than SST-present
for fpath in glob.glob("/badc/cmip6/data/CMIP6/HighResMIP/*/*/highresSST-future/*/Amon/tas/*/latest/"):
    
    # progress line: path now, outcome ("saved" or the reason for skipping) appended later
    print(fpath, end = " - ")

    # list all available files: both SST-future and SST-present
    # (the present-day directory is derived by replacing "future" with "present" in the path)
    fl_hist = sorted(glob.glob(re.sub("future", "present", fpath)+"/*.nc"))
    fl_fut = sorted(glob.glob(fpath+"/*.nc"))

    # if either the present-day or the future simulation has no files, skip this run
    if len(fl_hist) == 0:
        print("no historical simulations")
        continue
    if len(fl_fut) == 0:
        print("no future simulations")
        continue

    # directory in which the per-experiment annual series are stored
    sfpath = "/home/users/clairb/11_wwa/hiResMip_GSATs/subfiles/"
    
    # same processing for the present-day and the future file lists
    for fl in [fl_hist, fl_fut]:

        # subfile name: base name of the first file without its last 9 characters, plus the
        # last 9 characters of the last file - presumably "<end date>.nc", so that the name
        # spans the full period covered by the file list (TODO confirm naming convention)
        sf_fnm = sfpath + fl[0].split("/")[-1][:-9] + fl[-1][-9:]
        # skip files that have already been processed
        if len(glob.glob(sf_fnm)) > 0: continue

        # a single file is opened directly; several files are combined into one dataset
        if len(fl) == 1:
            ds = xr.open_dataset(fl[0])
        else:
            ds = xr.open_mfdataset(fl)

        # get area-weighted annual mean, save to netcdf
        # weights are cos(latitude); "AS" resamples to year-start (annual) bins;
        # reset_coords(drop = True) discards any non-index coordinates before saving
        area_weights = np.cos(np.deg2rad(ds.lat))
        tas_aw = ds.tas.weighted(area_weights).mean(["lat", "lon"]).resample(time = "AS").mean().reset_coords(drop = True)
        tas_aw.to_netcdf(sf_fnm)

    # NOTE(review): printed even when both subfiles already existed and nothing was written
    print("saved")
print("Done.")


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


# now merge annual time series into a single file
# (one output file per run, joining the present-day and future subfiles)
sfpath = "/home/users/clairb/11_wwa/hiResMip_GSATs/subfiles/"
# unique subfile stems: strip everything from the "_<six digits>" part onwards and map
# "future" to "present", so that both experiments of a run collapse onto a single stem
for fnm in list(set([re.sub("future", "present", re.sub("_[0-9]{6}.+", "", fnm)) for fnm in glob.glob(sfpath+"*")])):
    
    # output name: the first matching present-day subfile without its last 9 characters, plus
    # the last 9 characters of the first matching future subfile, written one directory up
    # (the "subfiles/" path component is removed)
    new_fnm = re.sub("subfiles/", "", glob.glob(fnm+"*")[0][:-9]+glob.glob(re.sub("present", "future", fnm)+"*")[0][-9:])
    # skip runs that have already been merged
    if len(glob.glob(new_fnm)) > 0: continue

    # open the present-day and future subfiles belonging to this stem
    ds_hist = xr.open_mfdataset(fnm+"*")
    ds_fut = xr.open_mfdataset(re.sub("present", "future", fnm)+"*")

    # if dates overlap, remove end of historical period (seems to be a one-month overlap in EC-Earth3P-HR)
    # keeps the historical steps up to the first future time stamp, then drops the last remaining step
    if ds_hist.time.max() >= ds_fut.time.min():
        ds_hist = ds_hist.sel(time = slice(None, ds_fut.time.min())).isel(time = slice(None, -1))        

    # join the two periods along time, convert to degC and to the default calendar
    tas = xr.concat([ds_hist, ds_fut], "time").tas
    tas = xclim.core.units.convert_units_to(tas, "degC")
    tas = xclim.core.calendar.convert_calendar(tas, "default", align_on = "date")

    tas.to_netcdf(new_fnm)
```


In [41]:
# data downloaded from Jasmin needs to be smoothed before saving for upload
# (sorted so that files are always processed in the same order)
for fpath in sorted(glob.glob("../90_data/highresmip_gsats/*")):

    # 4-step trailing rolling mean; the first three steps have no complete window, so
    # they come out as NaN and are dropped
    tas_sm = xr.open_dataset(fpath).tas.rolling(time = 4).mean().dropna("time", how = "any")

    # fields 2 and 4 of the underscore-separated file name - presumably the model name and
    # the ensemble member of a CMIP-style file name (TODO confirm)
    name_parts = fpath.split("/")[-1].split("_")
    model, member = name_parts[2], name_parts[4]

    csv_fnm = "../00_gsat/highresmip_gsat-aw-sm_"+model+"-"+member+".dat"

    # series name = file name without its extension (escaped and anchored, so that only a
    # literal trailing ".dat" is removed, never a "dat" inside a model name)
    series_name = re.sub(r"\.dat$", "", csv_fnm.split("/")[-1])

    # create extra header lines for upload to Climate Explorer
    # (the trailing space on the first line is kept from the original output format)
    header_lines = [
        "# contact :: HighResMIP GSAT ("+model+"_"+member+"), c.barnes22@imperial.ac.uk ",
        "# temperature [degC] area-weighted global surface air temperature at "+series_name,
        "# year tas_aw",
    ]

    # write the header from Python rather than via shell echo: no quoting or
    # interpolation pitfalls, and the file is always truncated before the data is added
    with open(csv_fnm, "w") as f:
        f.write("\n".join(header_lines) + "\n")

    # append "year value" rows, with each step labelled by its calendar year
    tas_sm.assign_coords(time = tas_sm.time.dt.year).to_dataframe().to_csv(csv_fnm, sep = " ", mode = "a", header = False)